Files
emacs/lisp/ledger-mode/ledger-regex.el
2025-06-22 17:08:08 +02:00

438 lines
14 KiB
EmacsLisp

;;; ledger-regex.el --- Helper code for use with the "ledger" command-line tool -*- lexical-binding: t; -*-
;; Copyright (C) 2003-2016 John Wiegley (johnw AT gnu DOT org)
;; This file is not part of GNU Emacs.
;; This is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 2, or (at your option) any later
;; version.
;;
;; This is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
;; MA 02110-1301 USA.
;;; Commentary:
;; Regular expressions used by ledger-mode.
;;; Code:
(require 'rx)
(require 'cl-lib)
(defvar ledger-iso-date-regexp)
(defconst ledger-amount-decimal-comma-regex
"-?\\(?:[1-9][0-9.]\\|0\\)*[,]?[0-9]*")
(defconst ledger-amount-decimal-period-regex
"-?\\(?:[1-9][0-9,]*\\|0\\)[.]?[0-9]*")
(defconst ledger-other-entries-regex
"\\(^[~=A-Za-z].+\\)+")
(defconst ledger-comment-regex
"^[;#|\\*%].*\\|[ \t]+;.*")
(defconst ledger-multiline-comment-start-regex
"^!comment$")
(defconst ledger-multiline-comment-end-regex
"^!end_comment$")
(defconst ledger-multiline-comment-regex
"^!comment\n\\(.*\n\\)*?!end_comment$")
(defconst ledger-payee-any-status-regex
"^[0-9]+[-/][-/.=0-9]+\\(?:\\s-+\\*\\)?\\(?:\\s-+(.*?)\\)?\\s-+\\(.+?\\)\\s-*\\(?:;\\|$\\)")
(defconst ledger-payee-pending-regex
"^[0-9]+[-/][-/.=0-9]+\\s-!\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
(defconst ledger-payee-cleared-regex
"^[0-9]+[-/][-/.=0-9]+\\s-\\*\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
(defconst ledger-payee-uncleared-regex
"^[0-9]+[-/][-/.=0-9]+\\s-+\\(([^)]+)\\s-+\\)?\\([^*].+?\\)\\s-*\\(;\\|$\\)")
(defconst ledger-payee-directive-regex
(concat "^payee[ \t]+\\(.*?\\)[ \t]*$"))
(defconst ledger-payee-name-or-directive-regex
(format "\\(?:%s\\|%s\\)" ledger-payee-any-status-regex ledger-payee-directive-regex))
(defconst ledger-init-string-regex
"^--.+?\\($\\|[ ]\\)")
(defconst ledger-account-name-regex
"\\(?1:[^][(); \t\r\n]+\\(?: [^][(); \t\r\n]+\\)*\\)")
(defconst ledger-account-directive-regex
(concat "^account[ \t]+" ledger-account-name-regex))
(defconst ledger-account-name-maybe-virtual-regex
(concat "[[(]?" ledger-account-name-regex "[])]?"))
(defconst ledger-account-any-status-regex
(concat "^[ \t]+\\(?:[!*][ \t]*\\)?" ledger-account-name-maybe-virtual-regex))
;; This would incorrectly match "account (foo)", but writing the regexp this way
;; allows us to have just one match result
(defconst ledger-account-name-or-directive-regex
(format "\\(?:%s\\|%s\\)" ledger-account-any-status-regex ledger-account-directive-regex))
(defconst ledger-account-pending-regex
(concat "\\(^[ \t]+\\)!" ledger-account-name-maybe-virtual-regex))
(defconst ledger-account-cleared-regex
(concat "\\(^[ \t]+\\)*" ledger-account-name-maybe-virtual-regex))
(defmacro ledger-define-regexp (name regex docs &rest args)
"Simplify the creation of a Ledger regex and helper functions."
(let* ((regex (eval regex))
(group-count (regexp-opt-depth regex))
(defs
(list
`(defconst
,(intern (concat "ledger-" (symbol-name name) "-regexp"))
,regex
,docs)
`(defconst
,(intern (concat "ledger-regex-" (symbol-name name)
"-group--count"))
,group-count)))
(addend 0) last-group)
(if (null args)
(progn
(when (cl-plusp group-count)
(nconc
defs
(list
`(defconst
,(intern
(concat "ledger-regex-" (symbol-name name) "-group"))
1)))
(nconc
defs
(list
`(defmacro
,(intern (concat "ledger-regex-" (symbol-name name)))
(&optional string)
,(format "Return the match string for the %s" name)
(match-string
,(intern (concat "ledger-regex-" (symbol-name name)
"-group"))
string))))))
(while args
(let (arg var grouping target force-increment)
(setq arg (pop args))
(when (eq arg :separate)
(setq arg (pop args))
(setq force-increment t))
(if (symbolp arg)
(setq var arg target arg)
(cl-assert (listp arg))
(if (= 2 (length arg))
(setq var (car arg)
target (cadr arg))
(setq var (car arg)
grouping (cadr arg)
target (cl-caddr arg))))
(if (and last-group
(or (not (eq last-group (or grouping target)))
force-increment))
(cl-incf addend
(symbol-value
(intern-soft (concat "ledger-regex-"
(symbol-name last-group)
"-group--count")))))
(nconc
defs
(list
`(defconst
,(intern (concat "ledger-regex-" (symbol-name name)
"-group-" (symbol-name var)))
,(+ addend
(symbol-value
(intern-soft
(if grouping
(concat "ledger-regex-" (symbol-name grouping)
"-group-" (symbol-name target))
(concat "ledger-regex-" (symbol-name target)
"-group"))))))))
(nconc
defs
(list
`(defmacro
,(intern (concat "ledger-regex-" (symbol-name name)
"-" (symbol-name var)))
(&optional string)
,(format "Return the sub-group match for the %s %s."
name var)
(match-string
,(intern (concat "ledger-regex-" (symbol-name name)
"-group-" (symbol-name var)))
string))))
(setq last-group (or grouping target)))))
(cons 'eval-and-compile defs)))
(put 'ledger-define-regexp 'lisp-indent-function 1)
(ledger-define-regexp iso-date
(let ((sep '(or ?- ?/)))
(rx (group
(and (group (= 4 num))
(eval sep)
(group (and num (? num)))
(eval sep)
(group (and num (? num)))))))
"Match a single date, in its \"written\" form.")
(ledger-define-regexp full-date
(macroexpand
`(rx (and (regexp ,ledger-iso-date-regexp)
(? (and ?= (regexp ,ledger-iso-date-regexp))))))
"Match a compound date, of the form ACTUAL=EFFECTIVE"
(actual iso-date)
:separate
(effective iso-date))
(ledger-define-regexp state
(rx (group (any ?! ?*)))
"Match a transaction or posting's \"state\" character.")
(ledger-define-regexp code
(rx (and ?\( (group (+? (not (any ?\))))) ?\)))
"Match the transaction code.")
(ledger-define-regexp long-space
(rx (and (*? blank)
(or (and ? (or ? ?\t)) ?\t)))
"Match a \"long space\".")
(ledger-define-regexp note
(rx (group (+ nonl)))
"")
(ledger-define-regexp end-note
(macroexpand
`(rx (and (regexp ,ledger-long-space-regexp) ?\;
(regexp ,ledger-note-regexp))))
"")
(ledger-define-regexp full-note
(macroexpand
`(rx (and line-start (+ blank)
?\; (regexp ,ledger-note-regexp))))
"")
(ledger-define-regexp xact-line
(macroexpand
`(rx (and line-start
(regexp ,ledger-full-date-regexp)
(? (and (+ blank) (regexp ,ledger-state-regexp)))
(? (and (+ blank) (regexp ,ledger-code-regexp)))
(+ blank) (+? nonl)
(? (regexp ,ledger-end-note-regexp))
line-end)))
"Match a transaction's first line (and optional notes)."
(actual-date full-date actual)
(effective-date full-date effective)
state
code
(note end-note))
(ledger-define-regexp account
(rx (group (and (not (any blank ?\[ ?\( ?: ?\;)) (*? nonl))))
"")
(ledger-define-regexp account-kind
(rx (group (? (any ?\[ ?\())))
"")
(ledger-define-regexp full-account
(macroexpand
`(rx (and (regexp ,ledger-account-kind-regexp)
(regexp ,ledger-account-regexp)
(? (any ?\] ?\))))))
""
(kind account-kind)
(name account))
(ledger-define-regexp commodity-no-group
(rx (or (and ?\" (+ (not (any ?\"))) ?\")
(+ (not (any blank ?\n
digit
?- ?\[ ?\]
?. ?, ?\; ?+ ?* ?/ ?^ ?? ?: ?& ?| ?! ?=
?\< ?\> ?\{ ?\} ?\( ?\) ?@)))))
"")
(ledger-define-regexp commodity
(macroexpand
`(rx (group (regexp ,ledger-commodity-no-group-regexp))))
"")
(ledger-define-regexp amount-no-group
(rx (and (? ?-)
(+ digit)
(* (and (any ?. ?,) (+ digit)))))
"")
(ledger-define-regexp amount
(macroexpand
`(rx (group (regexp ,ledger-amount-no-group-regexp))))
"")
(ledger-define-regexp commoditized-amount
(macroexpand
`(rx (group
(or
;; Match commodity before amount, with optional
;; minus sign allowed before commodity.
;; Ex: "$100" or "-$100"
(and (opt ?-)
(regexp ,ledger-commodity-no-group-regexp)
(*? blank)
(regexp ,ledger-amount-no-group-regexp))
;; Match commodity after amount
;; Ex: "100 Dollars"
(and (regexp ,ledger-amount-no-group-regexp)
(*? blank)
(regexp ,ledger-commodity-no-group-regexp))))))
"Regexp to match a commodity with amount such as \"$100\" or \"100 Dollars\"")
(ledger-define-regexp commodity-annotations
(macroexpand
`(rx (* (+ blank)
(or (and ?\{ (regexp ,ledger-commoditized-amount-regexp) ?\})
(and ?\[ (regexp ,ledger-iso-date-regexp) ?\])
(and ?\( (not (any ?\))) ?\))))))
""
commoditized-amount
iso-date)
(ledger-define-regexp cost
(macroexpand
`(rx (and (or "@" "@@") (+ blank)
(regexp ,ledger-commoditized-amount-regexp))))
"")
(ledger-define-regexp balance-assertion
(macroexpand
`(rx (and ?= (+ blank)
(regexp ,ledger-commoditized-amount-regexp))))
"")
(ledger-define-regexp full-amount
(rx (group (+? (not (any ?\;)))))
"")
(ledger-define-regexp post-line
(macroexpand
`(rx (and line-start (+ blank)
(? (and (regexp ,ledger-state-regexp) (* blank)))
(regexp ,ledger-full-account-regexp)
(? (and (regexp ,ledger-long-space-regexp)
(regexp ,ledger-full-amount-regexp)))
(? (regexp ,ledger-end-note-regexp))
line-end)))
""
state
(account-kind full-account kind)
(account full-account name)
(amount full-amount)
(note end-note))
(defconst ledger-amount-regex
(concat "\\( \\|\t\\| \t\\)[ \t]*[-+=]? *"
"\\(?:" ledger-commodity-regexp " *\\)?"
;; We either match just a number after the commodity with no
;; decimal or thousand separators or a number with thousand
;; separators. If we have a decimal part starting with `,'
;; or `.', because the match is non-greedy, it must leave at
;; least one of those symbols for the following capture
;; group, which then finishes the decimal part.
"\\([-+=]? *\\(?:[0-9]+\\|[0-9,.]+?\\)\\)"
"\\([,.][0-9)]+\\)?"
"\\(?: *" ledger-commodity-regexp "\\)?"
"\\([ \t]*[@={]@?[^\n;]+?\\)?"
"\\([ \t]+;.+?\\|[ \t]*\\)?$"))
(ledger-define-regexp year
(rx (group (+ (any "0-9"))))
"")
(ledger-define-regexp payee
(rx (group (+? nonl)))
"")
(ledger-define-regexp iterate
(macroexpand `(rx (or (and (or "Y" "year")
(+ (syntax ?-))
(regexp ,ledger-year-regexp))
(and (regexp ,ledger-full-date-regexp)
(? (and (+ blank) (regexp ,ledger-state-regexp)))
(? (and (+ blank) (regexp ,ledger-code-regexp)))
(+ blank)
(regexp ,ledger-payee-regexp)
(? (regexp ,ledger-end-note-regexp))))))
""
year
(actual-date full-date actual)
(effective-date full-date effective)
state
code
payee
(note end-note))
(defconst ledger-incomplete-date-regexp
"\\(?:\\([0-9]\\{1,2\\}\\)[-/]\\)?\\([0-9]\\{1,2\\}\\)")
(defconst ledger-xact-start-regex
(concat "^" ledger-iso-date-regexp ;; subexp 1
"\\(=" ledger-iso-date-regexp "\\)?"
))
(defconst ledger-xact-after-date-regex
(concat "\\(?:[ \t]+\\([*!]\\)\\)?" ;; mark, subexp 1
"\\(?:[ \t]+\\((.*?)\\)\\)?" ;; code, subexp 2
"\\(?:[ \t]+\\(.+?\\)\\)?" ;; desc, subexp 3
"\\(?:\\(?:\t\\|[ \t]\\{2,\\}\\)\\(;[^\n]*\\)\\)?$" ;; comment, subexp 4
))
(defconst ledger-posting-regex
(concat "^[[:blank:]]+" ; initial white space
;; state and account, subexp 1
"\\(\\([*!]\\)?" ; state, subexp 2
"[[:blank:]]*\\(.*?\\)\\)?" ; account, subexp 3
"\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)"
"\\([^;\n]*?\\)\\)?" ; amount, subexp 4
"\\(?:\\(?:\t\\|[[:blank:]]\\{2,\\}\\)"
"\\(;.*\\)\\)?$" ; comment, subexp 5
))
(defconst ledger-directive-start-regex
"[=~;#%|\\*[A-Za-z]")
(provide 'ledger-regex)
;;; ledger-regex.el ends here