update packages

This commit is contained in:
2022-01-04 21:35:17 +01:00
parent 1d5275c946
commit 8de00e5202
700 changed files with 42441 additions and 85378 deletions

View File

@@ -1,17 +1,17 @@
;;; parsebib.el --- A library for parsing bib files -*- lexical-binding: t -*-
;; Copyright (c) 2014-2017 Joost Kremers
;; Copyright (c) 2014-2021 Joost Kremers
;; All rights reserved.
;; Author: Joost Kremers <joostkremers@fastmail.fm>
;; Maintainer: Joost Kremers <joostkremers@fastmail.fm>
;; Created: 2014
;; Version: 2.3
;; Package-Version: 20200513.2352
;; Package-Commit: 3497b6068d78ae15ba1eaf94e4315d18e9ae6b00
;; Version: 3.0
;; Package-Version: 20211208.2335
;; Package-Commit: 3d46fb939371664682c711750367de088aa66f92
;; Keywords: text bibtex
;; URL: https://github.com/joostkremers/parsebib
;; Package-Requires: ((emacs "24.3"))
;; Package-Requires: ((emacs "25.1"))
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions
@@ -45,99 +45,114 @@
(require 'bibtex)
(require 'cl-lib)
(eval-when-compile (require 'subr-x)) ; for `string-join'.
(eval-and-compile (unless (fboundp 'json-parse-buffer)
(require 'json)
(defvar json-object-type)))
(defvar parsebib--biblatex-inheritances '(("all"
"all"
(("ids" . none)
("crossref" . none)
("xref" . none)
("entryset" . none)
("entrysubtype" . none)
("execute" . none)
("label" . none)
("options" . none)
("presort" . none)
("related" . none)
("relatedoptions" . none)
("relatedstring" . none)
("relatedtype" . none)
("shorthand" . none)
("shorthandintro" . none)
("sortkey" . none)))
(define-error 'parsebib-entry-type-error "[Parsebib] Illegal entry type at point" 'error)
("mvbook, book"
"inbook, bookinbook, suppbook"
(("author" . "author")
("author" . "bookauthor")))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; BibTeX / biblatex parser ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
("mvbook"
"book, inbook, bookinbook, suppbook"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
(defvar parsebib-hashid-fields nil
"List of fields used to create a hash id for each entry.
Hash ids can only be created for BibTeX/biblatex files. The hash
id is stored in the entry in the special field `=hashid='.")
("mvcollection, mvreference"
"collection, reference, incollection, inreference, suppcollection"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
(defvar parsebib--biblatex-inheritances '(;; Source Target
("all" "all"
(("ids" . none)
("crossref" . none)
("xref" . none)
("entryset" . none)
("entrysubtype" . none)
("execute" . none)
("label" . none)
("options" . none)
("presort" . none)
("related" . none)
("relatedoptions" . none)
("relatedstring" . none)
("relatedtype" . none)
("shorthand" . none)
("shorthandintro" . none)
("sortkey" . none)))
("mvproceedings"
"proceedings, inproceedings"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("mvbook, book" "inbook, bookinbook, suppbook"
(("author" . "author")
("author" . "bookauthor")))
("book"
"inbook, bookinbook, suppbook"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("mvbook" "book, inbook, bookinbook, suppbook"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
("collection, reference"
"incollection, inreference, suppcollection"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("mvcollection, mvreference" "collection, reference, incollection, inreference, suppcollection"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
("proceedings"
"inproceedings"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("mvproceedings" "proceedings, inproceedings"
(("title" . "maintitle")
("subtitle" . "mainsubtitle")
("titleaddon" . "maintitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("book" "inbook, bookinbook, suppbook"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("collection, reference" "incollection, inreference, suppcollection"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("proceedings" "inproceedings"
(("title" . "booktitle")
("subtitle" . "booksubtitle")
("titleaddon" . "booktitleaddon")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none)))
;; Source Target
("periodical" "article, suppperiodical"
(("title" . "journaltitle")
("subtitle" . "journalsubtitle")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none))))
("periodical"
"article, suppperiodical"
(("title" . "journaltitle")
("subtitle" . "journalsubtitle")
("shorttitle" . none)
("sorttitle" . none)
("indextitle" . none)
("indexsorttitle" . none))))
"Inheritance scheme for BibLaTeX cross-referencing.
Inheritances are specified for pairs of source and target entry
type, where the target is the cross-referencing entry and the
@@ -161,36 +176,11 @@ target field is set to the symbol `none'.")
;; fact not be necessary (or desirable) to distinguish the two, but until
;; someone complains, I'll keep it this way.
(defconst parsebib--bibtex-identifier "[^\"@\\#%',={}() \t\n\f]+" "Regexp describing a licit BibTeX identifier.")
(defconst parsebib--key-regexp "[^\"@\\#%',={} \t\n\f]+" "Regexp describing a licit key.")
(defconst parsebib--key-regexp "[^\"@\\#%',={} \t\n\f]+" "Regexp describing a licit key.")
(defconst parsebib--entry-start "^[ \t]*@" "Regexp describing the start of an entry.")
;; Emacs 24.3 compatibility code.
(unless (fboundp 'define-error)
;; This definition is simply copied from the Emacs 24.4 sources
(defun define-error (name message &optional parent)
"Define NAME as a new error signal.
MESSAGE is a string that will be output to the echo area if such an error
is signaled without being caught by a `condition-case'.
PARENT is either a signal or a list of signals from which it inherits.
Defaults to `error'."
(unless parent (setq parent 'error))
(let ((conditions
(if (consp parent)
(apply #'nconc
(mapcar (lambda (parent)
(cons parent
(or (get parent 'error-conditions)
(error "Unknown signal `%s'" parent))))
parent))
(cons parent (get parent 'error-conditions)))))
(put name 'error-conditions
(delete-dups (copy-sequence (cons name conditions))))
(when message (put name 'error-message message)))))
(define-error 'parsebib-entry-type-error "Illegal entry type" 'error)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; matching and parsing stuff ;;
;; Matching and parsing stuff ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defun parsebib--looking-at-goto-end (str &optional match)
@@ -228,17 +218,17 @@ if a matching delimiter was found."
"Move forward to the closing brace matching the opening brace at point."
(with-syntax-table bibtex-braced-string-syntax-table
(forward-sexp 1)
;; if forward-sexp does not result in an error, we want to return t
;; If forward-sexp does not result in an error, we want to return t.
t))
(defun parsebib--match-quote-forward ()
"Move to the closing double quote matching the quote at point."
(with-syntax-table bibtex-quoted-string-syntax-table
(forward-sexp 1)
;; if forward-sexp does not result in an error, we want to return t
;; If forward-sexp does not result in an error, we want to return t.
t))
(defun parsebib--parse-value (limit &optional strings)
(defun parsebib--parse-bib-value (limit &optional strings)
"Parse value at point.
A value is either a field value or a @String expansion. Return
the value as a string. No parsing is done beyond LIMIT, but note
@@ -262,13 +252,13 @@ double quotes around field values are removed."
(goto-char (match-end 0)))
((looking-at "[[:space:]]*#[[:space:]]*")
(goto-char (match-end 0)))
(t (forward-char 1)))) ; so as not to get stuck in an infinite loop.
(t (forward-char 1)))) ; So as not to get stuck in an infinite loop.
(if strings
(string-join (parsebib--expand-strings (nreverse res) strings))
(string-join (nreverse res) " # "))))
;;;;;;;;;;;;;;;;;;;;;
;; expanding stuff ;;
;; Expanding stuff ;;
;;;;;;;;;;;;;;;;;;;;;
(defun parsebib--expand-strings (strings abbrevs)
@@ -279,7 +269,7 @@ Otherwise, if the string is enclosed in braces {} or double
quotes \"\", remove the delimiters. In addition, newlines and
multiple spaces in the string are replaced with a single space."
(mapcar (lambda (str)
(setq str (replace-regexp-in-string "[ \t\n\f]+" " " str))
(setq str (replace-regexp-in-string "[ \t\n\f[:space:]]+" " " str))
(cond
((gethash str abbrevs))
((string-match "\\`[\"{]\\(.*?\\)[\"}]\\'" str)
@@ -356,9 +346,9 @@ for INHERITANCES to be nil."
nil)
(t target-field))))
;;;;;;;;;;;;;;;;;;;
;; low-level API ;;
;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Low-level BibTeX/biblatex API ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defun parsebib-find-next-item (&optional pos)
"Find the first (potential) BibTeX item following POS.
@@ -428,8 +418,8 @@ expansion."
(point))))
(parsebib--looking-at-goto-end (concat "[({]\\(" parsebib--bibtex-identifier "\\)[[:space:]]*=[[:space:]]*"))
(let ((abbr (match-string-no-properties 1)))
(when (and abbr (> (length abbr) 0)) ; if we found an abbrev
(let ((expansion (parsebib--parse-value limit strings)))
(when (and abbr (> (length abbr) 0)) ; If we found an abbrev.
(let ((expansion (parsebib--parse-bib-value limit strings)))
(goto-char limit)
(cons abbr expansion)))))))
@@ -449,7 +439,18 @@ point."
(when (parsebib--match-paren-forward)
(buffer-substring-no-properties beg (point))))))
(defun parsebib-read-entry (type &optional pos strings)
(defun parsebib--get-hashid-string (fields)
  "Create a string from the contents of FIELDS to compute a hash id.
FIELDS is an alist of (FIELD . VALUE) pairs, as built up in
`parsebib-read-entry'. For each field name listed in
`parsebib-hashid-fields', the value is looked up in FIELDS (case
is ignored), an opening brace at the start of a line and a
closing brace at the very end of the value are removed, and the
results are concatenated in the order of `parsebib-hashid-fields'.

A field that does not occur in FIELDS contributes the empty
string, so entries lacking one of the hash fields still get a
hash id."
  (mapconcat (lambda (field)
               (let ((value (cdr (assoc-string field fields 'case-fold))))
                 ;; `replace-regexp-in-string' signals an error when it is
                 ;; handed nil, so a missing field has to be caught before
                 ;; the call, not after it.
                 (if (stringp value)
                     ;; Remove braces {}.
                     (replace-regexp-in-string "^{\\|}\\'" "" value)
                   "")))
             parsebib-hashid-fields
             ""))
(defun parsebib-read-entry (type &optional pos strings fields)
"Read a BibTeX entry of type TYPE at the line POS is on.
TYPE should be a string and should not contain the @
sign. The return value is the entry as an alist of (<field> .
@@ -457,59 +458,79 @@ sign. The return value is the entry as an alist of (<field> .
alist, the entry key is provided in the field \"=key=\" and the
entry type in the field \"=type=\".
If `parsebib-hashid-fields' is non-nil, a hash ID is added in the
field \"=hashid=\". The hash is computed on the basis of the
contents of the fields listed in `parsebib-hashid-fields' using
the function `secure-hash' and the `sha256' algorithm.
POS can be a number or a marker. It does not have to be at the
beginning of a line, but the entry must start at the beginning of
the line POS is on. If POS is nil, it defaults to point.
ENTRY should not be \"Comment\", \"Preamble\" or \"String\", but
is otherwise not limited to any set of possible entry types. If
so required, the calling function has to ensure that the entry
type is valid.
is otherwise not limited to any set of possible entry types.
If STRINGS is provided, it should be a hash table with string
abbreviations, which are used to expand abbrevs in the entry's
fields."
fields.
FIELDS is a list of the field names (as strings) to be read and
included in the result. Fields not in the list are ignored,
except \"=key=\" and \"=type=\", which are always included. Case
is ignored when comparing fields to the list in FIELDS. If
FIELDS is nil, all fields are returned."
(unless (member-ignore-case type '("comment" "preamble" "string"))
(when pos (goto-char pos))
(beginning-of-line)
(when (parsebib--looking-at-goto-end (concat parsebib--entry-start type "[[:space:]]*[\(\{]"))
;; find the end of the entry and the beginning of the entry key
;; Find the end of the entry and the beginning of the entry key.
(let* ((limit (save-excursion
(backward-char)
(parsebib--match-paren-forward)
(point)))
(beg (progn
(skip-chars-forward " \n\t\f") ; note the space!
(skip-chars-forward " \n\t\f") ; Note the space!
(point)))
(key (when (parsebib--looking-at-goto-end (concat "\\(" parsebib--key-regexp "\\)[ \t\n\f]*,") 1)
(buffer-substring-no-properties beg (point)))))
(or key (setq key "")) ; if no key was found, we pretend it's empty and try to read the entry anyway
(skip-chars-forward "^," limit) ; move to the comma after the entry key
(let ((fields (cl-loop for field = (parsebib--find-bibtex-field limit strings)
while field collect field)))
(or key (setq key "")) ; If no key was found, we pretend it's empty and try to read the entry anyway.
(skip-chars-forward "^," limit) ; Move to the comma after the entry key.
(let ((fields (cl-loop for field = (parsebib--parse-bibtex-field limit strings fields)
while field
if (consp field) collect field)))
(push (cons "=type=" type) fields)
(push (cons "=key=" key) fields)
(if parsebib-hashid-fields
(push (cons "=hashid=" (secure-hash 'sha256 (parsebib--get-hashid-string fields))) fields))
(nreverse fields))))))
(defun parsebib--find-bibtex-field (limit &optional strings)
"Find the field after point.
(defun parsebib--parse-bibtex-field (limit &optional strings fields)
"Parse the field starting at point.
Do not search beyond LIMIT (a buffer position). Return a
cons (FIELD . VALUE), or nil if no field was found.
If STRINGS is provided it should be a hash table with string
abbreviations, which are used to expand abbrevs in the field's
value."
(skip-chars-forward "\"#%'(),={} \n\t\f" limit) ; move to the first char of the field name
(unless (>= (point) limit) ; if we haven't reached the end of the entry
STRINGS is a hash table with string abbreviations, which are used
to expand abbrevs in the field's value.
FIELDS is a list of the field names (as strings) to be read and
included in the result. Fields not in the list are ignored,
except \"=key=\" and \"=type=\", which are always included. Case
is ignored when comparing fields to the list in FIELDS. If
FIELDS is nil, all fields are returned."
(skip-chars-forward "\"#%'(),={} \n\t\f" limit) ; Move to the first char of the field name.
(unless (>= (point) limit) ; If we haven't reached the end of the entry.
(let ((beg (point)))
(if (parsebib--looking-at-goto-end (concat "\\(" parsebib--bibtex-identifier "\\)[[:space:]]*=[[:space:]]*") 1)
(let ((field-type (buffer-substring-no-properties beg (point))))
(let ((field-contents (parsebib--parse-value limit strings)))
(cons field-type field-contents)))))))
(if (or (not fields)
(member-ignore-case field-type fields))
(cons field-type (parsebib--parse-bib-value limit strings))
(parsebib--parse-bib-value limit) ; Skip over the field value.
:ignore)))))) ; Ignore this field but keep the `cl-loop' in `parsebib-read-entry' going.
;;;;;;;;;;;;;;;;;;;;
;; high-level API ;;
;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; High-level BibTeX/biblatex API ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(defun parsebib-collect-preambles ()
"Collect all @Preamble definitions in the current buffer.
@@ -535,50 +556,61 @@ Return a list of strings, each string a separate @Comment."
(push (parsebib-read-comment) res)))
(nreverse (delq nil res)))))
(defun parsebib-collect-strings (&optional hash expand-strings)
(cl-defun parsebib-collect-strings (&key strings expand-strings)
"Collect all @String definitions in the current buffer.
Return value is a hash with the abbreviations as keys and the
expansions as values. If HASH is a hash table with test function
`equal', it is used to store the @String definitions. If
EXPAND-STRINGS is non-nil, @String expansions are expanded
themselves using the @String definitions already stored in HASH."
(or (and (hash-table-p hash)
(eq 'equal (hash-table-test hash)))
(setq hash (make-hash-table :test #'equal)))
expansions as values. If STRINGS is a hash table with test
function `equal', it is used to store the @String definitions.
If EXPAND-STRINGS is non-nil, @String expansions are expanded
themselves using the @String definitions already stored in
STRINGS."
(or (and (hash-table-p strings)
(eq 'equal (hash-table-test strings)))
(setq strings (make-hash-table :test #'equal)))
(save-excursion
(goto-char (point-min))
(cl-loop with string = nil
for item = (parsebib-find-next-item)
while item do
(when (cl-equalp item "string")
(setq string (parsebib-read-string nil (if expand-strings hash)))
(puthash (car string) (cdr string) hash)))
hash))
(setq string (parsebib-read-string nil (if expand-strings strings)))
(puthash (car string) (cdr string) strings)))
strings))
(defun parsebib-collect-entries (&optional hash strings inheritance)
"Collect all entries in the current buffer.
Return value is a hash table containing the entries. If HASH is
a hash table, with test function `equal', it is used to store the
entries. If STRINGS is non-nil, it should be a hash table of
string definitions, which are used to expand abbreviations used
in the entries.
(cl-defun parsebib-collect-bib-entries (&key entries strings inheritance fields)
"Collect all BibTeX / biblatex entries in the current buffer.
Return value is a hash table containing the entries. If ENTRIES
is a hash table with test function `equal', it is used to store
the entries collected in the buffer. Note that ENTRIES does not
have to be empty. It may contain entries from a previous parse.
If STRINGS is non-nil, it should be a hash table of string
definitions, which are used to expand abbreviations used in the
entries.
If INHERITANCE is non-nil, cross-references in the entries are
resolved: if the crossref field of an entry points to an entry
already in HASH, the fields of the latter that do not occur in
the entry are added to it. INHERITANCE indicates the inheritance
schema used for determining which fields inherit from which
fields. It can be a symbol `BibTeX' or `biblatex', or it can be
an explicit inheritance schema. (See the variable
already in ENTRIES (which includes the entries that appear
earlier in the buffer), the fields of the latter that do not occur
in the entry are added to it. INHERITANCE indicates the
inheritance schema used for determining which fields inherit from
which fields. It can be a symbol `BibTeX' or `biblatex', or it
can be an explicit inheritance schema. (See the variable
`parsebib--biblatex-inheritances' for details on the structure of
such an inheritance schema.) It can also be the symbol t, in
which case the local variable block is checked for a
dialect (using the variable `bibtex-dialect'), or, if no such
local variable is found, the value of the variable
`bibtex-dialect'."
(or (and (hash-table-p hash)
(eq 'equal (hash-table-test hash)))
(setq hash (make-hash-table :test #'equal)))
`bibtex-dialect'.
FIELDS is a list of the field names (as strings) to be read and
included in the result. Fields not in the list are ignored,
except \"=key=\" and \"=type=\", which are always included. Case
is ignored when comparing fields to the list in FIELDS. If
FIELDS is nil, all fields are returned."
(or (and (hash-table-p entries)
(eq 'equal (hash-table-test entries)))
(setq entries (make-hash-table :test #'equal)))
(if (eq inheritance t)
(setq inheritance (or (parsebib-find-bibtex-dialect)
bibtex-dialect
@@ -589,12 +621,12 @@ local variable is found, the value of the variable
for entry-type = (parsebib-find-next-item)
while entry-type do
(unless (member-ignore-case entry-type '("preamble" "string" "comment"))
(setq entry (parsebib-read-entry entry-type nil strings))
(setq entry (parsebib-read-entry entry-type nil strings fields))
(if entry
(puthash (cdr (assoc-string "=key=" entry)) entry hash))))
(puthash (cdr (assoc-string "=key=" entry)) entry entries))))
(when inheritance
(parsebib-expand-xrefs hash inheritance))
hash))
(parsebib-expand-xrefs entries inheritance))
entries))
(defun parsebib-find-bibtex-dialect ()
"Find the BibTeX dialect of a file if one is set.
@@ -612,9 +644,9 @@ file. Return nil if no dialect is found."
(string-match (concat "bibtex-dialect: " (regexp-opt (mapcar #'symbol-name bibtex-dialect-list) t)) comment))
(intern (match-string 1 comment))))))))
(defun parsebib-parse-buffer (&optional entries strings expand-strings inheritance)
(cl-defun parsebib-parse-bib-buffer (&key entries strings expand-strings inheritance fields)
"Parse the current buffer and return all BibTeX data.
Return list of five elements: a hash table with the entries, a
Return a list of five elements: a hash table with the entries, a
hash table with the @String definitions, a list of @Preamble
definitions, a list of @Comments and the BibTeX dialect, if
present in the file.
@@ -641,7 +673,13 @@ such an inheritance schema.) It can also be the symbol t, in
which case the local variable block is checked for a
dialect (using the variable `bibtex-dialect'), or, if no such
local variable is found, the value of the variable
`bibtex-dialect'."
`bibtex-dialect'.
FIELDS is a list of the field names (as strings) to be read and
included in the result. Fields not in the list are ignored,
except \"=key=\" and \"=type=\", which are always included. Case
is ignored when comparing fields to the list in FIELDS. If
FIELDS is nil, all fields are returned."
(save-excursion
(goto-char (point-min))
(or (and (hash-table-p entries)
@@ -666,12 +704,321 @@ local variable is found, the value of the variable
((cl-equalp item "comment")
(push (parsebib-read-comment) comments))
((stringp item)
(let ((entry (parsebib-read-entry item nil (if expand-strings strings))))
(let ((entry (parsebib-read-entry item nil (if expand-strings strings) fields)))
(when entry
(puthash (cdr (assoc-string "=key=" entry)) entry entries))))))
(when inheritance (parsebib-expand-xrefs entries (if (eq inheritance t) dialect inheritance)))
(list entries strings (nreverse preambles) (nreverse comments) dialect))))
;;;;;;;;;;;;;;;;;;
;; CSL-JSON API ;;
;;;;;;;;;;;;;;;;;;
(cl-defun parsebib-parse-json-buffer (&key entries stringify year-only fields)
  "Parse the current buffer and return all CSL-JSON data.
The return value is a hash table containing all the elements.
The hash table's keys are the \"id\" values of the entries, the
hash table's values are alists as returned by `json-parse-buffer'
or `json-read'.

If ENTRIES is a hash table with test function `equal', it is used
to store the entries. Any existing entries with identical keys
are overwritten.

If STRINGIFY is non-nil, JSON values that are not
strings (notably name and date fields) are converted to strings.
If additionally YEAR-ONLY is non-nil, dates are shortened to just
the year part.

FIELDS is a list of field names (as symbols) to be read and
included in the result. Fields not in the list are ignored,
except `id' and `type', which are always included. If FIELDS is
nil, all fields are returned.

If the buffer does not start with an opening bracket (optionally
preceded by white space), an error is signaled. An empty
array (\"[]\") yields no entries. If a JSON object is
encountered that does not have an \"id\" field, a
`parsebib-entry-type-error' is raised."
  (or (and (hash-table-p entries)
           (eq (hash-table-test entries) 'equal))
      (setq entries (make-hash-table :test #'equal)))
  (when fields
    (setq fields (append '(id type) fields)))
  (let ((parse (if (and (fboundp 'json-serialize)
                        (json-serialize '((test . 1)))) ; Returns nil if native json support isn't working for some reason.
                   (lambda ()
                     (json-parse-buffer :object-type 'alist))
                 (lambda ()
                   (let ((json-object-type 'alist))
                     (json-read))))))
    ;; We do not read the entire file in one go, but instead parse each entry
    ;; separately. Large bibliographies would otherwise be returned as one
    ;; gigantic vector, which then needs to be converted to a hash table. If we
    ;; need to convert some of the data because `stringify' is t, the data is
    ;; held in memory twice.
    (save-excursion
      (goto-char (point-min))
      ;; JSON is pretty strict, not even comments are allowed. CSL-JSON
      ;; requires that the file is essentially one big array, so we know that
      ;; the first non-whitespace character in the file must be an opening
      ;; bracket.
      (if (not (looking-at-p "[\n\t ]*\\["))
          (error "[Parsebib] Not a valid CSL-JSON file"))
      ;; An empty array is valid JSON but contains no object to parse;
      ;; entering the loop would run the parser at end of buffer.
      (let ((continue (not (looking-at-p "[\n\t ]*\\[[\n\t ]*]"))))
        (while continue
          ;; We also know that the first non-whitespace character after that
          ;; must be an opening brace:
          (skip-chars-forward "^{")
          (if-let ((entry (funcall parse))
                   (id (alist-get 'id entry)))
              (progn
                (when fields
                  (setq entry (seq-filter (lambda (elt)
                                            (memq (car elt) fields))
                                          entry)))
                (puthash id (if stringify
                                (parsebib-stringify-json entry year-only)
                              entry)
                         entries))
            (signal 'parsebib-entry-type-error (list (point))))
          ;; Parsing an entry moves point to the end of the entry. The next
          ;; character must be a comma if there is another entry. If we're not
          ;; seeing a comma, we've reached the end of the file. Note that
          ;; the class must list newline, TAB and space individually: a range
          ;; such as "\n-t" would exclude TAB and stop parsing early.
          (if (not (looking-at-p "[\n\t ]*,"))
              (setq continue nil))))))
  entries)
(defun parsebib-stringify-json (entry &optional year-only)
  "Convert every non-string value in ENTRY to a string.
ENTRY is a CSL-JSON entry represented as an alist. The
conversion is destructive: the values are replaced inside ENTRY
itself, and ENTRY is also the return value. Values that already
are strings are left untouched. A non-nil YEAR-ONLY is passed
on to `parsebib-stringify-json-field', which then reduces date
fields to the year."
  ;; Collect the keys up front and look each one up with `assq', so that
  ;; for a repeated key only its first cell is ever considered.
  (dolist (key (mapcar #'car entry) entry)
    (let ((cell (assq key entry)))
      (unless (stringp (cdr cell))
        (setcdr cell (parsebib-stringify-json-field cell year-only))))))
;; Keys of the CSL-JSON fields whose values `parsebib-stringify-json-field'
;; hands to `parsebib--json-stringify-name-field'.
(defvar parsebib--json-name-fields '(author
collection-editor
composer
container-author
director
editor
editorial-director
illustrator
interviewer
original-author
recipient
reviewed-author
translator))
;; Keys of the CSL-JSON fields whose values `parsebib-stringify-json-field'
;; hands to `parsebib--json-stringify-date-field'.
;; NOTE(review): `container' does not look like a CSL date variable (the
;; other members are); confirm against the CSL-JSON schema.
(defvar parsebib--json-date-fields '(accessed
container
event-date
issued
original-date
submitted))
;; Keys of CSL-JSON fields that hold numbers.
;; NOTE(review): this list is not referenced in the part of the file visible
;; here; `parsebib-stringify-json-field' detects numbers with `numberp'.
(defvar parsebib--json-number-fields '(chapter-number
collection-number
edition
issue
number
number-of-pages
number-of-volumes
volume))
;; Passed to `parsebib--process-template' for every name in a name field.
;; Each braced group holds one key plus the literal text that is printed
;; around its value when the name has that key.
(defvar parsebib-json-name-field-template "{non-dropping-particle }{family, }{given}{ dropping-particle}{, suffix}{literal}"
"Template used to display name fields.")
;; Inserted between the formatted names by
;; `parsebib--json-stringify-name-field'.
(defvar parsebib-json-name-field-separator " and "
"Separator used to concatenate names in a name field.")
;; Used by `parsebib-stringify-json-field' for array values that belong to
;; neither a name field nor a date field.
(defvar parsebib-json-field-separator ", "
"Separator used to concatenate items of array fields.")
(defun parsebib--process-template (template items)
  "Process TEMPLATE and return a formatted string.
ITEMS is an alist, the keys of which may occur in TEMPLATE.
Braced occurrences of the keys in ITEMS are replaced with the
corresponding values. Note that the keys in ITEMS should be
symbols.

A placeholder has the form {PRE KEY POST}: KEY consists of
letters and hyphens, starts with a letter and is at least two
characters long; PRE and POST are optional runs of non-letter
characters. If KEY has a non-nil value in ITEMS, the
placeholder is replaced with PRE, the value and POST; otherwise
it is replaced with the empty string, so that PRE and POST
disappear together with the missing value.

A braced group that is not a well-formed placeholder (such as
\"{}\" or \"{1}\") is copied to the result unchanged."
  (cl-flet ((create-replacements (match)
              (save-match-data
                (if (not (string-match "{\\([^A-Za-z]*\\)\\([A-Za-z][A-Za-z-]+\\)\\([^A-Za-z]*\\)}" match))
                    ;; No key to look up. Without this guard the key would
                    ;; be nil and `intern' would signal an error.
                    match
                  (let* ((pre (match-string 1 match))
                         (key (match-string 2 match))
                         (post (match-string 3 match))
                         (value (alist-get (intern key) items)))
                    (if value
                        (format "%s%s%s" pre value post)
                      ""))))))
    ;; The final t makes the replacement literal, so backslashes in values
    ;; are not interpreted as match references.
    (replace-regexp-in-string "{.*?}" #'create-replacements template nil t)))
(defun parsebib-stringify-json-field (field &optional short)
  "Convert the value of the CSL-JSON FIELD to a string and return it.
FIELD is a cons cell (KEY . VALUE). A string VALUE is returned
with every run of white space collapsed into a single space; a
number is printed; the value of a name field or a date field is
formatted by the dedicated conversion function; any other array
has its elements joined with `parsebib-json-field-separator'.

SHORT only affects date fields: when it is non-nil the result is
just the year, or the string \"XXXX\" if the date has no year."
  (let ((name (car field))
        (val (cdr field)))
    (pcase val
      ((pred stringp)
       (replace-regexp-in-string "[ \t\n\f[:space:]]+" " " val))
      ((pred numberp)
       (format "%s" val))
      ((guard (memq name parsebib--json-name-fields))
       (parsebib--json-stringify-name-field val))
      ((guard (memq name parsebib--json-date-fields))
       (parsebib--json-stringify-date-field val short))
      ;; Apart from name and date fields, CSL-JSON v1.0 has a single array
      ;; field, "categories", which holds strings. Formatting each element
      ;; anyway keeps this clause safe for other element types.
      ((pred arrayp)
       (mapconcat (lambda (item) (format "%s" item))
                  val
                  parsebib-json-field-separator))
      ;; Fallback that is not expected to be reached.
      (_ (replace-regexp-in-string "\n" " " (format "%s" val))))))
(defun parsebib--json-stringify-name-field (names)
  "Return the CSL-JSON name field value NAMES as a single string.
NAMES is a vector of alists, one alist per name. Every name is
formatted with `parsebib-json-name-field-template', in which each
placeholder is filled with the matching part of the name and
placeholders without a matching part are dropped. The formatted
names are then joined with `parsebib-json-name-field-separator'."
  (let ((formatted (mapcar (lambda (person)
                             (parsebib--process-template
                              parsebib-json-name-field-template person))
                           names)))
    (string-join formatted parsebib-json-name-field-separator)))
(defun parsebib--json-stringify-date-field (date &optional short)
  "Convert DATE to a string.
DATE is the value of a CSL-JSON date field, an alist. DATE
itself is not modified.

If SHORT is non-nil, try to return only a year (in a date range,
just the year of the first date). If no year part is present,
SHORT returns \"XXXX\".

Otherwise the result is built from the template
\"{circa }{season }{start-date}{/end-date}{literal}{raw}\": the
first element of `date-parts' becomes the start date and, if
there are exactly two elements, the second becomes the end date;
a season number from 1 to 4 is replaced with the name of the
season; a `circa' value is rendered as \"ca.\", unless it is one
of the markers the JSON parsers use for false or null."
  (if short
      (if-let ((date-parts (alist-get 'date-parts date))
               (first-date (aref date-parts 0))
               (year (aref first-date 0)))
          (format "%s" year)
        "XXXX")
    ;; Work with a copy of the original alist. `copy-alist' also copies the
    ;; individual (KEY . VALUE) cells; a shallow copy would share them, so
    ;; that setting `season' or `circa' below would alter the caller's data.
    (setq date (copy-alist date))
    ;; Set start-date and end-date.
    (when-let ((date-parts (alist-get 'date-parts date)))
      (let* ((start-date (aref date-parts 0))
             (end-date (if (= (length date-parts) 2)
                           (aref date-parts 1))))
        (setf (alist-get 'date-parts date nil :remove) nil)
        (setf (alist-get 'start-date date)
              (parsebib--json-stringify-date-part start-date))
        (if end-date (setf (alist-get 'end-date date)
                           (parsebib--json-stringify-date-part end-date)))))
    ;; Set season. Only the numbers 1-4 have a name; anything else is left
    ;; alone and printed as is, rather than signalling a range error.
    (when-let ((season (alist-get 'season date)))
      (if (and (integerp season) (<= 1 season 4))
          (setf (alist-get 'season date)
                (aref ["Spring" "Summer" "Autumn" "Winter"] (1- season)))))
    ;; Set circa. JSON false and null arrive as non-nil keywords (`:false'
    ;; and `:null' from `json-parse-buffer', `:json-false' from `json-read'),
    ;; so they must be excluded explicitly. A nil value makes the template
    ;; drop the placeholder.
    (when-let ((circa (alist-get 'circa date)))
      (setf (alist-get 'circa date)
            (unless (memq circa '(:false :json-false :null))
              "ca.")))
    ;; Now convert the date.
    (parsebib--process-template "{circa }{season }{start-date}{/end-date}{literal}{raw}"
                                date)))
(defun parsebib--json-stringify-date-part (date-parts)
  "Return DATE-PARTS formatted as a single date string.
DATE-PARTS is a sequence of at most three numeric elements, in
the order year, month, day. The elements present are joined
with hyphens; missing trailing elements are simply left out."
  (let ((labelled-parts (seq-mapn #'cons '(year month day) date-parts)))
    (parsebib--process-template "{year}{-month}{-day}" labelled-parts)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Format-independent API ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(cl-defun parsebib-parse (files &key entries strings (display t) fields)
  "Parse one or more bibliography files.
FILES is the list of files to parse. All bibliographic entries
in FILES are collected and returned in a single hash table.
FILES can be a list of `.bib' or `.json' files, or a combination
of these. FILES can also be a string, which should be the path
to a single bibliography file. The file type is determined from
the file name extension, ignoring case; a file with any other
extension causes an error.

ENTRIES, if provided, should be a hash table with test function
`equal', it is used to store the entries. Any existing entries
with identical keys are overwritten. If provided, ENTRIES is
also the return value. If ENTRIES is nil, a new hash table is
created and returned.

STRINGS, similarly a hash table with test function `equal', is
used to store the @String definitions. Although STRINGS is not
returned, it is modified in place and can therefore be used to
collect the @String definitions in the files being parsed.

If DISPLAY is non-nil, field values are returned in a way that is
suitable for display: in `.bib' files, @String abbreviations are
expanded, in `.json' files, values that are not strings are
converted to strings. Furthermore, sequences of white space
characters (including newlines) are reduced to a single space.

Specifically, setting DISPLAY means setting the arguments
EXPAND-STRINGS and INHERITANCE in the function
`parsebib-parse-bib-buffer' and setting STRINGIFY and YEAR-ONLY
in the function `parsebib-parse-json-buffer'. DISPLAY is simply
passed on to these arguments, which means that it can be set to
anything that INHERITANCE in `parsebib-parse-bib-buffer'
accepts. (The other arguments only distinguish between nil and
non-nil.) Note that DISPLAY defaults to t.

FIELDS is a list of the field names to be read and included in
the result. Fields not in the list are ignored. Note that field
names should be strings; when parsing a `.json' file, they are
converted to symbols. See the doc strings of
`parsebib-parse-bib-buffer' and `parsebib-parse-json-buffer' for
details. If FIELDS is nil, all fields are returned."
  (or (and (hash-table-p entries)
           (eq (hash-table-test entries) 'equal))
      (setq entries (make-hash-table :test #'equal)))
  (or (and (hash-table-p strings)
           (eq (hash-table-test strings) 'equal))
      (setq strings (make-hash-table :test #'equal)))
  (when (stringp files)
    (setq files (list files)))
  (dolist (file files)
    (with-temp-buffer
      (insert-file-contents file)
      ;; Compare the extension in lower case, so that names such as
      ;; "refs.BIB" or "refs.Json" are recognized as well.
      (let ((extension (downcase (file-name-extension file t))))
        (cond
         ((string= extension ".bib")
          (parsebib-parse-bib-buffer :entries entries
                                     :strings strings
                                     :expand-strings display
                                     :inheritance display
                                     :fields fields))
         ((string= extension ".json")
          (parsebib-parse-json-buffer :entries entries
                                      :stringify display
                                      :year-only display
                                      :fields (mapcar #'intern fields)))
         (t (error "[Parsebib] Not a bibliography file: %s" file))))))
  entries)
(provide 'parsebib)
;;; parsebib.el ends here