522 lines
17 KiB
EmacsLisp
522 lines
17 KiB
EmacsLisp
;;; org-ref-natbib-bbl-citeproc.el --- A bibtex + natbib BBL-based citeproc -*- lexical-binding: t; -*-
|
||
;;
|
||
;; Copyright (C) 2021-2024 John Kitchin
|
||
|
||
;; Author: John Kitchin <jkitchin@andrew.cmu.edu>
|
||
;; Keywords:
|
||
|
||
;; This program is free software; you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; This program is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
;;
|
||
;;; Commentary:
|
||
;;
|
||
;; This is a citation processor that uses the bbl from bibtex and natbib.
|
||
;;
|
||
;; It is kind of hacky. I parse the bbl file into entries, and then replace the
|
||
;; in-text citations and the bibliography with data from the parsed bbl.
|
||
;;
|
||
;; The CSL exporter is probably better all around than this, but this does have
|
||
;; the advantage of using LaTeX/bibtex for formatting.
|
||
;;
|
||
;; TODO: pre/post notes are not handled in the bbl file. They have to be handled
|
||
;; separately
|
||
;;
|
||
;; This seems to work ok for
|
||
;; See https://gking.harvard.edu/files/natnotes2.pdf
|
||
;;
|
||
;; Basically supports stylying options in natbib including [numbers, super, authoryear] for style,
|
||
;; and [round, square, curly, angle] for in-text citations
|
||
;; and [comma, semicolon; colon] for citation separators
|
||
;; and [sort, compress, sort&compress] if you want to sort and/or compress citation numbers.
|
||
;;
|
||
;; longnamesfirst is not currently supported.
|
||
;;
|
||
;; To use this for HTML export you do something like this:
|
||
;; (let ((org-export-before-parsing-hook '(org-ref-bbl-preprocess)))
|
||
;; (org-open-file (org-html-export-to-html)))
|
||
;;
|
||
;;
|
||
;;; Code:
|
||
|
||
(declare-function org-ref-get-cite-links "org-ref-core" ())
|
||
(declare-function org-ref-parse-cite-path "org-ref-core" (path))
|
||
|
||
(defvar org-ref-natmove) ;silence compiler
|
||
|
||
(defun org-bbl-clean-string (s)
|
||
"Clean S of markups.
|
||
This replaces some LaTeX markup and bibtex markups including:
|
||
\\emph{}, \\doi{}, \\url{},\\citename, and {\\em ...} {\\bf ...},
|
||
removes things like \\protect and \\penalty0, and replaces ~ with
|
||
a space. This function is surely not complete, and it does not
|
||
cover any math (yet)."
|
||
(with-temp-buffer
|
||
(insert s)
|
||
;; I convert these to org syntax.
|
||
;; \emph... -> org italics
|
||
;; TODO This list is surely not complete.
|
||
;; convert \cmd{stuff} to the format string
|
||
(let (p1 p2 p3 p4 ss cmd-p)
|
||
(cl-loop for (cmd . repl-format) in '(("emph" . "/%s/")
|
||
("doi" . "%s")
|
||
("url" . "%s")
|
||
("citename" . "%s")
|
||
;; natexlab seems to be for differentiating similar authors.
|
||
("natexlab" . "(%s)")
|
||
;; I use \ce{} a lot, and just strip it here.
|
||
("ce" . "%s"))
|
||
do
|
||
(goto-char (point-min))
|
||
(while (search-forward (format "\\%s{" cmd) nil t)
|
||
(setq p1 (match-beginning 0)
|
||
p2 (point))
|
||
(backward-char)
|
||
(forward-list)
|
||
(setq p4 (point)
|
||
p3 (- (point) 1))
|
||
(setq s (buffer-substring p2 p3))
|
||
(cl--set-buffer-substring p1 p4 (format repl-format s))))
|
||
|
||
;; some times we have these markups TODO I bet there are more Note I use
|
||
;; non-breaking spaces here to make the org syntax ok. It doesn't seem to
|
||
;; do quite the right thing in the bib-items though.
|
||
(cl-loop for (markup . fmt) in '(("\\em" . " /%s/ ")
|
||
("\\it" . " /%s/ ")
|
||
("\\bf" . " *%s* ")
|
||
("\\tt" . " =%s= "))
|
||
do
|
||
(goto-char (point-min))
|
||
(while (search-forward (concat "{" markup) nil t)
|
||
(setq p1 (match-beginning 0)
|
||
p2 (point))
|
||
(goto-char (match-beginning 0))
|
||
(forward-list)
|
||
(setq p4 (point)
|
||
p3 (- (point) 1))
|
||
(setq ss (string-trim (buffer-substring p2 p3)))
|
||
(cl--set-buffer-substring p1 p4 (format fmt ss))))
|
||
|
||
;; {text} for protecting case. This is tricky to do reliably. I try to check
|
||
;; if this is not part of a command, and skip it if so. This leaves
|
||
;; un-cleaned commands in, which is desirable to me so you can see if there
|
||
;; are ones we might handle in the future.
|
||
(goto-char (point-min))
|
||
(while (search-forward "{" nil t)
|
||
;; I think if we go back a word, and are then looking back at \\, we are in a command.
|
||
;; this would fail on a latex command like \word-word{} but I can't think of any right now.
|
||
(save-excursion
|
||
(backward-word)
|
||
(setq cmd-p (looking-back "\\\\" 1)))
|
||
;; This looks back for a \cmd, basically things not a closing }
|
||
(unless cmd-p
|
||
(setq p1 (match-beginning 0)
|
||
p2 (point))
|
||
(backward-char)
|
||
(forward-list)
|
||
(setq p4 (point)
|
||
p3 (- (point) 1))
|
||
(setq s (buffer-substring p2 p3))
|
||
(cl--set-buffer-substring p1 p4 s))))
|
||
|
||
|
||
(let ((result (buffer-string)))
|
||
(cl-loop for (s . repl) in '(("~" . " ")
|
||
("\\\\&" . "&")
|
||
("\\\\protect" . "")
|
||
("\\\\penalty0" . "")
|
||
("\\\\ " . " "))
|
||
do
|
||
(setq result (replace-regexp-in-string s repl result)))
|
||
result)))
|
||
|
||
|
||
(defun org-ref-bbl-get-natbib-options ()
|
||
"Return natbib-options (including the brackets).
|
||
Defaults to [numbers,super,sort]"
|
||
(goto-char (point-min))
|
||
(if (re-search-forward "\\\\usepackage\\(?1:\\[.*\\]\\)?{natbib}")
|
||
(match-string 1)
|
||
"[numbers,super,sort]"))
|
||
|
||
|
||
|
||
(defun org-ref-bbl-entry (entry)
|
||
"ENTRY is a string containing the contents of a bibitem from a bbl file.
|
||
Return the bibliography string associated with the entry. This is
|
||
done in a temp-buffer so we don't actually modify the bbl file."
|
||
(with-temp-buffer
|
||
(insert (org-bbl-clean-string entry))
|
||
(goto-char (point-min))
|
||
(let (p1
|
||
p2
|
||
authors
|
||
blocks
|
||
entry)
|
||
(setq p1 (point))
|
||
(search-forward "\\newblock")
|
||
(setq p2 (match-beginning 0))
|
||
(setq authors (string-join
|
||
(mapcar 'string-trim
|
||
(split-string (buffer-substring p1 p2) "\n"))
|
||
" "))
|
||
(goto-char (- p2 1))
|
||
(while (search-forward "\\newblock" nil t)
|
||
(setq p1 (point))
|
||
(setq p2 (save-excursion
|
||
(if (search-forward "\\newblock" nil t)
|
||
(match-beginning 0)
|
||
(point-max))))
|
||
|
||
(cl-pushnew (string-join
|
||
(mapcar 'string-trim
|
||
(split-string (buffer-substring p1 p2) "\n"))
|
||
" ")
|
||
blocks))
|
||
|
||
(setq entry (string-join (append (list authors) blocks) " "))
|
||
entry)))
|
||
|
||
|
||
(defun org-ref-bbl-bibliography-data ()
|
||
"Get an p-list for each entry in the buffer.
|
||
Assumes you are in a bbl file.
|
||
Returns a plist (list bibitem-key :entry (org-ref-bbl-entry bibitem-entry)
|
||
:index counter :bracket-data bibitem-bracket)"
|
||
(let ((data '())
|
||
N ss
|
||
p1 p2 bbl-max
|
||
bibitem-bracket
|
||
bibitem-key
|
||
bibitem-entry
|
||
(counter 0))
|
||
|
||
(goto-char (point-min))
|
||
(search-forward "\\end{thebibliography}")
|
||
;; this is the character that the last line starts on.
|
||
(setq bbl-max (match-beginning 0))
|
||
|
||
(goto-char (point-min))
|
||
(when (looking-at "\\\\begin{thebibliography}{\\(?1:[0-9]*\\)}")
|
||
(setq N (string-to-number (match-string 1))))
|
||
|
||
;; This might only work for numeric types
|
||
(if (> 0 N)
|
||
(cl-loop for i from 1 to N do
|
||
(search-forward "\\bibitem[")
|
||
;; get text in [...]
|
||
(setq p1 (point))
|
||
(forward-list)
|
||
(setq p2 (point))
|
||
;; the bracket usually contains [stuff (year) more stuff]
|
||
(setq ss (org-bbl-clean-string
|
||
(buffer-substring-no-properties p1 p2)))
|
||
|
||
(let* ((s (string-match "(" ss))
|
||
(e (string-match ")" ss s)))
|
||
;; in numerical mode I think this is ignored, but we get it
|
||
;; just in case for some later day.
|
||
(setq bibitem-bracket (format "%s%s"
|
||
(substring ss 0 s)
|
||
(if (string= "()" (substring ss s (+ e 1)))
|
||
""
|
||
(concat " " (substring ss s (+ e 1)))))))
|
||
|
||
;; Now get the key. From the last
|
||
(search-forward "{")
|
||
(goto-char (match-beginning 0))
|
||
(setq p1 (+ 1 (point)))
|
||
(forward-list)
|
||
(setq p2 (- (point) 1)
|
||
bibitem-key (buffer-substring p1 p2))
|
||
|
||
;; Now get up to the next bibitem
|
||
(setq p1 (point))
|
||
(save-excursion
|
||
(setq p2 (or (when (search-forward "\\bibitem[" nil t)
|
||
(match-beginning 0))
|
||
bbl-max)))
|
||
(setq bibitem-entry (string-trim (buffer-substring p1 p2)))
|
||
|
||
(cl-pushnew (list bibitem-key :entry (org-ref-bbl-entry bibitem-entry)
|
||
:index i :bracket-data bibitem-bracket)
|
||
data))
|
||
;; no number found probably author year
|
||
(while (search-forward "\\bibitem[" nil t)
|
||
(cl-incf counter)
|
||
;; get text in [...]
|
||
(setq p1 (point))
|
||
(search-forward "]")
|
||
(backward-char)
|
||
(setq p2 (point))
|
||
|
||
(setq ss (org-bbl-clean-string
|
||
(buffer-substring-no-properties p1 p2)))
|
||
|
||
(let* ((s (string-match "(" ss))
|
||
(e (string-match ")" ss s)))
|
||
|
||
(setq bibitem-bracket (format "%s%s"
|
||
(substring ss 0 s)
|
||
(if (string= "()" (substring ss s (+ e 1)))
|
||
""
|
||
(concat " " (substring ss s (+ e 1)))))))
|
||
|
||
|
||
;; Now get the key. From the last
|
||
(search-forward "{")
|
||
(goto-char (match-beginning 0))
|
||
(setq p1 (+ 1 (point)))
|
||
(forward-list)
|
||
(setq p2 (- (point) 1)
|
||
bibitem-key (buffer-substring p1 p2))
|
||
|
||
;; Now get up to the next bibitem
|
||
(setq p1 (point))
|
||
(save-excursion
|
||
(setq p2 (or (when (search-forward "\\bibitem[" nil t)
|
||
(match-beginning 0))
|
||
bbl-max)))
|
||
(setq bibitem-entry (string-trim (buffer-substring p1 p2)))
|
||
|
||
(cl-pushnew (list bibitem-key :entry (org-ref-bbl-entry bibitem-entry)
|
||
:index counter :bracket-data bibitem-bracket)
|
||
data)))
|
||
(reverse data)))
|
||
|
||
|
||
(defun org-ref-bbl-compress-numbers (lst)
|
||
"Take a list like (1 2 3 6) and return \"1-3,6\"."
|
||
(let* ((a (pop lst))
|
||
b
|
||
(sequence (list a))
|
||
(sequentials '()))
|
||
(while lst
|
||
(setq b (pop lst))
|
||
(if (= (- b a) 1)
|
||
;; we have a sequence, add it, set a=b and continue
|
||
(setq sequence (append sequence (list b))
|
||
a b)
|
||
;; lost sequence, store the sequence
|
||
(setq sequentials (append sequentials (list sequence))
|
||
sequence (list b)
|
||
a b)))
|
||
;; store last one
|
||
(setq sequentials (append sequentials (list sequence)))
|
||
|
||
;; Now construct strings for each group
|
||
(cl-loop for group in sequentials collect
|
||
(pcase (length group)
|
||
(1 (format "[[%s]]" (car group)))
|
||
(2 (format "[[%s]],[[%s]]" (cl-first group) (cl-second group)))
|
||
(_ (format "[[%s]]-[[%s]]" (cl-first group) (car (last group))))))))
|
||
|
||
|
||
(defun org-ref-replace-cite-link (link bibdata NATBIB-OPTIONS backend)
|
||
"NATBIB-OPTIONS is the string to options.
|
||
Argument LINK is an org link for a citation.
|
||
Argument BIBDATA the data parsed from a bbl file.
|
||
Argument BACKEND is the export format."
|
||
(let* ((refs (plist-get (org-ref-parse-cite-path (org-element-property :path link)) :references))
|
||
(keys (cl-loop for ref in refs collect (plist-get ref :key)))
|
||
items replacements replacement joiner p1 p2)
|
||
|
||
;; Numeric types
|
||
(cond
|
||
((or (string-match-p "numbers" NATBIB-OPTIONS)
|
||
(string-match-p "super" NATBIB-OPTIONS))
|
||
(setq items (cl-loop for key in keys
|
||
collect (plist-get (cdr (assoc key bibdata)) :index)))
|
||
|
||
(when (string-match-p "sort" NATBIB-OPTIONS)
|
||
(setq items (sort items #'<)))
|
||
|
||
(setq replacements (if (string-match-p "compress" NATBIB-OPTIONS)
|
||
(org-ref-bbl-compress-numbers items)
|
||
;; these are fuzzy org links to the target in item.
|
||
(cl-loop for item in items collect (format "[[%s]]" item)))))
|
||
|
||
;; authoryear types
|
||
;; This html provides an anchor that is named. Targets are usually replaced with numbers, not the text.
|
||
((string-match-p "authoryear" NATBIB-OPTIONS)
|
||
(setq replacements (cl-loop for key in keys
|
||
collect
|
||
(cond
|
||
((eq backend 'html)
|
||
(format "@@html:<a href=\"#%s\">%s</a>@@"
|
||
(plist-get (cdr (assoc key bibdata)) :bracket-data)
|
||
(plist-get (cdr (assoc key bibdata)) :bracket-data)))
|
||
(t
|
||
(format "[[%s]]" (plist-get (cdr (assoc key bibdata)) :bracket-data)))))))
|
||
(t
|
||
(error "%s not supported yet" NATBIB-OPTIONS)))
|
||
|
||
;; Now join all the replacements for each reference
|
||
(setq joiner (cond
|
||
((string-match-p "comma" NATBIB-OPTIONS)
|
||
",")
|
||
((or (string-match-p "semicolon" NATBIB-OPTIONS)
|
||
(string-match-p "colon" NATBIB-OPTIONS))
|
||
";")
|
||
(t
|
||
";")))
|
||
|
||
;; Finally create the overall replacement
|
||
(setq replacement (cond
|
||
((string-match-p "super" NATBIB-OPTIONS)
|
||
(format "^{%s}" (string-join replacements joiner)))
|
||
|
||
((string-match-p "square" NATBIB-OPTIONS)
|
||
(format "[%s]" (string-join replacements joiner)))
|
||
|
||
((string-match-p "round" NATBIB-OPTIONS)
|
||
(format "(%s)" (string-join replacements joiner)))
|
||
|
||
((string-match-p "curly" NATBIB-OPTIONS)
|
||
(format "{%s}" (string-join replacements joiner)))
|
||
|
||
((string-match-p "angle" NATBIB-OPTIONS)
|
||
(format "<%s>" (string-join replacements joiner)))
|
||
|
||
((string-match-p "authoryear" NATBIB-OPTIONS)
|
||
(string-join replacements joiner))
|
||
|
||
(t
|
||
(error "Cannot compute replacement for %s" NATBIB-OPTIONS))))
|
||
|
||
;; When super is the style, we need to replace any blanks back to the last
|
||
;; non-space. We just look back and move point if needed here.
|
||
(if (not (string-match-p "super" NATBIB-OPTIONS))
|
||
(setq p1 (org-element-property :begin link))
|
||
(goto-char (org-element-property :begin link))
|
||
(while (looking-back " " 1)
|
||
(backward-char))
|
||
(setq p1 (point)))
|
||
|
||
(setq p2 (org-element-property :end link))
|
||
|
||
;; org-ref-natmove is dynamically bound here
|
||
(when org-ref-natmove
|
||
(save-excursion
|
||
(goto-char (org-element-property :end link))
|
||
(skip-chars-backward " ")
|
||
|
||
(when (string-match-p "[[:punct:]]" (buffer-substring (point) (+ (point) 1)))
|
||
;; Get the character
|
||
(setq replacement (concat (buffer-substring (org-element-property :end link)
|
||
(+ 1 (org-element-property :end link)))
|
||
replacement)
|
||
p2 (+ 1 (org-element-property :end link))))))
|
||
|
||
(cl--set-buffer-substring p1 p2
|
||
(concat replacement (make-string (org-element-property :post-blank link) ? )))))
|
||
|
||
|
||
(defun org-ref-bbl-replace-bibliography (bib-link bibdata NATBIB-OPTIONS backend)
|
||
"Get a replacement bibliography string for BIBDATA and NATBIB-OPTIONS.
|
||
BIBDATA comes from `org-ref-bbl-bibliography-data'.
|
||
Argument BIB-LINK an org link for a bibliography.
|
||
Argument BACKEND is the export format."
|
||
(cl--set-buffer-substring (org-element-property :begin bib-link)
|
||
(org-element-property :end bib-link)
|
||
(cond
|
||
((or (string-match-p "numbers" NATBIB-OPTIONS)
|
||
(string-match-p "super" NATBIB-OPTIONS))
|
||
(concat "\n* Bibliography\n\n"
|
||
(string-join
|
||
(cl-loop for entry in bibdata collect
|
||
(format "%s. <<%s>> %s"
|
||
(plist-get (cdr entry) :index)
|
||
(plist-get (cdr entry) :index)
|
||
(plist-get (cdr entry) :entry)))
|
||
"\n\n")))
|
||
|
||
|
||
((string-match-p "authoryear" NATBIB-OPTIONS)
|
||
(concat "\n* Bibliography\n\n"
|
||
(string-join
|
||
(cl-loop for entry in bibdata collect
|
||
(cond
|
||
((eq backend 'html)
|
||
(format "- @@html:<a id=\"%s\"></a>@@(%s) %s\n"
|
||
(plist-get (cdr entry) :bracket-data)
|
||
(plist-get (cdr entry) :bracket-data)
|
||
(plist-get (cdr entry) :entry)))
|
||
(t
|
||
(format "- <<%s>> %s"
|
||
(plist-get (cdr entry) :bracket-data)
|
||
(plist-get (cdr entry) :entry)))))
|
||
|
||
|
||
"\n")))
|
||
(t
|
||
(error "%s not supported yet" NATBIB-OPTIONS)))))
|
||
|
||
|
||
|
||
(defun org-ref-bbl-preprocess (&optional backend)
|
||
"Should work on a copy of the buffer.
|
||
Meant to be used in `org-export-before-parsing-hook'.
|
||
Optional argument BACKEND The export backend.
|
||
|
||
You need a LaTeX file and a bbl file for it. This hook generates
|
||
those, then gets the data, replaces the citations and the
|
||
bibliography.
|
||
"
|
||
(let* ((org-export-before-parsing-functions nil)
|
||
(tex-file (org-latex-export-to-latex))
|
||
(bbl-file (concat (file-name-sans-extension tex-file) ".bbl"))
|
||
natbib-options bibdata org-ref-natmove
|
||
buf)
|
||
|
||
(when-let* (buf (find-buffer-visiting tex-file))
|
||
(kill-buffer buf))
|
||
|
||
(when-let* (buf (find-buffer-visiting bbl-file))
|
||
(kill-buffer buf))
|
||
|
||
;; refresh these
|
||
(call-process-shell-command (format "latex -shell-escape %s" tex-file))
|
||
(call-process-shell-command (format "bibtex %s" (file-name-sans-extension tex-file)))
|
||
|
||
(setq buf (find-file-noselect tex-file))
|
||
(with-current-buffer buf
|
||
(goto-char (point-min))
|
||
(setq natbib-options (org-ref-bbl-get-natbib-options))
|
||
(goto-char (point-min))
|
||
(setq org-ref-natmove (search-forward "\\usepackage{natmove}" nil t)))
|
||
(kill-buffer buf)
|
||
|
||
(setq buf (find-file-noselect bbl-file))
|
||
(with-current-buffer buf
|
||
(goto-char (point-min))
|
||
(setq bibdata (org-ref-bbl-bibliography-data)))
|
||
(kill-buffer buf)
|
||
|
||
;; Replace all the cite links
|
||
(cl-loop for cl in (reverse (org-ref-get-cite-links)) do
|
||
(org-ref-replace-cite-link cl bibdata natbib-options backend))
|
||
|
||
(org-ref-bbl-replace-bibliography
|
||
(org-element-map (org-element-parse-buffer) 'link
|
||
(lambda (lnk)
|
||
(when (string= (org-element-property :type lnk) "bibliography")
|
||
lnk))
|
||
nil
|
||
t)
|
||
bibdata natbib-options backend)))
|
||
|
||
|
||
(provide 'org-ref-natbib-bbl-citeproc)
|
||
|
||
;;; org-ref-natbib-bbl-citeproc.el ends here
|