;;; orb-core.el --- Org Roam BibTeX: core library -*- lexical-binding: t -*-

;; Copyright © 2020-2022 Mykhailo Shevchuk
;; Copyright © 2020 Leo Vivier

;; Author: Mykhailo Shevchuk
;;      Leo Vivier
;; URL: https://github.com/org-roam/org-roam-bibtex

;; This file is NOT part of GNU Emacs.

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License along with
;; this program; see the file LICENSE.  If not, visit
;; <https://www.gnu.org/licenses/>.

;;; Commentary:
;;
;; This file provides org-roam-bibtex' dependencies and thus should
;; normally be required by org-roam-bibtex feature libraries.  It
;; defines customize groups and provides general utility functions
;; that depend on extra features provided through org-roam,
;; bibtex-completion and their dependencies.

;;; Code:

;; ============================================================================
;;; Dependencies
;; ============================================================================

(require 'orb-utils)
(require 'orb-compat)

(eval-when-compile
  (require 'cl-macs)
  (require 'subr-x)
  (require 'rx))

(declare-function
 bibtex-completion-get-entry "bibtex-completion" (entry-key))
(declare-function
 bibtex-completion-get-value "bibtex-completion"
 (field entry &optional default))
;; The FILE argument is mandatory in `declare-function'; without it the
;; argument list would be taken for the file name.
(declare-function
 bibtex-completion-find-pdf "bibtex-completion"
 (key-or-entry &optional find-additional))

;; Variables owned by `bibtex-completion' that are read in this file.
;; Declared without a value only to inform the byte-compiler.
(defvar bibtex-completion-find-additional-pdfs)
(defvar bibtex-completion-pdf-open-function)

;; ============================================================================
;;; Customize groups
;; ============================================================================
;;
;; All modules should put their `defgroup' definitions here
;; Defcustom definitions should stay in respective files

(defgroup org-roam-bibtex nil
  "Org-roam integration with BibTeX software."
  :group 'org-roam
  :prefix "orb-")

(defgroup orb-note-actions nil
  "Orb Note Actions - run actions in note's context."
  :group 'org-roam-bibtex
  :prefix "orb-note-actions-")

(defgroup orb-pdf-scrapper nil
  "Orb PDF Scrapper - retrieve references from PDF."
  :group 'org-roam-bibtex
  :prefix "orb-pdf-scrapper-")

(defgroup orb-anystyle nil
  "Elisp interface to `anystyle-cli`."
  :group 'org-roam-bibtex
  :prefix "orb-anystyle-")

(defgroup orb-autokey nil
  "Automatic generation of BibTeX citation keys."
  :group 'org-roam-bibtex
  :prefix "orb-autokey-")

;; ============================================================================
;;; BibTeX fields and their special handling
;; ============================================================================

(defcustom orb-bibtex-field-aliases
  '(("=type=" . "entry-type")
    ("=key=" . "citekey")
    ("=has-pdf=" . "pdf?")
    ("=has-note=" . "note?")
    ("citation-number" . "#"))
  "Alist of ORB-specific field aliases of the form (FIELD . ALIAS).
The ALIAS can be used instead of the FIELD anywhere in ORB's
configuration.

This variable is useful to replace `bibtex-completion''s internal
virtual fields enclosed in equals signs (such as \"=key=\") with
more casual alternatives."
  :group 'org-roam-bibtex
  :type '(repeat
          (cons (string :tag "Field name")
                (string :tag "Alias name"))))

(defcustom orb-attached-file-extensions '("pdf")
  "When retrieving an attached file, keep files with only these extensions.
This is a list of file extensions without a dot as case-insensitive
strings.

Set it to nil to keep all file names regardless of their extensions.

BibTeX entries are searched for attached files according to
`bibtex-completion-pdf-field' (default `file') and in
BibDesk-specific `Bdsk-File-N' fields."
  :group 'org-roam-bibtex
  :type '(repeat :tag "List of extensions" (string)))

(defcustom orb-abbreviate-file-name t
  "Non-nil to force abbreviation of file names by `orb-get-attached-file'.
When this option is set to a non-nil value, the filename returned
by `orb-get-attached-file' will get the home directory part
abbreviated to `~/'.  Symlinked directories will be abbreviated
according to `directory-abbrev-alist', see `abbreviate-file-name'
for details.

An as-is value will be used otherwise."
  :group 'org-roam-bibtex
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "No" nil)))

(defcustom orb-open-attached-file-as-truename t
  "Non-nil to open attached files with their true names.
When this option is set non-nil, `orb-open-attached-file' will
open files using their true names.  You may want to set it to nil
if using file symlinks and experiencing problems such as
discussed here:

https://github.com/org-roam/org-roam-bibtex/issues/259

An as-is value will be used otherwise."
  :group 'org-roam-bibtex
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "No" nil)))

(defcustom orb-use-bibdesk-attachments nil
  "Whether to look up BibDesk-specific file fields `Bdsk-File'.

If this is non-nil, attachments given in BibDesk-specific file
fields will be considered in addition to those found through the
`bibtex-completion-find-pdf' mechanism when performing a template
expansion, opening an attachment with `orb-note-actions' or
scraping a PDF with `orb-pdf-scrapper'.

Duplicates will be resolved, but since duplicate comparison is
performed using `file-truename', this will lead to expansion of
symlink paths if such are used in the normal BibTeX `file' field,
for example.  See also `orb-abbreviate-file-name' on how to
abbreviate the retrieved filenames.

Set this to symbol `only' to look up only BibDesk attachments and
do not use `bibtex-completion-find-pdf'."
  :group 'org-roam-bibtex
  :type '(choice
          (const :tag "Yes" t)
          (const :tag "BibDesk only" only)
          (const :tag "No" nil)))

(defsubst orb-resolve-field-alias (alias)
  "Return ALIAS association from `orb-bibtex-field-aliases'.
The lookup goes from the alias to the field name it stands for.
Return ALIAS itself if no association was found."
  (or (car (rassoc alias orb-bibtex-field-aliases)) alias))

(defun orb-get-bibdesk-filenames (entry)
  "Return filenames stored in BibDesk file fields \"Bdsk-File-N\".
ENTRY is a BibTeX entry as returned by
`bibtex-completion-get-entry'.

Each matching field value is stripped of its enclosing quotes or
braces and base64-decoded; the file name is then taken as the
part of the decoded data starting at \"Users/\" and ending with
one of `orb-attached-file-extensions' (or \"pdf\" when that
variable is nil).  Fields that cannot be decoded or contain no
such path are skipped.

Return a list of absolute file names, or nil."
  ;; NOTE: Mac-specific, hard-coded
  (let* ((bdsk-file-fields
          (seq-filter (lambda (cell)
                        (string-match-p "Bdsk-File" (car cell)))
                      entry))
         (strip-value-rx
          (rx (seq (opt (in "\"{"))
                   (group (* (not (in "\"{}"))))
                   (opt (in "\"}")))))
         (filename-rx
          (concat
           (rx (seq "Users/" (* anychar)))
           (if orb-attached-file-extensions
               (regexp-opt orb-attached-file-extensions t)
             "pdf"))))
    (delq
     nil
     (mapcar
      (lambda (cell)
        (let ((val (cdr cell)))
          (when (and (stringp val)
                     (string-match strip-value-rx val))
            ;; A single malformed field must not abort the whole lookup,
            ;; so decoding errors simply drop that field.
            (let ((file (ignore-errors
                          (base64-decode-string (match-string 1 val)))))
              (when (and file (string-match filename-rx file))
                (concat "/" (match-string 0 file)))))))
      bdsk-file-fields))))

;;;###autoload
(defun orb-get-attached-file (citekey)
  "Look up files associated with a BibTeX entry identified by CITEKEY.
Files are searched for using `bibtex-completion-find-pdf',
meaning that Mendeley, Zotero and plain file paths are all
supported, and variables `bibtex-completion-pdf-field',
`bibtex-completion-library-path' and
`bibtex-completion-find-additional-pdfs' are respected.
Additionally, the BibTeX entry is searched for BibDesk-specific
file fields `Bdsk-File-N'.

If `orb-attached-file-extensions' is non-nil, return only file paths
matching the respective extensions.

If `orb-abbreviate-file-name' is non-nil, force an abbreviated
file name.

Depending on the value of `orb-use-bibdesk-attachments', the
BibDesk-specific file fields `Bdsk-File-N' may or may not be used
for the lookup.

If multiple files have been found, the user will be prompted to
select one.

Return the file name as a string, or nil if nothing was found.
Errors signaled during the lookup are reported with `orb-warning'
instead of being propagated."
  (condition-case err
      (when-let* ((entry (bibtex-completion-get-entry citekey))
                  (found
                   (pcase orb-use-bibdesk-attachments
                     (`nil (bibtex-completion-find-pdf
                            entry bibtex-completion-find-additional-pdfs))
                     (`only (orb-get-bibdesk-filenames entry))
                     (_
                      ;; Both sources combined.  `append' is used rather
                      ;; than `nconc' so that the list returned by
                      ;; `bibtex-completion-find-pdf' is never modified.
                      ;; Truenames are needed for a reliable duplicate
                      ;; check.
                      (-uniq
                       (-map #'file-truename
                             (append
                              (bibtex-completion-find-pdf
                               entry bibtex-completion-find-additional-pdfs)
                              (orb-get-bibdesk-filenames entry)))))))
                  (paths
                   (if (not orb-attached-file-extensions)
                       found            ; do not filter by extensions
                     ;; filter by extensions
                     (--filter
                      (when-let* ((ext (file-name-extension it)))
                        (member-ignore-case
                         ext orb-attached-file-extensions))
                      found)))
                  (path (if (cdr paths)
                            (completing-read "File to use: " paths)
                          (car paths))))
        (if orb-abbreviate-file-name
            (abbreviate-file-name path)
          path))
    ;; ignore any errors that may be thrown by `bibtex-completion-find-pdf'
    ;; don't stop the capture process
    (error
     (orb-warning
      (format "error in `orb-get-attached-file`: %s %s"
              (car err) (cdr err))))))

;;;###autoload
(defun orb-open-attached-file (citekey)
  "Open a file associated with CITEKEY.
CITEKEY is normally a list whose first element is the citation
key, for compatibility with `bibtex-completion' functions, which
also expect a list.  A bare citation key string is accepted too.

This is a modified and simplified version of `bibtex-completion-open-pdf',
which uses `orb-get-bibdesk-filenames' under the hood and is therefore
compatible with BibDesk.  The file is opened with the function set in
`bibtex-completion-pdf-open-function'.  If
`orb-open-attached-file-as-truename' is non-nil, the file is
opened by its true name.

The intended primary use is with `orb-note-actions'."
  (let* ((key (if (consp citekey) (car citekey) citekey))
         (attachment (orb-get-attached-file key)))
    (if attachment
        (funcall bibtex-completion-pdf-open-function
                 (if orb-open-attached-file-as-truename
                     (file-truename attachment)
                   attachment))
      (message "No PDF(s) found for this entry: %s" key))))

;; ============================================================================
;;;; Orb autokey
;; ============================================================================

(defcustom orb-autokey-format "%a%y%T[4][1]"
  "Format string for automatically generated citation keys.

Supported wildcards:

Basic
==========

 %a         |author| - first author's (or editor's) last name
 %t         |title | - first word of title
 %f{field}  |field | - first word of arbitrary field
 %y         |year  | - year YYYY
 %p         |page  | - first page
 %e{(expr)} |elisp | - execute elisp expression

Extended
==========

1. Capitalized versions:

 %A        |author| >
 %T        |title | >  Same as %a,%t,%f{field} but
 %F{field} |field | >  preserve original capitalization

2. Starred versions

 %a*, %A* |author| - include author's (editor's) initials
 %t*, %T* |title | - do not ignore words in `orb-autokey-titlewords-ignore'
 %y*      |year  | - year's last two digits __YY
 %p*      |page  | - use \"pagetotal\" field instead of default \"pages\"

3. Optional parameters

 %a[N][M][D]        |author| >
 %t[N][M][D]        |title | > include first N words/names
 %f{field}[N][M][D] |field | > include at most M first characters of word/name
 %p[D]              |page  | > put delimiter D between words

N and M should be a single digit 1-9.  Putting more digits or any
other symbols will lead to ignoring the optional parameter and
those following it altogether.  D should be a single alphanumeric
symbol or one of `-_.:|'.

Optional parameters work both with capitalized and starred
versions where applicable.

4. Elisp expression

 - can be anything
 - should return a string or nil
 - will be evaluated before expanding other wildcards and therefore
   can insert other wildcards
 - will have `entry' variable bound to the value of BibTeX entry the key
   is being generated for, as returned by `bibtex-completion-get-entry'.
   The variable may be safely manipulated in a destructive manner.

 %e{(or (bibtex-completion-get-value \"volume\" entry) \"N/A\")}
 %e{(my-function entry)}

Key generation is performed by `orb-autokey-generate-key'."
  :risky t
  :type 'string
  :group 'org-roam-bibtex)

(defcustom orb-autokey-titlewords-ignore
  '("A" "An" "On" "The" "Eine?" "Der" "Die" "Das"
    "[^[:upper:]].*" ".*[^[:upper:][:lower:]0-9].*")
  "Patterns from title that will be ignored during key generation.
Every element is a regular expression to match parts of the title
that should be ignored during automatic key generation.  Case
sensitive."
  ;; Default value was taken from `bibtex-autokey-titleword-ignore'.
  :type '(repeat :tag "Regular expression" regexp)
  :group 'orb-autokey)

(defcustom orb-autokey-empty-field-token "N/A"
  "String to use when BibTeX field is nil or empty."
  :type 'string
  :group 'orb-autokey)

(defcustom orb-autokey-invalid-symbols
  " \"'()={},~#%\\"
  "Characters not allowed in a BibTeX key.
The key will be stripped of these characters."
  :type 'string
  :group 'orb-autokey)

(defun orb--autokey-format-field (field &rest specs)
  "Return BibTeX FIELD formatted according to plist SPECS.

Recognized keys:
==========
:entry       - BibTeX entry to use
:value       - Value of BibTeX field to use
               instead of retrieving it from :entry
:capital     - capitalized version
:starred     - starred version
:words       - first optional parameter (number of words)
:characters  - second optional parameter (number of characters)
:delimiter   - third optional parameter (delimiter)

All values should be strings, including those representing numbers.

FIELD may be the virtual field \"=name=\", which stands for
author or editor.  If the field value is nil or empty, return
`orb-autokey-empty-field-token'.  The returned string is always
stripped of the characters in `orb-autokey-invalid-symbols'.

Match data is preserved.

This function is used internally by `orb-autokey-generate-key'."
  (declare (indent 1))
  (-let* (((&plist :entry entry
                   :value value
                   :capital capital
                   :starred starred
                   :words words
                   :characters chars
                   :delimiter delim) specs)
          ;; field values will be split into a list of words. `separator' is a
          ;; regexp for word separators: either a whitespace, one or more
          ;; dashes, or en dash, or em dash
          (separator "\\([ \n\t]\\|[-]+\\|[—–]\\)")
          (invalid-chars-rx
           (rx-to-string `(any ,orb-autokey-invalid-symbols) t))
          (delim (or delim ""))
          result)
    ;; Callers use the match data of their own `string-match' in
    ;; `replace-match' right after this function returns, so nothing in
    ;; here may clobber it.
    (save-match-data
      ;; 0. virtual field "=name=" is used internally here and in
      ;; `orb-autokey-generate-key'; it stands for author or editor
      (if (string= field "=name=")
          ;; in name fields, logical words are full names consisting of
          ;; several words and containing spaces and punctuation, separated
          ;; by a logical separator, the word "and"
          (setq separator " and "
                value (or value
                          (bibtex-completion-get-value "author" entry)
                          (bibtex-completion-get-value "editor" entry)))
        ;; otherwise proceed with value or get it from entry
        (setq value (or value
                        (bibtex-completion-get-value field entry))))
      (if (or (not value) (string-empty-p value))
          (setq result orb-autokey-empty-field-token)
        ;; 1. split field into words
        (setq result (split-string value separator t "[ ,.;:-]+"))
        ;; 1a) only for title;
        ;; STARRED = include words from `orb-autokey-titlewords-ignore'
        ;; unstarred version filters the keywords, starred ignores this block
        (when (and (string= field "title")
                   (not starred))
          (let ((ignore-rx
                 (concat "\\`\\(?:"
                         (mapconcat #'identity
                                    orb-autokey-titlewords-ignore "\\|")
                         "\\)\\'"))
                ;; the ignore patterns are documented as case sensitive
                (case-fold-search nil))
            (setq result
                  (seq-remove (lambda (word)
                                (string-match-p ignore-rx word))
                              result))))
        ;; 2. take number of words equal to WORDS if that is set
        ;; or just the first word; also 0 = 1.
        ;; `-take' copes with a count larger than the list and with an
        ;; empty list, so no nil element can sneak into RESULT.
        (setq result
              (-take (if words
                         (max 1 (string-to-number words))
                       1)
                     result))
        ;; 2a) only for "=name=" field, i.e. author or editor
        ;; STARRED = include initials
        (when (string= field "=name=")
          ;; NOTE: here we expect name field 'Doe, J. B.'
          ;; should ideally be able to handle 'Doe, John M. Longname, Jr'
          (let ((r-x (if starred
                         "[ ,.\t\n]"
                       "\\`\\(.*?\\),.*\\'"))
                (rep (if starred "" "\\1")))
            (setq result
                  (mapcar (lambda (name)
                            (s-replace-regexp r-x rep name))
                          result))))
        ;; 3. take at most CHARS number of characters from every word
        (when chars
          (let ((limit (string-to-number chars)))
            (setq result
                  (mapcar (lambda (word)
                            (substring word 0 (min limit (length word))))
                          result))))
        ;; 4. almost there: concatenate words, include DELIMiter
        (setq result (mapconcat #'identity result delim))
        ;; 5. CAPITAL = preserve case
        (unless capital
          (setq result (downcase result))))
      ;; return result stripped of the invalid characters
      (s-replace-regexp invalid-chars-rx "" result t))))

(defun orb--autokey-evaluate-expression (expr &optional entry)
  "Evaluate arbitrary elisp EXPR passed as readable string.
The expression will have value of ENTRY bound to `entry' variable
at its disposal.  ENTRY should be a BibTeX entry as returned by
`bibtex-completion-get-entry'.  The expression receives a copy of
ENTRY, so it may modify it freely.

The result returned should be a string or nil; nil is converted
to an empty string.  Signal a `user-error' for any other type."
  ;; NOTE(review): this evaluates code taken from the key format string.
  ;; `orb-autokey-format' is marked :risky for that reason; never feed
  ;; this function text from an untrusted source.
  (let ((result (eval `(let ((entry (quote ,(copy-tree entry))))
                         ,(read expr)))))
    (unless (or (stringp result)
                (not result))
      (user-error "Result: %s, invalid type.  \
Expression must be string or nil" result))
    (or result "")))

;;;###autoload
(defun orb-autokey-generate-key (entry &optional control-string)
  "Generate citation key from ENTRY according to `orb-autokey-format'.
ENTRY is a BibTeX entry as returned by
`bibtex-completion-get-entry'.  Return a string.  If optional
CONTROL-STRING is non-nil, use it instead of
`orb-autokey-format'.

Elisp wildcards (%e) are expanded first, since they may produce
further wildcards; author, title, arbitrary field, page and year
wildcards follow in that order."
  (let* (;; wildcards differ by letter case only (%a vs %A)
         (case-fold-search nil)
         (str (or control-string orb-autokey-format))
         ;; star regexp: group 3!
         (star '(opt (group-n 3 "*")))
         ;; optional parameters: regexp groups 4-6!
         (opt1 '(opt (and "[" (opt (group-n 4 digit)) "]")))
         (opt2 '(opt (and "[" (opt (group-n 5 digit)) "]")))
         (opt3 '(opt (and "[" (opt (group-n 6 (any alnum "_.:|-"))) "]")))
         ;; capital letters: regexp group 2!
         ;; author wildcard regexp
         (a-rx (rx-to-string
                `(group-n 1 (or "%a" (group-n 2 "%A"))
                          ,star ,opt1 ,opt2 ,opt3)
                t))
         ;; title wildcard regexp
         (t-rx (rx-to-string
                `(group-n 1 (or "%t" (group-n 2 "%T"))
                          ,star ,opt1 ,opt2 ,opt3)
                t))
         ;; any field wildcard regexp
         ;; required parameter: group 7!
         (f-rx (rx-to-string
                `(group-n 1 (or "%f" (group-n 2 "%F"))
                          (and "{" (group-n 7 (1+ letter)) "}")
                          ,opt1 ,opt2 ,opt3)
                t))
         ;; year wildcard regexp
         (y-rx (rx (group-n 1 "%y" (opt (group-n 3 "*")))))
         ;; page wildcard regexp
         (p-rx (rx-to-string
                `(group-n 1 "%p" ,star ,opt3)
                t))
         ;; elisp expression wildcard regexp
         ;; elisp sexp: group 8!
         (e-rx (rx (group-n 1 "%e"
                            "{" (group-n 8 "(" (1+ ascii) ")") "}"))))
    ;; Evaluating elisp expression should go the first because it can produce
    ;; additional wildcards
    (while (string-match e-rx str)
      (setq str (replace-match
                 (save-match-data
                   (orb--autokey-evaluate-expression
                    (match-string 8 str) entry))
                 t nil str 1)))
    ;; Expanding all other wildcards are actually
    ;; variations of calls to `orb--autokey-format-field' with many
    ;; commonalities, so we wrap it into a macro
    (cl-macrolet
        ((expand
          (wildcard &key field value entry capital
                    starred words characters delimiter)
          (let ((cap (or capital '(match-string 2 str)))
                (star (or starred '(match-string 3 str)))
                (opt1 (or words '(match-string 4 str)))
                (opt2 (or characters '(match-string 5 str)))
                (opt3 (or delimiter '(match-string 6 str))))
            `(while (string-match ,wildcard str)
               (setq str (replace-match
                          ;; we can safely pass nil values
                          ;; `orb--autokey-format-field' should
                          ;; handle them correctly
                          (orb--autokey-format-field ,field
                            :entry ,entry :value ,value
                            :capital ,cap :starred ,star
                            :words ,opt1 :characters ,opt2 :delimiter ,opt3)
                          t nil str 1))))))
      ;; Handle author wildcards
      (expand a-rx
              :field "=name="
              :value (or (bibtex-completion-get-value "author" entry)
                         (bibtex-completion-get-value "editor" entry)))
      ;; Handle title wildcards
      (expand t-rx
              :field "title"
              :value (or (bibtex-completion-get-value "title" entry) ""))
      ;; Handle custom field wildcards
      (expand f-rx
              :field (match-string 7 str)
              :entry entry)
      ;; Handle pages wildcards %p*[-]
      (expand p-rx
              :field (if (match-string 3 str) "pagetotal" "pages")
              :entry entry
              :words "1"))
    ;; Handle year wildcards
    ;; it's simple, so we do not use `orb--autokey-format-field' here
    ;; year should be well-formed: YYYY
    ;; TODO: put year into cl-macrolet
    (let ((year (or (bibtex-completion-get-value "year" entry)
                    (bibtex-completion-get-value "date" entry))))
      (if (or (not year)
              (string-empty-p year)
              (string= year orb-autokey-empty-field-token))
          (while (string-match y-rx str)
            (setq str (replace-match orb-autokey-empty-field-token
                                     t nil str 1)))
        ;; Normalize once: the leading number of the field, zero-padded to
        ;; at least four digits, so both substrings below are always valid.
        (let ((yyyy (format "%04d" (string-to-number year))))
          (while (string-match y-rx str)
            (setq str (replace-match
                       (if (match-string 3 str)
                           (substring yyyy 2 4)
                         (substring yyyy 0 4))
                       t nil str 1))))))
    str))

(provide 'orb-core)
;;; orb-core.el ends here
;; Local Variables:
;; coding: utf-8
;; fill-column: 79
;; End: