187 lines
6.2 KiB
EmacsLisp
187 lines
6.2 KiB
EmacsLisp
;;; org-ref-pdf.el --- Drag-n-drop PDF onto bibtex files -*- lexical-binding: t; -*-
|
|
|
|
;; Copyright (C) 2015-2024 John Kitchin
|
|
|
|
;; Author: John Kitchin <jkitchin@andrew.cmu.edu>
|
|
;; Keywords:
|
|
|
|
;; This program is free software; you can redistribute it and/or modify
|
|
;; it under the terms of the GNU General Public License as published by
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
;; (at your option) any later version.
|
|
|
|
;; This program is distributed in the hope that it will be useful,
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;; GNU General Public License for more details.
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
;;; Commentary:
|
|
|
|
;; This library provides functions to enable drag-n-drop of pdfs onto a bibtex
|
|
;; buffer to add bibtex entries to it.
|
|
;;
|
|
;; [2021-08-29 Sun] I have not found drag-and-drop to be especially reliable,
|
|
;; and don't recommend you use it. This code relies 'pdf-tools, which is slow to
|
|
;; load for me. pdf-tools is no longer automatically installed, so if you use
|
|
;; this, you should install it yourself.
|
|
|
|
;; TODO: If no DOI is found, figure out a way to do a crossref/google query to
|
|
;; get a doi. This needs a reliable title/citation.
|
|
|
|
;;; Code:
|
|
|
|
;; pdf-tools is optional, and I think this will skip compiling this file and
|
|
;; outputting an error.
|
|
(when (and (bound-and-true-p byte-compile-current-file)
|
|
(not (locate-library "pdf-tools")))
|
|
(message "org-ref-pdf: skipping byte-compile; pdf-tools not installed.")
|
|
(setq byte-compile-current-file nil)
|
|
(throw 'byte-compile-top-level nil))
|
|
|
|
(require 'pdf-tools)
|
|
|
|
(require 'org-ref-utils)
|
|
|
|
|
|
(eval-when-compile
|
|
(require 'cl-lib))
|
|
|
|
(declare-function org-ref-normalize-bibtex-completion-bibliography "org-ref-utils")
|
|
|
|
(defvar bibtex-completion-bibliography)
|
|
|
|
(defgroup org-ref-pdf nil
|
|
"Customization group for org-ref-pdf"
|
|
:tag "Org Ref PDF"
|
|
:group 'org-ref-pdf)
|
|
|
|
|
|
(defcustom pdftotext-executable
|
|
"pdftotext"
|
|
"Executable for pdftotext. Set if the executable is not on your
|
|
path, or you want to use another version."
|
|
:type 'file
|
|
:group 'org-ref-pdf)
|
|
|
|
|
|
(defcustom org-ref-pdf-doi-regex
|
|
"10\\.[0-9]\\{4,9\\}/[-+._;()/:A-Z0-9]+"
|
|
"Regular expression to match DOIs in a pdf converted to text."
|
|
:type 'regexp
|
|
:group 'org-ref-pdf)
|
|
|
|
|
|
(defcustom org-ref-pdf-to-bibtex-function
|
|
'copy-file
|
|
"Function for getting a pdf to a directory.
|
|
Defaults to `copy-file', but could also be `rename-file'."
|
|
:type 'File :group 'org-ref-pdf)
|
|
|
|
|
|
(defun org-ref-extract-doi-from-pdf (pdf)
|
|
"Try to extract a doi from a PDF file.
|
|
There may be more than one doi in the file. This function returns
|
|
all the ones it finds based on two patterns: doi: up to a quote,
|
|
bracket, space or end of line. dx.doi.org/up to a quote, bracket,
|
|
space or end of line.
|
|
|
|
If there is a trailing . we chomp it off. Returns a list of doi
|
|
strings, or nil."
|
|
(with-temp-buffer
|
|
(insert (shell-command-to-string (format "%s %s -"
|
|
pdftotext-executable
|
|
(shell-quote-argument pdf))))
|
|
(goto-char (point-min))
|
|
(let ((matches '()))
|
|
(while (re-search-forward org-ref-pdf-doi-regex nil t)
|
|
;; I don't know how to avoid a trailing . on some dois with the
|
|
;; expression above, so if it is there, I chomp it off here.
|
|
(let ((doi (match-string 0)))
|
|
(when (or (string-suffix-p "." doi) (string-suffix-p ";" doi))
|
|
(setq doi (substring doi 0 (- (length doi) 1))))
|
|
(cl-pushnew doi matches :test #'equal)))
|
|
matches)))
|
|
|
|
|
|
(defun org-ref-pdf-doi-candidates (dois)
|
|
"Generate candidate list for a completion source.
|
|
Used when multiple dois are found in a pdf file."
|
|
(cl-loop for doi in dois
|
|
collect
|
|
(condition-case nil
|
|
(cons
|
|
(plist-get (doi-utils-get-json-metadata doi) :title)
|
|
doi)
|
|
(error (cons (format "%s read error" doi) doi)))))
|
|
|
|
|
|
(defun org-ref-bibtex-key-from-doi (doi)
|
|
"Return a bibtex entry's key from a DOI.
|
|
BIB is an optional filename to get the entry from."
|
|
(catch 'key
|
|
(cl-loop for bibfile in (org-ref-normalize-bibtex-completion-bibliography)
|
|
do
|
|
(with-temp-buffer
|
|
(insert-file-contents (expand-file-name bibfile))
|
|
(when (search-forward doi)
|
|
(bibtex-beginning-of-entry)
|
|
(throw 'key (cdr (assoc "=key=" (bibtex-parse-entry)))))))))
|
|
|
|
|
|
;;;###autoload
|
|
(defun org-ref-pdf-to-bibtex ()
|
|
"Add pdf of current buffer to bib file and save pdf. The pdf
|
|
should be open in Emacs using the `pdf-tools' package."
|
|
(interactive)
|
|
(when (not (org-ref--file-ext-p (downcase (buffer-file-name)) "pdf"))
|
|
(error "Buffer is not a pdf file"))
|
|
;; Get doi from pdf of current buffer
|
|
(let* ((dois (org-ref-extract-doi-from-pdf (buffer-file-name)))
|
|
(doi-utils-download-pdf nil)
|
|
(doi (if (= 1 (length dois))
|
|
(car dois)
|
|
(completing-read "Select DOI: " dois))))
|
|
;; Add bib entry from doi:
|
|
(doi-utils-add-bibtex-entry-from-doi doi)
|
|
;; Copy pdf to a directory
|
|
(let ((key (org-ref-bibtex-key-from-doi doi)))
|
|
(funcall org-ref-pdf-to-bibtex-function
|
|
(buffer-file-name)
|
|
(expand-file-name (format "%s.pdf" key) (org-ref-library-path))))))
|
|
|
|
|
|
;;;###autoload
|
|
(defun org-ref-pdf-debug-pdf (pdf-file)
|
|
"Try to debug getting a doi from a pdf.
|
|
Opens a buffer with the pdf converted to text, and `occur' on the
|
|
variable `org-ref-pdf-doi-regex'."
|
|
(interactive "fPDF: ")
|
|
(switch-to-buffer (get-buffer-create "*org-ref-pdf debug*"))
|
|
(erase-buffer)
|
|
(insert (shell-command-to-string (format "%s %s -"
|
|
pdftotext-executable
|
|
(shell-quote-argument pdf-file))))
|
|
(goto-char (point-min))
|
|
(highlight-regexp org-ref-pdf-doi-regex)
|
|
(occur org-ref-pdf-doi-regex)
|
|
(switch-to-buffer-other-window "*Occur*"))
|
|
|
|
|
|
;;;###autoload
|
|
(defun org-ref-pdf-crossref-lookup ()
|
|
"Lookup highlighted text in PDFView in CrossRef."
|
|
(interactive)
|
|
(require 'pdf-view)
|
|
(unless (pdf-view-active-region-p)
|
|
(error "The region is not active"))
|
|
(let* ((txt (pdf-view-active-region-text)))
|
|
(pdf-view-deactivate-region)
|
|
(crossref-lookup (mapconcat 'identity txt " \n"))))
|
|
|
|
|
|
(provide 'org-ref-pdf)
|
|
;;; org-ref-pdf.el ends here
|