Files
emacs/lisp/org-ref/org-ref-arxiv.el
2025-11-25 19:52:03 +01:00

300 lines
12 KiB
EmacsLisp

;;; org-ref-arxiv.el --- arxiv utilities for org-mode -*- lexical-binding: t; -*-
;; Copyright (C) 2015-2024 John Kitchin
;; Author: John Kitchin <jkitchin@andrew.cmu.edu>
;; Keywords:
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; this library creates a new org-link for Arxiv (http://arxiv.org/) entries,
;; and provides functions to retrieve bibtex entries from an Arxiv number.
;;
;; An Arxiv number might look like: cond-mat/0410285 or 1503.01742
;;; Code:
(require 'bibtex)
(require 'org)
(require 'org-ref-utils)
(require 'parsebib)
(require 'xml)
;; This is a local variable defined in `url-http'. We need it to avoid
;; byte-compiler errors.
(defvar url-http-end-of-headers)
(declare-function parsebib-find-bibtex-dialect "parsebib")
(declare-function org-ref-clean-bibtex-entry "org-ref-core")
(declare-function org-ref-normalize-bibtex-completion-bibliography "org-ref-utils")
;; this is a C function
(declare-function libxml-parse-xml-region "xml")
;;* The org-mode link
;; this just makes a clickable link that opens the entry.
;; example: arxiv:cond-mat/0410285
;; Register the "arxiv" link type.  Following a link opens the abstract page
;; in a browser.  Export produces a hyperlink for the html and latex
;; backends and nil for every other backend.
(org-link-set-parameters
 "arxiv"
 :follow (lambda (link-string)
           (browse-url (format "http://arxiv.org/abs/%s" link-string)))
 :export (lambda (keyword desc format)
           (cond
            ((eq format 'html)
             (format "<a href=\"http://arxiv.org/abs/%s\">arxiv:%s</a>"
                     keyword (or desc keyword)))
            ((eq format 'latex)
             ;; \href takes a target and a link text.  \url accepts a single
             ;; argument only, so a second brace group would be typeset as
             ;; plain text glued to the URL.
             (format "\\href{http://arxiv.org/abs/%s}{%s}"
                     keyword (or desc keyword))))))
;;* Getting a bibtex entry for an arXiv article using remote service:
;; For an arxiv article, there is a link to a NASA ADS page like this:
;; http://adsabs.harvard.edu/cgi-bin/bib_query?arXiv:1503.01742
;; On that page, there is a link to a bibtex entry:
;; http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=2015arXiv150301742H&data_type=BIBTEX&db_key=PRE&nocookieset=1
;;
;; It looks like you need to get a Bibliographic code from the arxiv number to
;; then get the bibtex entry.
(defun arxiv-get-bibliographic-code (arxiv-number)
  "Get Bibliographic code for ARXIV-NUMBER.
Query the ADS bib_query service and return the code captured from the
canonical abstract link found in the response.  Signal an error when
the request yields no buffer or when the link cannot be found."
  (let ((response (url-retrieve-synchronously
                   (concat
                    "http://adsabs.harvard.edu/cgi-bin/bib_query?arXiv:"
                    arxiv-number))))
    (unless (buffer-live-p response)
      (error "No response from ADS for arXiv:%s" arxiv-number))
    (unwind-protect
        (with-current-buffer response
          ;; Search from the top rather than from wherever point was left.
          (goto-char (point-min))
          ;; The canonical link is accepted with either URL scheme.
          (search-forward-regexp "<link rel=\"canonical\" href=\"https?://ui.adsabs.harvard.edu/abs/\\(.*\\)/abstract\"/>")
          (match-string 1))
      ;; The retrieval buffer is only scratch space; do not leak it.
      (kill-buffer response))))
(defun arxiv-get-bibtex-entry (arxiv-bibliographic-code)
  "Get bibtex entry for ARXIV-BIBLIOGRAPHIC-CODE.
Fetch the ADS export-citation page for the code and return the text of
its textarea element with XML entities decoded.  Return nil when the
request yields no buffer or the page has no matching textarea."
  (let ((response (url-retrieve-synchronously
                   (format "https://ui.adsabs.harvard.edu/abs/%s/exportcitation"
                           arxiv-bibliographic-code))))
    (when (buffer-live-p response)
      (unwind-protect
          (with-current-buffer response
            ;; Search from the top rather than from wherever point was left.
            (goto-char (point-min))
            (when (re-search-forward
                   "<textarea.*>\\(.*\\(?:\n.*\\)*?\\)\\(?:\n\\s-*\n\\)</textarea>"
                   nil t)
              (xml-substitute-special (match-string 1))))
        ;; The retrieval buffer is only scratch space; do not leak it.
        (kill-buffer response)))))
;;* Getting a bibtex entry for an arXiv article using arXiv API:
;; Retrieves the metadata of an article via arXiv's HTTP API,
;; extracts the necessary information, and formats a new BibTeX entry.
;; BibTeX template used when an entry is built from arXiv API metadata.
;; `arxiv-get-bibtex-entry-via-arxiv-api' passes it to `format' and fills
;; the eight %s slots, in order, with: key, title, author list, year,
;; arXiv number, primary category, abstract and URL.
(defvar arxiv-entry-format-string "@article{%s,
journal = {CoRR},
title = {%s},
author = {%s},
archivePrefix = {arXiv},
year = {%s},
eprint = {%s},
primaryClass = {%s},
abstract = {%s},
url = {%s},
}"
"Template for BibTeX entries of arXiv articles.")
(declare-function doi-utils-doi-to-bibtex-string "doi-utils")
(declare-function org-ref-replace-nonascii "org-ref-bibtex")
(defun arxiv-get-bibtex-entry-via-arxiv-api (arxiv-number)
  "Retrieve meta data for ARXIV-NUMBER.
Returns a formatted BibTeX entry.

The metadata is read from the arXiv export API.  When the record
carries a DOI and `doi-utils-doi-to-bibtex-string' yields a result for
it, that result is returned.  Otherwise the entry is built from
`arxiv-entry-format-string' with an automatically generated key.
Signal an error when the request yields no buffer."
  (let ((response (url-retrieve-synchronously
                   (format "http://export.arxiv.org/api/query?id_list=%s" arxiv-number)
                   t)))
    (unless (buffer-live-p response)
      (error "No response from the arXiv API for %s" arxiv-number))
    (unwind-protect
        (with-current-buffer response
          (let* ((parse-tree (libxml-parse-xml-region
                              ;; Skip the HTTP headers: parse from the XML
                              ;; declaration to the end of the buffer.
                              (progn (goto-char (point-min))
                                     (search-forward "<?xml ")
                                     (match-beginning 0))
                              (point-max)))
                 (entry (assq 'entry parse-tree))
                 (authors (mapcar (lambda (it) (nth 2 (nth 2 it)))
                                  (seq-filter (lambda (it)
                                                (and (listp it) (eq (car it) 'author)))
                                              entry)))
                 (year (format-time-string
                        "%Y" (date-to-time (nth 2 (assq 'published entry)))))
                 (title (nth 2 (assq 'title entry)))
                 (names (arxiv-bibtexify-authors authors))
                 (category (cdar (nth 1 (assq 'primary_category entry))))
                 (abstract (string-trim (nth 2 (assq 'summary entry))))
                 (url (nth 2 (assq 'id entry)))
                 ;; A keyless draft entry, used only to derive the autokey.
                 (temp-bibtex (format arxiv-entry-format-string
                                      "" title names year arxiv-number
                                      category abstract url))
                 (key (with-temp-buffer
                        (insert temp-bibtex)
                        (bibtex-mode)
                        (bibtex-set-dialect (parsebib-find-bibtex-dialect) t)
                        (org-ref-replace-nonascii)
                        (bibtex-generate-autokey)))
                 (doi (assq 'doi entry)))
            ;; `or' hands back the DOI-based entry when there is one.  The
            ;; previous `unless' form evaluated to nil in that case, so a
            ;; successful DOI lookup was thrown away.
            (or (and doi
                     (ignore-errors
                       (doi-utils-doi-to-bibtex-string (nth 2 doi))))
                ;; no doi or inactive doi, so we fall back to the simple template
                (format arxiv-entry-format-string
                        key title names year arxiv-number
                        category abstract url))))
      ;; The retrieval buffer is only scratch space; do not leak it.
      (kill-buffer response))))
(defun arxiv-bibtexify-authors (authors)
  "Return names in \"SURNAME, FIRST NAME\" format from AUTHORS list.
AUTHORS is a list of name strings whose words are separated by spaces.
The last word of each name is taken as the surname.  A name made of a
single word is returned as is, without a trailing comma.  Elements that
are not strings or contain no words are skipped.  The formatted names
are joined with \" and \"."
  (let (formatted)
    (dolist (author authors)
      ;; Omit empty parts so leading or trailing spaces cannot produce
      ;; empty name components.
      (let* ((parts (and (stringp author) (split-string author " +" t)))
             (surname (car (last parts)))
             (given (butlast parts)))
        (cond
         ((null parts))                 ; nothing usable in this element
         (given (push (concat surname ", " (string-join given " "))
                      formatted))
         (t (push surname formatted)))))
    (string-join (nreverse formatted) " and ")))
(defun arxiv-maybe-arxiv-id-from-current-kill ()
  "Try to get an arxiv ID from the current kill.
Surrounding whitespace in the kill is ignored.  Return the bare
identifier when the whole kill is an arXiv ID, an \"arXiv:ID\" citation,
or an arxiv.org abs/pdf/format URL (optionally ending in \".pdf\").
Return nil otherwise, including when the kill ring is empty."
  (let* ((raw-kill (ignore-errors (current-kill 0 t))) ;; nil if empty kill ring
         (the-current-kill (and (stringp raw-kill) (string-trim raw-kill)))
         ;; The prefixes are anchored at the start of the string, not merely
         ;; at the start of a line.
         (arxiv-url-prefix-regexp "\\`https?://arxiv\\.org/\\(pdf\\|abs\\|format\\)/")
         (arxiv-cite-prefix-regexp "\\`\\(arXiv\\|arxiv\\):")
         ;; Both ID styles may carry a version suffix such as v2.
         (arxiv-id-old-regexp "[a-z-]+\\(\\.[A-Z]\\{2\\}\\)?/[0-9]\\{5,7\\}\\(v[0-9]+\\)?") ; Ex: math.GT/0309136
         (arxiv-id-new-regexp "[0-9]\\{4\\}[.][0-9]\\{4,5\\}\\(v[0-9]+\\)?") ; Ex: 1304.4404v2
         (arxiv-id-regexp (concat "\\(" arxiv-id-old-regexp "\\|" arxiv-id-new-regexp "\\)")))
    (cond
     (;; nothing usable in the kill ring
      (not the-current-kill)
      nil)
     (;; the kill is exactly an arxiv ID
      ;; Ex: 1304.4404v2
      (org-ref--string-match (concat "\\`" arxiv-id-regexp "\\'") the-current-kill)
      the-current-kill)
     (;; the kill is an arxiv cite: drop the prefix
      ;; Ex: arXiv:1304.4404v2 --> 1304.4404v2
      (org-ref--string-match (concat arxiv-cite-prefix-regexp arxiv-id-regexp "\\'")
                             the-current-kill)
      (replace-regexp-in-string arxiv-cite-prefix-regexp "" the-current-kill))
     (;; the kill is an arxiv url: drop the url prefix
      ;; Ex: https://arxiv.org/abs/1304.4404 --> 1304.4404
      (org-ref--string-match (concat arxiv-url-prefix-regexp arxiv-id-regexp "\\'")
                             the-current-kill)
      (replace-regexp-in-string arxiv-url-prefix-regexp "" the-current-kill))
     (;; the kill is an arxiv PDF url: drop the url prefix and the .pdf suffix
      ;; Ex: https://arxiv.org/pdf/1304.4404.pdf --> 1304.4404
      (org-ref--string-match (concat arxiv-url-prefix-regexp arxiv-id-regexp "\\.pdf\\'")
                             the-current-kill)
      (replace-regexp-in-string arxiv-url-prefix-regexp ""
                                (substring the-current-kill 0 -4)))
     ;; otherwise, return nil
     (t
      nil))))
(defvar bibtex-completion-bibliography)
;;;###autoload
(defun arxiv-add-bibtex-entry (arxiv-number bibfile)
  "Add bibtex entry for ARXIV-NUMBER to BIBFILE.
The entry comes from `arxiv-get-bibtex-entry-via-arxiv-api'.  It is
appended on a fresh line at the end of BIBFILE, passed through
`org-ref-clean-bibtex-entry', and the file is saved.  Signal an error,
before BIBFILE is modified, when no entry text could be obtained.

Interactively, the arXiv number defaults to an ID found in the current
kill, and BIBFILE is chosen from the .bib files in the current
directory plus the configured bibliography files."
  (interactive
   (list (read-string
          "arxiv: "
          (arxiv-maybe-arxiv-id-from-current-kill))
         ;; now get the bibfile to add it to
         (completing-read
          "Bibfile: "
          (append (org-ref--directory-files "." (lambda (f) (org-ref--file-ext-p f "bib")))
                  (org-ref-normalize-bibtex-completion-bibliography)))))
  (save-window-excursion
    (find-file bibfile)
    ;; Obtain and validate the entry before editing anything, so a failed
    ;; lookup leaves the buffer untouched and reports a clear error instead
    ;; of a wrong-type error from `insert'.
    (let ((entry (arxiv-get-bibtex-entry-via-arxiv-api arxiv-number)))
      (unless (stringp entry)
        (error "Could not retrieve a BibTeX entry for arXiv:%s" arxiv-number))
      (goto-char (point-max))
      (unless (bolp) (insert "\n"))
      (insert entry)
      (org-ref-clean-bibtex-entry)
      (goto-char (point-max))
      (unless (bolp) (insert "\n"))
      (save-buffer))))
;;;###autoload
(defun arxiv-get-pdf (arxiv-number pdf)
  "Download the pdf of ARXIV-NUMBER from arxiv.org into the file PDF.
When the downloaded file does not pass `org-ref-pdf-p', delete it again
and report the failure in the echo area.  Interactively, prompt for the
arXiv number (defaulting to an ID found in the current kill) and for
the destination file name."
  (interactive
   (let* ((id (read-string "arxiv: "
                           (arxiv-maybe-arxiv-id-from-current-kill)))
          (target (read-string "PDF: ")))
     (list id target)))
  (let ((source (format "https://arxiv.org/pdf/%s" arxiv-number)))
    (url-copy-file source pdf)
    ;; Whatever arrived must really be a pdf; otherwise discard it.
    (when (not (org-ref-pdf-p pdf))
      (delete-file pdf)
      (message "Error downloading arxiv pdf %s" source))))
(defvar bibtex-completion-library-path)
;;;###autoload
(defun arxiv-get-pdf-add-bibtex-entry (arxiv-number bibfile pdfdir)
  "Add bibtex entry for ARXIV-NUMBER to BIBFILE.
Remove troublesome chars from the bibtex key, retrieve a pdf
for ARXIV-NUMBER and save it to PDFDIR with the same name of the
key.  PDFDIR may be given with or without a trailing directory
separator.  When the pdf was saved, a file field pointing at it is
added to the entry.

Interactively, PDFDIR is taken from `bibtex-completion-library-path':
the value itself when it is a string, its only element when it is a
one-element list, otherwise one of its elements chosen by completion."
  (interactive
   (list (read-string
          "arxiv: "
          (arxiv-maybe-arxiv-id-from-current-kill))
         ;; now get the bibfile to add it to
         (completing-read
          "Bibfile: "
          (append (org-ref--directory-files "." (lambda (f) (org-ref--file-ext-p f "bib")))
                  (org-ref-normalize-bibtex-completion-bibliography)))
         (cond
          ((stringp bibtex-completion-library-path)
           bibtex-completion-library-path)
          ((= 1 (length bibtex-completion-library-path))
           (car bibtex-completion-library-path))
          (t
           (completing-read "PDF dir: " bibtex-completion-library-path)))))
  (arxiv-add-bibtex-entry arxiv-number bibfile)
  (save-window-excursion
    (let ((key "")
          pdf-file)
      (find-file bibfile)
      ;; The entry just added is the last one in the file.
      (goto-char (point-max))
      (bibtex-beginning-of-entry)
      (re-search-forward bibtex-entry-maybe-empty-head)
      (if (match-beginning bibtex-key-in-head)
          (progn
            ;; Take the key out of the buffer; it is re-inserted below once
            ;; it has been cleaned up and checked.
            (setq key (delete-and-extract-region
                       (match-beginning bibtex-key-in-head)
                       (match-end bibtex-key-in-head)))
            ;; remove potentially troublesome characters from key
            ;; as it will be used as a filename
            (setq key (replace-regexp-in-string "\"\\|\\*\\|/\\|:\\|<\\|>\\|\\?\\|\\\\\\||\\|\\+\\|,\\|\\.\\|;\\|=\\|\\[\\|]\\|!\\|@"
                                                "" key))
            ;; check if the key is in the buffer
            (when (save-excursion
                    (bibtex-search-entry key))
              ;; Show the clashing entry in another window, then let the
              ;; user edit the key.
              (save-excursion
                (bibtex-search-entry key)
                (bibtex-copy-entry-as-kill)
                (switch-to-buffer-other-window "*duplicate entry*")
                (bibtex-yank))
              (setq key (bibtex-read-key "Duplicate Key found, edit: " key))))
        (setq key (bibtex-read-key "Key not found, insert: ")))
      (insert key)
      ;; Build the target path once.  `file-name-as-directory' supplies the
      ;; separator when PDFDIR lacks one, so the file lands inside PDFDIR
      ;; rather than beside it under a run-together name.
      (setq pdf-file (concat (file-name-as-directory pdfdir) key ".pdf"))
      (arxiv-get-pdf arxiv-number pdf-file)
      ;; Check that it worked, and insert a field for it.
      (when (file-exists-p pdf-file)
        (bibtex-end-of-entry)
        (backward-char)
        (insert (format " file = {%s}\n " pdf-file))))
    (save-buffer)))
(provide 'org-ref-arxiv)
;;; org-ref-arxiv.el ends here
;; Local Variables:
;; byte-compile-warnings: (not docstrings)
;; End: