1738 lines
68 KiB
EmacsLisp
1738 lines
68 KiB
EmacsLisp
;;; doi-utils.el --- DOI utilities for making bibtex entries -*- lexical-binding: t; -*-
|
||
|
||
;; Copyright (C) 2015-2021 John Kitchin
|
||
|
||
;; Author: John Kitchin <jkitchin@andrew.cmu.edu>
|
||
;; Keywords: convenience
|
||
;; Package-Requires: ((org-ref))
|
||
|
||
;; This program is free software; you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; This program is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
||
;;; Commentary:
|
||
|
||
;; This package provides functionality to download PDFs and bibtex entries from
|
||
;; a DOI, as well as to update a bibtex entry from a DOI. It depends slightly
|
||
;; on org-ref, to determine where to save pdf files, and where to insert
|
||
;; bibtex entries in the default bibliography.
|
||
|
||
;; The principal commands you will use from here are:
|
||
|
||
;; - doi-utils-get-bibtex-entry-pdf with the cursor in a bibtex entry.
|
||
;; - doi-utils-insert-bibtex-entry-from-doi to insert a bibtex entry at your cursor, clean it and try to get a pdf.
|
||
;; - doi-utils-add-bibtex-entry-from-doi to add an entry to your default bibliography (cleaned with pdf if possible).
|
||
;; - doi-utils-update-bibtex-entry-from-doi with cursor in an entry to update its fields.
|
||
|
||
;;; Code:
|
||
|
||
(defvar url-http-end-of-headers)
|
||
(declare-function org-ref-find-bibliography "org-ref-core")
|
||
(declare-function org-ref-clean-bibtex-entry "org-ref-core")
|
||
(declare-function bibtex-completion-edit-notes "bibtex-completion")
|
||
(declare-function org-bibtex-yank "org-bibtex")
|
||
(declare-function org-ref-possible-bibfiles "org-ref-core")
|
||
(declare-function org-ref-normalize-bibtex-completion-bibliography "org-ref-utils")
|
||
|
||
(declare-function org-ref--file-ext-p "org-ref-utils")
|
||
(declare-function org-ref--directory-files "org-ref-utils")
|
||
|
||
(eval-when-compile
|
||
(require 'cl-lib))
|
||
(require 'bibtex)
|
||
(require 'json)
|
||
(require 'org) ; org-add-link-type
|
||
|
||
(or (require 'ol-bibtex nil t)
|
||
(require 'org-bibtex)) ; org-bibtex-yank
|
||
|
||
(require 'url-http)
|
||
(require 'url-handlers)
|
||
(require 'org-ref-utils)
|
||
(require 'transient)
|
||
|
||
;;* Customization
|
||
(defgroup doi-utils nil
  "Customization group for doi-utils."
  :tag "DOI utils"
  ;; Parent under org-ref rather than under itself; a self-referential
  ;; :group makes the group unreachable from any parent in the
  ;; customize hierarchy.
  :group 'org-ref)
|
||
|
||
|
||
(defcustom doi-utils-download-pdf
  t
  "Try to download PDFs when adding bibtex entries when non-nil.
See `doi-utils-async-download' for whether the download runs
asynchronously or synchronously."
  :type 'boolean
  :group 'doi-utils)
|
||
|
||
(defcustom doi-utils-open-pdf-after-download
  nil
  "Open PDF after adding bibtex entries when non-nil."
  :type 'boolean
  :group 'doi-utils)
|
||
|
||
|
||
(defcustom doi-utils-timestamp-field
  "DATE_ADDED"
  "The bibtex field to store the date when an entry has been added.
The timestamp value is produced by `doi-utils-timestamp-format-function'."
  :type 'string
  :group 'doi-utils)
|
||
|
||
(defcustom doi-utils-timestamp-format-function
  'current-time-string
  "The function to format the timestamp for a bibtex entry.
Set to a function that returns nil to avoid setting timestamps in the entries.
e.g. (lambda () nil)
See also `doi-utils-timestamp-field' for where the value is stored."
  :type 'function
  :group 'doi-utils)
|
||
|
||
|
||
(defcustom doi-utils-dx-doi-org-url
  "https://doi.org/"
  "Base url to retrieve doi metadata from.  A trailing / is required.
`doi-utils-get-redirect' appends the DOI directly to this string."
  :type 'string
  :group 'doi-utils)
|
||
|
||
|
||
(defcustom doi-utils-metadata-function 'doi-utils-get-json-metadata
  "Function for retrieving json metadata from `doi-utils-dx-doi-org-url'.
The default is `doi-utils-get-json-metadata', but it sometimes
fails with a proxy.  An alternative is
`doi-utils-get-json-metadata-curl' which requires an external
program to use curl."
  :type 'function
  :group 'doi-utils)
|
||
|
||
|
||
(defcustom doi-utils-async-download t
  "Use `doi-utils-async-download-pdf' to get pdfs asynchrounously.
If nil use `doi-utils-get-bibtex-entry-pdf' synchronously."
  :type 'boolean
  :group 'doi-utils)
|
||
|
||
|
||
(defun doi-utils-pdf-filename-from-key ()
  "Generate PDF filename from bibtex entry key.
This is the default function for `doi-utils-pdf-filename-function'.
Returns the bibtex entry key as the filename (without extension or path).
Point must be inside the bibtex entry."
  (let ((fields (bibtex-parse-entry)))
    ;; `bibtex-parse-entry' exposes the key under the pseudo-field "=key=".
    (cdr (assoc "=key=" fields))))
|
||
|
||
|
||
(defcustom doi-utils-pdf-filename-function
  'doi-utils-pdf-filename-from-key
  "Function to generate PDF filename from bibtex entry.
The function is called with no arguments while point is in the bibtex
entry, and should return a string to use as the PDF filename (without
the .pdf extension or directory path).

The directory path is determined separately by `bibtex-completion-library-path'.

The default function uses the bibtex entry key as the filename.

Example: To use the title field as the filename:
  (setq doi-utils-pdf-filename-function
        (lambda () (bibtex-autokey-get-field \"title\")))

Note: The function is responsible for ensuring the returned filename
is valid for the filesystem.  Special characters in fields like title
may cause issues on some systems."
  :type 'function
  :group 'doi-utils)
|
||
|
||
|
||
;;* Getting pdf files from a DOI
|
||
|
||
;; The idea here is simple. When you visit http://dx.doi.org/doi or
|
||
;; https://doi.org/doi, you get redirected to the journal site. Once you have
|
||
;; the url for the article, you can usually compute the url to the pdf, or find
|
||
;; it in the page. Then you simply download it.
|
||
|
||
;; There are some subtleties in doing this that are described here. To get the
|
||
;; redirect, we have to use url-retrieve, and a callback function. The callback
|
||
;; does not return anything, so we communicate through global variables.
|
||
;; url-retrieve is asynchronous, so we have to make sure to wait for it to
|
||
;; finish.
|
||
|
||
(defvar *doi-utils-waiting* t
  "Stores waiting state for url retrieval.
Non-nil while an asynchronous `url-retrieve' is in flight; the
callbacks in this file set it to nil to release the busy-wait loops.
`highwire-pdf-url' additionally uses it as an elapsed-time counter.")
|
||
|
||
(defvar *doi-utils-redirect* nil
  "Stores redirect url from a callback function.
Set by `doi-utils-redirect-callback'; consumed by the
publisher-specific pdf-url functions, which shadow it via their
argument of the same name.")
|
||
|
||
(defun doi-utils-redirect-callback (&optional status)
  "Callback for `url-retrieve' to set the redirect.
Optional argument STATUS is the status plist passed by `url-retrieve'.
Any error in STATUS is re-signaled; any redirect is stored in
`*doi-utils-redirect*'.  Always clears `*doi-utils-waiting*' so the
synchronous waiter in `doi-utils-get-redirect' can proceed."
  (let ((err (plist-get status :error))
        (redirect (plist-get status :redirect)))
    (when err
      (signal (car err) (cdr err)))
    ;; `:redirect' is absent (nil) when no redirect occurred.
    (when redirect
      (setq *doi-utils-redirect* redirect)))
  ;; we have done our job, so we are not waiting any more.
  (setq *doi-utils-waiting* nil))
|
||
|
||
;; To actually get the redirect we use url-retrieve like this.
|
||
|
||
(defun doi-utils-get-redirect (doi)
  "Get redirect url from `doi-utils-dx-doi-org-url'/doi.
The result is stored in `*doi-utils-redirect*' (nil if there was no
redirect).  Blocks until the asynchronous retrieval completes."
  ;; we are going to wait until the url-retrieve is done
  (setq *doi-utils-waiting* t)
  ;; start with no redirect. it will be set in the callback.
  (setq *doi-utils-redirect* nil)
  (url-retrieve
   (format "%s%s" doi-utils-dx-doi-org-url doi)
   'doi-utils-redirect-callback)
  ;; I suspect we need to wait here for the asynchronous process to
  ;; finish. we loop and sleep until the callback says it is done via
  ;; `*doi-utils-waiting*'. this works as far as i can tell. Before I
  ;; had to run this a few times to get it to work, which i suspect
  ;; just gave the first one enough time to finish.
  ;; NOTE(review): there is no timeout here; if the callback never
  ;; fires this loops forever — confirm that is acceptable.
  (while *doi-utils-waiting* (sleep-for 0.1)))
|
||
|
||
;; Once we have a redirect for a particular doi, we need to compute the url to
|
||
;; the pdf. We do this with a series of functions. Each function takes a single
|
||
;; argument, the redirect url. If it knows how to compute the pdf url it does,
|
||
;; and returns it. We store the functions in a variable:
|
||
|
||
(defvar doi-utils-pdf-url-functions nil
  "Functions that return a url to a pdf from a redirect url.
Each function takes one argument, the redirect url.  The function
must return a pdf-url, or nil.
The list is populated near the end of this file via `setq'.")
|
||
|
||
|
||
;;** APS journals
|
||
|
||
(defun aps-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an APS journal *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s*\\)://journals.aps.org" *doi-utils-redirect*)
       ;; abstract pages map directly onto pdf pages
       (replace-regexp-in-string "/abstract/" "/pdf/" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Science
|
||
|
||
(defun science-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Science article at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://www.sciencemag.org" *doi-utils-redirect*)
       (format "%s.full.pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Nature
|
||
|
||
(defun nature-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Nature article at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://www.nature.com" *doi-utils-redirect*)
       (format "%s.pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Elsevier/ScienceDirect
|
||
;; You cannot compute these pdf links; they are embedded in the redirected pages.
|
||
|
||
(defvar *doi-utils-pdf-url* nil
  "Stores url to pdf download from a callback function.
Set by the asynchronous callbacks in
`doi-utils-get-copernicus-pdf-url' and `highwire-pdf-url'.")
|
||
|
||
;;** Wiley
|
||
;; Wiley have changed the url structure from
|
||
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/abstract
|
||
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/pdf
|
||
;; to
|
||
;; http://onlinelibrary.wiley.com/doi/abs/10.1002/anie.201402680
|
||
;; http://onlinelibrary.wiley.com/doi/pdf/10.1002/anie.201402680
|
||
;; Hence fewer steps are now required.
|
||
|
||
;; https://onlinelibrary.wiley.com/doi/10.1002/adts.202200926
|
||
;; https://onlinelibrary.wiley.com/doi/epdf/10.1002/adts.202200926
|
||
|
||
;; (defun wiley-pdf-url (*doi-utils-redirect*)
|
||
;; "Get url to the pdf from *DOI-UTILS-REDIRECT*."
|
||
;; (when (string-match "^http\\(s?\\)://onlinelibrary.wiley.com" *doi-utils-redirect*)
|
||
;; (replace-regexp-in-string "doi/abs" "doi/pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
(defun wiley-pdf-url-2 (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
[2023-04-10 Mon] updated a new rule.
https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/anie.201310461?download=true"
  (and (string-match "^http\\(s?\\)://onlinelibrary.wiley.com" *doi-utils-redirect*)
       ;; insert the pdfdirect path segment and request a download
       (format "%s?download=true"
               (replace-regexp-in-string "doi/" "doi/pdfdirect/"
                                         *doi-utils-redirect*))))
|
||
|
||
|
||
(defun agu-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an AGU article at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "https://agupubs.onlinelibrary.wiley.com"
                     *doi-utils-redirect*)
       (replace-regexp-in-string "/full/" "/pdfdirect/" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Springer
|
||
|
||
(defun springer-chapter-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT* for Springer book chapters.
Rewrites .../chapter/<doi> to .../content/pdf/<doi>.pdf."
  (when (string-match "^http\\(s?\\)://link.springer.com/chapter/" *doi-utils-redirect*)
    (replace-regexp-in-string "/chapter" "/content/pdf"
			      (concat *doi-utils-redirect* ".pdf"))))
|
||
|
||
|
||
(defun springer-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Springer article at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^http\\(s?\\)://link.springer.com" *doi-utils-redirect*)
    (let ((with-ext (concat *doi-utils-redirect* ".pdf")))
      ;; article pages live under /content/pdf/ with a .pdf suffix
      (replace-regexp-in-string "/article/" "/content/pdf/" with-ext))))
|
||
|
||
|
||
;;** ACS
|
||
;; here is a typical url http://pubs.acs.org/doi/abs/10.1021/nl500037x
|
||
;; the pdf is found at http://pubs.acs.org/doi/pdf/10.1021/nl500037x
|
||
|
||
;; we just change /abs/ to /pdf/.
|
||
|
||
(defun acs-pdf-url-1 (*doi-utils-redirect*)
  "Return the pdf url for an ACS /doi/abs/ page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://pubs.acs.org/doi/abs/" *doi-utils-redirect*)
       (replace-regexp-in-string "/abs/" "/pdf/" *doi-utils-redirect*)))
|
||
|
||
;; 1/20/2016 I noticed this new pattern in pdf urls, where there is no abs in
|
||
;; the url
|
||
(defun acs-pdf-url-2 (*doi-utils-redirect*)
  "Return the pdf url for an ACS /doi/ page at *DOI-UTILS-REDIRECT*, or nil.
Covers the newer url pattern with no abs in the url."
  (and (string-match "^http\\(s?\\)://pubs.acs.org/doi/" *doi-utils-redirect*)
       (replace-regexp-in-string "/doi/" "/doi/pdf/" *doi-utils-redirect*)))
|
||
|
||
;; 1/18/2019: It looks like they are using https now
|
||
(defun acs-pdf-url-3 (*doi-utils-redirect*)
  "Return the pdf url for an https ACS page at *DOI-UTILS-REDIRECT*, or nil.
Added 1/18/2019 when ACS switched to https."
  (and (string-match "^https://pubs.acs.org/doi/" *doi-utils-redirect*)
       (replace-regexp-in-string "/doi/" "/doi/pdf/" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** IOP
|
||
|
||
(defun iop-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an IOP article at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://iopscience.iop.org" *doi-utils-redirect*)
       (format "%s/pdf" *doi-utils-redirect*)))
|
||
|
||
;;** JSTOR
|
||
|
||
(defun jstor-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a JSTOR page at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^http\\(s?\\)://www.jstor.org" *doi-utils-redirect*)
    (let ((pdfplus (replace-regexp-in-string "/stable/" "/stable/pdfplus/"
                                             *doi-utils-redirect*)))
      (concat pdfplus ".pdf"))))
|
||
|
||
|
||
;;** AIP
|
||
|
||
(defun aip-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
Builds an AIP deliver/fulltext url from the pieces of the redirect path."
  (when (string-match "^http\\(s?\\)://scitation.aip.org" *doi-utils-redirect*)
    ;; get stuff after content
    (let (p1 p2 s p3)
      ;; p2 = redirect with the scheme/host prefix stripped.
      (setq p2 (replace-regexp-in-string
                "^http\\(s?\\)://scitation.aip.org/" "" *doi-utils-redirect*))
      ;; s = the remaining path split into components.
      (setq s (split-string p2 "/"))
      ;; p1 = path with components 0 and 6 removed, rejoined with "/".
      (setq p1 (mapconcat 'identity (org-ref--remove-at-indices '(0 6) s) "/"))
      ;; p3 = "/" <c0> <c1> "/" <c2> "/" <c3>.
      ;; NOTE(review): there is no "/" between (nth 0 s) and (nth 1 s);
      ;; confirm this concatenation is intentional.
      (setq p3 (concat "/" (nth 0 s) (nth 1 s) "/" (nth 2 s) "/" (nth 3 s)))
      (format "http://scitation.aip.org/deliver/fulltext/%s.pdf?itemId=/%s&mimeType=pdf&containerItemId=%s"
              p1 p2 p3))))
|
||
|
||
(defun aip-pdf-url-2 (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
[2021-08-28 Sat] Seems like they changed the link a little?
https://aip.scitation.org/doi/10.1063/1.5019667
-> https://aip.scitation.org/doi/pdf/10.1063/1.5019667"
  (when (string-match "^http\\(s?\\)://aip.scitation.org" *doi-utils-redirect*)
    ;; keep everything after the "doi" segment and re-root it under doi/pdf
    (let ((tail (cl-second (split-string *doi-utils-redirect* "doi"))))
      (concat "https://aip.scitation.org/doi/pdf" tail))))
|
||
|
||
;;** Taylor and Francis
|
||
|
||
(defun tandfonline-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Taylor and Francis page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://www.tandfonline.com" *doi-utils-redirect*)
       ;; both /abs/ and /full/ pages map onto /pdf/
       (replace-regexp-in-string "/abs/\\|/full/" "/pdf/" *doi-utils-redirect*)))
|
||
|
||
;;** ECS
|
||
|
||
(defun ecs-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an ECS journal page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://jes.ecsdl.org" *doi-utils-redirect*)
       (replace-regexp-in-string "\\.abstract$" ".full.pdf" *doi-utils-redirect*)))
|
||
|
||
;; http://ecst.ecsdl.org/content/25/2/2769
|
||
;; http://ecst.ecsdl.org/content/25/2/2769.full.pdf
|
||
|
||
|
||
(defun ecst-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an ECS Transactions page at *DOI-UTILS-REDIRECT*, or nil.
e.g. http://ecst.ecsdl.org/content/25/2/2769
->   http://ecst.ecsdl.org/content/25/2/2769.full.pdf"
  (and (string-match "^http\\(s?\\)://ecst.ecsdl.org" *doi-utils-redirect*)
       (format "%s.full.pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
|
||
;;** RSC
|
||
|
||
(defun rsc-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an RSC page at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^http\\(s?\\)://pubs.rsc.org" *doi-utils-redirect*)
    ;; lower-case the whole url, then swap the landing page for the pdf page
    (replace-regexp-in-string "articlelanding" "articlepdf"
                              (downcase *doi-utils-redirect*))))
|
||
|
||
;;** Science Direct
|
||
|
||
;; https://www.sciencedirect.com/science/article/pii/S001085452200577X?via%3Dihub
|
||
;; https://www.sciencedirect.com/science/article/pii/S001085452200577X/pdfft?isDTMRedir=true&download=true
|
||
|
||
(defun science-direct-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
e.g. https://www.sciencedirect.com/science/article/pii/S001085452200577X?via%3Dihub
->   https://www.sciencedirect.com/science/article/pii/S001085452200577X/pdfft?isDTMRedir=true&download=true"
  (when (string-match "^http\\(s?\\)://www.sciencedirect.com" *doi-utils-redirect*)
    ;; BUG FIX: the original called `replace-string', an interactive
    ;; command that edits the current buffer and returns nil, so this
    ;; function never returned a url.  Instead, strip any "?via%3Dihub"
    ;; suffix and append the direct-download path (LITERAL arg t since
    ;; the strings contain regexp/replacement special characters).
    (concat (replace-regexp-in-string (regexp-quote "?via%3Dihub") ""
                                      *doi-utils-redirect* t t)
            "/pdfft?isDTMRedir=true&download=true")))
|
||
|
||
;; (defun doi-utils-get-science-direct-pdf-url (redirect-url)
|
||
;; "Science direct hides the pdf url in html. We get it out here.
|
||
;; REDIRECT-URL is where the pdf url will be in."
|
||
;; (let ((first-url
|
||
;; (with-current-buffer (url-retrieve-synchronously redirect-url)
|
||
;; (goto-char (point-min))
|
||
;; (when (re-search-forward "pdf_url\" content=\"\\([^\"]*\\)\"" nil t)
|
||
;; (match-string-no-properties 1)))))
|
||
;; (and first-url
|
||
;; (with-current-buffer (url-retrieve-synchronously first-url)
|
||
;; (goto-char (point-min))
|
||
;; (when (re-search-forward "or click <a href=\"\\([^\"]*\\)\">" nil t)
|
||
;; (match-string-no-properties 1))))))
|
||
|
||
;; (defun science-direct-pdf-url (*doi-utils-redirect*)
|
||
;; "Get url to the pdf from *DOI-UTILS-REDIRECT*."
|
||
;; (when (string-match "^http\\(s?\\)://www.sciencedirect.com" *doi-utils-redirect*)
|
||
;; (doi-utils-get-science-direct-pdf-url *doi-utils-redirect*)))
|
||
|
||
;; sometimes I get
|
||
;; http://linkinghub.elsevier.com/retrieve/pii/S0927025609004558
|
||
;; which actually redirect to
|
||
;; http://www.sciencedirect.com/science/article/pii/S0927025609004558
|
||
|
||
;; https://www.sciencedirect.com/science/article/pii/S001085452200577X?via%3Dihub
|
||
;; https://www.sciencedirect.com/science/article/pii/S001085452200577X/pdfft?isDTMRedir=true&download=true
|
||
|
||
;; (defun linkinghub-elsevier-pdf-url (*doi-utils-redirect*)
|
||
;; "Get url to the pdf from *DOI-UTILS-REDIRECT*."
|
||
;; (when (string-match
|
||
;; "^https://linkinghub.elsevier.com/retrieve" *doi-utils-redirect*)
|
||
;; (science-direct-pdf-url
|
||
;; (replace-regexp-in-string
|
||
;; ;; change URL to science direct and use function to get pdf URL
|
||
;; "https://linkinghub.elsevier.com/retrieve"
|
||
;; "https://www.sciencedirect.com/science/article"
|
||
;; *doi-utils-redirect*))))
|
||
|
||
;; https://www.sciencedirect.com/science/article/pii/S1385894723014973/pdfft?isDTMRedir=true&download=true
|
||
|
||
(defun linkinghub-elsevier-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a linkinghub.elsevier.com redirect, or nil."
  (when (string-match
         "^https://linkinghub.elsevier.com/retrieve" *doi-utils-redirect*)
    ;; Rewrite the host/path to the equivalent ScienceDirect article url,
    ;; then append the direct-pdf query.
    (let ((sd-url (replace-regexp-in-string
                   "https://linkinghub.elsevier.com/retrieve"
                   "https://www.sciencedirect.com/science/article"
                   *doi-utils-redirect*)))
      (concat sd-url "/pdfft?isDTMRedir=true"))))
|
||
|
||
;;** PNAS
|
||
;; http://www.pnas.org/content/early/2014/05/08/1319030111
|
||
;; http://www.pnas.org/content/early/2014/05/08/1319030111.full.pdf
|
||
|
||
;; with supporting info
|
||
;; http://www.pnas.org/content/early/2014/05/08/1319030111.full.pdf+html?with-ds=yes
|
||
|
||
(defun pnas-pdf-url (*doi-utils-redirect*)
  "Return the pdf url (with supporting info) for a PNAS page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://www.pnas.org" *doi-utils-redirect*)
       (format "%s.full.pdf?with-ds=yes" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Copernicus Publications
|
||
(defvar copernicus-journal-urls '(;; Anchored regexps, one per Copernicus Publications journal site.
                                  "^https://www.adv-geosci.net/"
                                  "^https://www.adv-radio-sci.net/"
                                  "^https://www.adv-sci-res.net/"
                                  "^https://www.adv-stat-clim-meteorol-oceanogr.net/"
                                  "^https://www.ann-geophys.net/"
                                  "^https://www.arch-anim-breed.net/"
                                  "^https://www.astra-proc.net/"
                                  "^https://www.atmos-chem-phys.net/"
                                  "^https://www.atmos-chem-phys-discuss.net/"
                                  "^https://www.atmos-meas-tech.net/"
                                  "^https://www.atmos-meas-tech-discuss.net/"
                                  "^https://www.biogeosciences.net/"
                                  "^https://www.biogeosciences-discuss.net/"
                                  ;; NOTE(review): unlike the rest, this entry includes a page
                                  ;; path ("recent_papers.html") — confirm that is intended.
                                  "^https://www.clim-past.net/recent_papers.html"
                                  "^https://www.clim-past-discuss.net/"
                                  "^https://www.drink-water-eng-sci.net/"
                                  "^https://www.drink-water-eng-sci-discuss.net/"
                                  "^https://www.eg-quaternary-sci-j.net/"
                                  "^https://www.earth-surf-dynam.net/"
                                  "^https://www.earth-surf-dynam-discuss.net/"
                                  "^https://www.earth-syst-dynam.net/"
                                  "^https://www.earth-syst-dynam-discuss.net/"
                                  "^https://www.earth-syst-sci-data.net/"
                                  "^https://www.earth-syst-sci-data-discuss.net/"
                                  "^https://www.foss-rec.net/"
                                  "^https://www.geogr-helv.net/"
                                  "^https://www.geosci-instrum-method-data-syst.net/"
                                  "^https://www.geosci-instrum-method-data-syst-discuss.net/"
                                  "^https://www.geosci-model-dev.net/"
                                  "^https://www.geosci-model-dev-discuss.net/"
                                  "^https://www.hist-geo-space-sci.net/"
                                  "^https://www.hydrol-earth-syst-sci.net/"
                                  "^https://www.hydrol-earth-syst-sci-discuss.net/"
                                  "^https://www.j-sens-sens-syst.net/"
                                  "^https://www.mech-sci.net/"
                                  "^https://www.nat-hazards-earth-syst-sci.net/"
                                  "^https://www.nonlin-processes-geophys-discuss.net/"
                                  "^https://www.ocean-sci.net/"
                                  "^https://www.ocean-sci-discuss.net/"
                                  "^https://www.primate-biol.net/"
                                  "^https://www.proc-iahs.net/"
                                  "^https://www.sci-dril.net/"
                                  "^https://www.soil-journal.net/"
                                  "^https://www.soil-discuss.net/"
                                  "^https://www.solid-earth.net/"
                                  "^https://www.solid-earth-discuss.net/"
                                  "^https://www.stephan-mueller-spec-publ-ser.net/"
                                  "^https://www.the-cryosphere.net/"
                                  "^https://www.the-cryosphere-discuss.net/"
                                  "^https://www.web-ecol.net/"
                                  "^https://www.wind-energ-sci.net/"
                                  "^https://www.wind-energ-sci-discuss.net/"
                                  )
  "List of Copernicus URLs.
Regexps matched against the redirect url by `copernicus-pdf-url' to
recognize Copernicus Publications journal sites.")
|
||
|
||
(defun doi-utils-get-copernicus-pdf-url (redirect-url)
  "Copernicus hides the pdf url in html.  We get it out here.
REDIRECT-URL is where the pdf url will be in.
Fetches the page, extracts the citation_pdf_url metatag, stores it in
`*doi-utils-pdf-url*' (nil if the tag is absent) and returns it.
Blocks until the asynchronous retrieval completes."
  (setq *doi-utils-waiting* t)
  (url-retrieve
   redirect-url
   (lambda (_status)                    ; status unused; _ avoids a warning
     (goto-char (point-min))
     ;; Only read the match when the search succeeded; the original read
     ;; `match-string' unconditionally, which uses stale match data when
     ;; the metatag is missing.
     (setq *doi-utils-pdf-url*
           (when (re-search-forward
                  "citation_pdf_url\" content=\"\\([^\"]*\\)\"" nil t)
             (match-string 1))
           *doi-utils-waiting* nil)))
  ;; Busy-wait for the callback to finish.
  (while *doi-utils-waiting* (sleep-for 0.1))
  *doi-utils-pdf-url*)
|
||
|
||
(defun copernicus-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
Matches the redirect against each regexp in `copernicus-journal-urls';
on a match, fetches the page with `doi-utils-get-copernicus-pdf-url'
and returns the first extracted pdf url (nil when nothing matches)."
  (car (cl-loop for copurl in copernicus-journal-urls
		when (string-match copurl *doi-utils-redirect*)
		collect
		;; The fetch sets `*doi-utils-pdf-url*' as a side effect.
		(progn (doi-utils-get-copernicus-pdf-url *doi-utils-redirect*)
		       *doi-utils-pdf-url*))))
|
||
|
||
|
||
;;** Sage
|
||
(defun sage-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Sage (pss.sagepub.com) page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://pss.sagepub.com" *doi-utils-redirect*)
       (format "%s.full.pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
;;** Journal of Neuroscience
|
||
(defun jneurosci-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Journal of Neuroscience page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://www.jneurosci.org" *doi-utils-redirect*)
       (format "%s.full.pdf" *doi-utils-redirect*)))
|
||
|
||
;;** Generic .full.pdf
|
||
(defun generic-full-pdf-url (*doi-utils-redirect*)
  "Return *DOI-UTILS-REDIRECT* with \".full.pdf\" appended, if that url exists.
Generic fallback for any publisher: probes the candidate url over HTTP
via `url-http-file-exists-p', so this makes a network request."
  (let ((pdf (concat *doi-utils-redirect* ".full.pdf")))
    (when (url-http-file-exists-p pdf)
      pdf)))
|
||
|
||
;;** IEEE
|
||
;; 10.1109/re.2014.6912247
|
||
;; http(s)://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6912247
|
||
;; http(s)://ieeexplore.ieee.org/ielx7/6903646/6912234/06912247.pdf
|
||
;; http(s)://ieeexplore.ieee.org/iel7/6903646/6912234/06912247.pdf?arnumber=6912247
|
||
;; <meta name="citation_pdf_url" content="http(s)://ieeexplore.ieee.org/iel7/6903646/6912234/06912247.pdf?arnumber=6912247">
|
||
;; <frame src="http(s)://ieeexplore.ieee.org/ielx7/6903646/6912234/06912247.pdf?tp=&arnumber=6912247&isnumber=6912234" frameborder=0 />
|
||
(defun ieee-pdf-url (*doi-utils-redirect*)
  "Get a url to the pdf from *DOI-UTILS-REDIRECT* for IEEE urls.
Fetches the article page and reads the citation_pdf_url metatag, which
points at a frameset page; fetches that and extracts the pdf url from
its <frame> tag.  Makes synchronous network requests."
  (when (string-match "^https?://ieeexplore.ieee.org" *doi-utils-redirect*)
    (with-current-buffer (url-retrieve-synchronously *doi-utils-redirect*)
      (goto-char (point-min))
      ;; Step 1: the metatag gives the framed viewer page, not the pdf.
      (when (re-search-forward "<meta name=\"citation_pdf_url\" content=\"\\([[:ascii:]]*?\\)\">" nil t)
        (let ((framed-url (match-string 1)))
          (with-current-buffer (url-retrieve-synchronously framed-url)
            (goto-char (point-min))
            ;; Step 2: the real pdf url is in the frame's src attribute.
            (when (re-search-forward "<frame src=\"\\(http[[:ascii:]]*?\\)\"" nil t)
              (match-string 1))))))))
|
||
|
||
;; At least some IEEE papers need the following new pdf-link parsing
|
||
;; Example: 10.1109/35.667413
|
||
(defun ieee2-pdf-url (*doi-utils-redirect*)
  "Get a url to the pdf from *DOI-UTILS-REDIRECT* for IEEE urls.
Newer IEEE pages embed a JSON \"pdfUrl\" field instead of a metatag
(example DOI: 10.1109/35.667413); follow it to the frameset page and
extract the pdf url from the <frame> tag.  Makes synchronous network
requests."
  (when (string-match "^https?://ieeexplore.ieee.org" *doi-utils-redirect*)
    (with-current-buffer (url-retrieve-synchronously *doi-utils-redirect*)
      (goto-char (point-min))
      ;; "pdfUrl" is a site-relative path in the page's embedded JSON.
      (when (re-search-forward "\"pdfUrl\":\"\\([[:ascii:]]*?\\)\"" nil t)
        (let ((framed-url (match-string 1)))
          (with-current-buffer (url-retrieve-synchronously (concat "http://ieeexplore.ieee.org" framed-url))
            (goto-char (point-min))
            (when (re-search-forward "<frame src=\"\\(http[[:ascii:]]*?\\)\"" nil t)
              (match-string 1))))))))
|
||
|
||
;; Another try to get the ieee pdf
|
||
;; <iframe src="http(s)://ieeexplore.ieee.org/ielx5/8/4538127/04538164.pdf?tp=&arnumber=4538164&isnumber=4538127" frameborder=0>
|
||
(defun ieee3-pdf-url (*doi-utils-redirect*)
  "Get a url to the pdf from *DOI-UTILS-REDIRECT* for IEEE urls.
Like `ieee2-pdf-url' but the viewer page uses an <iframe> rather than a
<frame> tag.  Makes synchronous network requests."
  (when (string-match "^https?://ieeexplore.ieee.org" *doi-utils-redirect*)
    (with-current-buffer (url-retrieve-synchronously *doi-utils-redirect*)
      (goto-char (point-min))
      ;; "pdfUrl" is a site-relative path in the page's embedded JSON.
      (when (re-search-forward "\"pdfUrl\":\"\\([[:ascii:]]*?\\)\"" nil t)
        (let ((framed-url (match-string 1)))
          (with-current-buffer (url-retrieve-synchronously (concat "http://ieeexplore.ieee.org" framed-url))
            (goto-char (point-min))
            (when (re-search-forward "<iframe src=\"\\(http[[:ascii:]]*?\\)\"" nil t)
              (match-string 1))))))))
|
||
|
||
;; ACM Digital Library
|
||
;; https://dl.acm.org/doi/10.1145/1368088.1368132
|
||
(defun acm-pdf-url (*doi-utils-redirect*)
  "Get a url to the pdf from *DOI-UTILS-REDIRECT* for ACM urls.
e.g. https://dl.acm.org/doi/10.1145/1368088.1368132
->   https://dl.acm.org/doi/pdf/10.1145/1368088.1368132"
  (when (string-match "^https?://dl.acm.org" *doi-utils-redirect*)
    ;; Anchor the replacement to the "/doi/" path segment; the original
    ;; replaced every bare "doi" substring, which would also corrupt a
    ;; DOI suffix that happens to contain "doi".
    (replace-regexp-in-string "/doi/" "/doi/pdf/" *doi-utils-redirect*)))
|
||
|
||
;;** Optical Society of America (OSA)
|
||
(defun osa-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an OSA page at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^https://www.osapublishing.org" *doi-utils-redirect*)
    ;; the viewmedia endpoint serves the pdf for an abstract page
    (replace-regexp-in-string "abstract.cfm" "viewmedia.cfm"
                              *doi-utils-redirect*)))
|
||
|
||
|
||
|
||
;;** Publishers using Highwire Press metatags
|
||
;; For context and details, see:
|
||
;; https://webmasters.stackexchange.com/questions/72746/where-are-the-complete-set-of-highwire-press-metatags-defined
|
||
|
||
(defun highwire-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT* for Highwire-metatag sites.
Publishers using Highwire Press metatags embed the pdf location in a
citation_pdf_url <meta> tag, e.g.
<meta name=\"citation_pdf_url\" content=\"http://.../101013_1.pdf\" />
Fetches the page asynchronously, extracts that tag, and returns the
pdf url.  It would be better to parse the html, but here a regexp is
used.  Returns whatever `*doi-utils-pdf-url*' holds if the fetch does
not complete within about 5 seconds."
  (when (or (string-match "^http\\(s?\\)://biomechanical.asmedigitalcollection.asme.org" *doi-utils-redirect*)
            (string-match "^http\\(s?\\)://ojs.aaai.org" *doi-utils-redirect*)
            (string-match "^http\\(s?\\)://aclanthology.org" *doi-utils-redirect*))
    ;; Here `*doi-utils-waiting*' doubles as an elapsed-time counter
    ;; (in seconds) so the wait loop below can time out; the callback
    ;; sets it to nil when done.
    (setq *doi-utils-waiting* 0)
    (url-retrieve
     *doi-utils-redirect*
     (lambda (status)
       ;; Try the metatag in either attribute order.
       (or (progn (goto-char (point-min))
                  (re-search-forward "citation_pdf_url\"? content=\"\\(.*\\)\"" nil t))
           (progn (goto-char (point-min))
                  (re-search-forward "\"\\([^\"]*\\)\" name=\"?citation_pdf_url" nil t)))
       ;; (message-box (match-string 1))
       ;; NOTE(review): if neither regexp matched, `match-string' uses
       ;; stale match data here — confirm that path cannot occur.
       (setq *doi-utils-pdf-url* (match-string 1)
             *doi-utils-waiting* nil)))
    ;; Busy-wait until the callback finishes or ~5 s have elapsed.
    (while (and *doi-utils-waiting* (< *doi-utils-waiting* 5))
      (setq *doi-utils-waiting* (+ *doi-utils-waiting* 0.1))
      (sleep-for 0.1))
    *doi-utils-pdf-url*))
|
||
|
||
|
||
;; Society for Industrial and Applied Mathematics (SIAM)
|
||
(defun siam-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a SIAM page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match "^http\\(s?\\)://epubs.siam.org" *doi-utils-redirect*)
       (replace-regexp-in-string "/doi/" "/doi/pdf/" *doi-utils-redirect*)))
|
||
|
||
;; PLOS journals
|
||
;; https://plos.org/
|
||
(defun plos-pdf-url (*doi-utils-redirect*)
  "Return the printable pdf url for a PLOS article at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^http\\(s*\\)://journals.plos.org" *doi-utils-redirect*)
    ;; swap the article viewer for the file endpoint, quoted literally
    ;; because the needle contains "?" and "="
    (let ((file-url (replace-regexp-in-string (regexp-quote "/article?id=")
                                              "/article/file?id="
                                              *doi-utils-redirect*)))
      (concat file-url "&type=printable"))))
|
||
|
||
|
||
;; https://www.frontiersin.org/articles/10.3389/fchem.2022.1037997/full
|
||
;; https://www.frontiersin.org/articles/10.3389/fchem.2022.1037997/pdf
|
||
(defun frontiers-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT* for Frontiers journals.
e.g. https://www.frontiersin.org/articles/10.3389/fchem.2022.1037997/full
->   https://www.frontiersin.org/articles/10.3389/fchem.2022.1037997/pdf"
  (when (string-match "^http\\(s*\\)://www.frontiersin.org" *doi-utils-redirect*)
    (replace-regexp-in-string "/full" "/pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
|
||
|
||
;; https://chemistry-europe.onlinelibrary.wiley.com/doi/10.1002/celc.201902035
|
||
;; https://chemistry-europe.onlinelibrary.wiley.com/doi/epdf/10.1002/celc.201902035
|
||
(defun chemistry-europe-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT* for Chemistry Europe journals.
e.g. https://chemistry-europe.onlinelibrary.wiley.com/doi/10.1002/celc.201902035
->   https://chemistry-europe.onlinelibrary.wiley.com/doi/pdfdirect/10.1002/celc.201902035?download=true"
  (when (string-match "^http\\(s*\\)://chemistry-europe.onlinelibrary.wiley.com" *doi-utils-redirect*)
    (concat
     (replace-regexp-in-string "/doi" "/doi/pdfdirect" *doi-utils-redirect*)
     "?download=true")))
|
||
|
||
|
||
;; ** from issue #1081
|
||
|
||
(defun arxiv-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for an arXiv abstract page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match-p "^https?://arxiv\\.org" *doi-utils-redirect*)
       (format "%s.pdf"
               (replace-regexp-in-string "/abs/" "/pdf/" *doi-utils-redirect*))))
|
||
|
||
|
||
(defun rss-pdf-url (*doi-utils-redirect*)
  "Return the pdf url for a Robotics: Science and Systems page at *DOI-UTILS-REDIRECT*, or nil."
  (and (string-match-p "roboticsproceedings" *doi-utils-redirect*)
       (replace-regexp-in-string "\\.html" ".pdf" *doi-utils-redirect*)))
|
||
|
||
|
||
(defun ieeestamp-pdf-url (*doi-utils-redirect*)
  "Return the stampPDF url for an IEEE document page at *DOI-UTILS-REDIRECT*, or nil."
  (when (string-match "^https?://ieeexplore\\.ieee\\.org/document/\\([0-9]+\\)"
                      *doi-utils-redirect*)
    ;; The article number from the /document/ path keys the stampPDF endpoint.
    (format "https://ieeexplore.ieee.org/stampPDF/getPDF.jsp?tp=&arnumber=%s"
            (match-string 1 *doi-utils-redirect*))))
|
||
|
||
|
||
;;** Add all functions
|
||
|
||
;; Install the full set of publisher-specific pdf-url recipes.  Each entry
;; is a function of the redirect url that returns a pdf url or nil; they
;; are tried in order by `doi-utils-get-pdf-url'.
(setq doi-utils-pdf-url-functions
      '(aps-pdf-url
        science-pdf-url
        nature-pdf-url
        ;; wiley-pdf-url
        wiley-pdf-url-2
        springer-chapter-pdf-url
        springer-pdf-url
        acs-pdf-url-1
        acs-pdf-url-2
        acs-pdf-url-3
        iop-pdf-url
        jstor-pdf-url
        aip-pdf-url
        aip-pdf-url-2
        science-direct-pdf-url
        linkinghub-elsevier-pdf-url
        tandfonline-pdf-url
        ecs-pdf-url
        ecst-pdf-url
        rsc-pdf-url
        pnas-pdf-url
        copernicus-pdf-url
        sage-pdf-url
        jneurosci-pdf-url
        ieee-pdf-url
        ieee2-pdf-url
        ieee3-pdf-url
        acm-pdf-url
        osa-pdf-url
        highwire-pdf-url
        siam-pdf-url
        agu-pdf-url
        plos-pdf-url
        frontiers-pdf-url
        chemistry-europe-pdf-url
        generic-full-pdf-url
        arxiv-pdf-url
        rss-pdf-url
        ieeestamp-pdf-url))
|
||
|
||
;;** Get the pdf url for a doi
|
||
|
||
(defun doi-utils-get-pdf-url (doi)
  "Return a url to a pdf for the DOI if one can be calculated.
Loops through the functions in `doi-utils-pdf-url-functions'
until one is found."
  (doi-utils-get-redirect doi)
  (unless *doi-utils-redirect*
    (error "No redirect found for %s" doi))
  ;; Return the first recipe result that is non-nil.
  (cl-loop for recipe in doi-utils-pdf-url-functions
           thereis (funcall recipe *doi-utils-redirect*)))
|
||
|
||
;;** Finally, download the pdf
|
||
(defvar bibtex-completion-library-path)
|
||
(defvar bibtex-completion-bibliography)
|
||
(declare-function async-start "async")
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-async-download-pdf ()
  "Download the PDF for bibtex entry at point asynchronously.
It is not fully async, only the download is.  Fully async is
harder because you need to run `doi-utils-get-pdf-url' async
too. "
  (interactive)
  (require 'async)
  (save-excursion
    (bibtex-beginning-of-entry)
    (let (;; Get the doi, stripping any http(s)://(dx.)doi.org/ prefix.
          ;; The dots are escaped; the previous pattern contained a stray
          ;; "." wildcard after the optional group, which made it match
          ;; neither https://doi.org/... nor https://dx.doi.org/... .
          (doi (replace-regexp-in-string
                "https?://\\(dx\\.\\)?doi\\.org/" ""
                (bibtex-autokey-get-field "doi")))
          (base-name (funcall doi-utils-pdf-filename-function))
          (pdf-url)
          (pdf-file))

      ;; Resolve the directory to save into: `bibtex-completion-library-path'
      ;; may be a single directory string or a list of directories.
      (setq pdf-file
            (concat (cond
                     ((stringp bibtex-completion-library-path)
                      bibtex-completion-library-path)
                     ((= 1 (length bibtex-completion-library-path))
                      (car bibtex-completion-library-path))
                     (t
                      (completing-read "Dir: " bibtex-completion-library-path)))
                    base-name ".pdf"))

      ;; NOTE(review): `bibtex-autokey-get-field' returns "" (not nil) for a
      ;; missing field, so this check is only effective if doi can be nil;
      ;; kept as in the original.
      (unless doi (error "No DOI found to get a pdf for"))

      (when (file-exists-p pdf-file)
        (error "%s already exists. Delete to re-download" pdf-file))

      ;; If you get here, try getting the pdf file in a child Emacs process.
      (async-start
       `(lambda ()
          ;; Recreate enough of the parent's load environment so that
          ;; doi-utils is loadable in the child process.
          (setq package-user-dir ,package-user-dir)
          (require 'package)
          (package-initialize)
          (setq load-path (list ,@load-path))
          (require 'doi-utils)

          (setq pdf-url (doi-utils-get-pdf-url ,doi))
          (when pdf-url
            (url-copy-file pdf-url ,pdf-file t)

            ;; Validate by checking the first five bytes for the %PDF- magic.
            (let* ((header (with-temp-buffer
                             (set-buffer-multibyte nil)
                             (insert-file-contents-literally ,pdf-file nil 0 5)
                             (buffer-string)))
                   (valid (and (stringp header)
                               (string-equal (encode-coding-string header 'utf-8) "%PDF-"))))
              (if valid
                  (format "%s downloaded" ,pdf-file)
                ;; Not a pdf (probably an html error page): delete it and
                ;; fall back to the browser so the user can intervene.
                (delete-file ,pdf-file)
                (require 'browse-url)
                (browse-url pdf-url)
                (message "Invalid pdf (file deleted). Header = %s" header)))))
       `(lambda (result)
          (message "doi-utils-async-download-pdf: %s" result))))))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-get-bibtex-entry-pdf (&optional arg)
  "Download pdf for entry at point if the pdf does not already exist locally.
The entry must have a doi.  The pdf will be saved, by the name
%s.pdf where %s is the bibtex label.  Files will not be
overwritten.  The pdf will be checked to make sure it is a pdf,
and not some html failure page.  You must have permission to
access the pdf.  We open the pdf at the end if
`doi-utils-open-pdf-after-download' is non-nil.

With one prefix ARG, directly get the pdf from a file (through
`read-file-name') instead of looking up a DOI.  With a double
prefix ARG, directly get the pdf from an open buffer (through
`read-buffer-to-switch') instead.  These two alternative methods
work even if the entry has no DOI, and the pdf file is not
checked."
  (interactive "P")
  (save-excursion
    (bibtex-beginning-of-entry)
    (let (;; Get the doi, stripping any http(s)://(dx.)doi.org/ prefix.
          ;; The dots are escaped; the previous pattern contained a stray
          ;; "." wildcard after the optional group, which made it match
          ;; neither https://doi.org/... nor https://dx.doi.org/... .
          (doi (replace-regexp-in-string
                "https?://\\(dx\\.\\)?doi\\.org/" ""
                (bibtex-autokey-get-field "doi")))
          (base-name (funcall doi-utils-pdf-filename-function))
          (pdf-url)
          (pdf-file))

      ;; Resolve the target directory: `bibtex-completion-library-path' may
      ;; be a single directory string or a list of directories.
      (setq pdf-file
            (concat (cond
                     ((stringp bibtex-completion-library-path)
                      bibtex-completion-library-path)
                     ((= 1 (length bibtex-completion-library-path))
                      (car bibtex-completion-library-path))
                     (t
                      (completing-read "Dir: " bibtex-completion-library-path)))
                    base-name ".pdf"))
      ;; now get file if needed.
      (unless (file-exists-p pdf-file)
        (cond
         ;; Normal path: no prefix arg, we have a doi and a recipe gave a url.
         ((and (not arg)
               doi
               (setq pdf-url (doi-utils-get-pdf-url doi)))
          (url-copy-file pdf-url pdf-file)
          ;; now check if we got a pdf
          (if (org-ref-pdf-p pdf-file)
              (message "%s saved" pdf-file)
            (delete-file pdf-file)
            (message "No pdf was downloaded.")
            (browse-url pdf-url)))
         ;; C-u: copy the pdf from a local file (unchecked).
         ((equal arg '(4))
          (copy-file (expand-file-name (read-file-name "Pdf file: " nil nil t))
                     pdf-file))
         ;; C-u C-u: save the pdf from an open buffer (unchecked).
         ((equal arg '(16))
          (with-current-buffer (read-buffer-to-switch "Pdf buffer: ")
            (write-file pdf-file)))
         (t
          (message "We don't have a recipe for this journal."))))

      ;; Record the saved file in the entry, then optionally open it.
      (when (file-exists-p pdf-file)
        (bibtex-set-field "file" pdf-file))

      (when (and doi-utils-open-pdf-after-download (file-exists-p pdf-file))
        (org-open-file pdf-file)))))
|
||
|
||
;;* Getting bibtex entries from a DOI
|
||
|
||
;; [[http://homepages.see.leeds.ac.uk/~eeaol/notes/2013/02/doi-metadata/][found]]
|
||
;; you can download metadata about a DOI from http://dx.doi.org. You just have
|
||
;; to construct the right http request to get it. Here is a function that gets
|
||
;; the metadata as a plist in emacs.
|
||
;;
|
||
;;
|
||
;; Session-local memoization of downloaded citeproc metadata.
(defvar doi-utils-cache nil
  "Cache variable for storing data we can reuse.
A-list (doi . data) where doi is doi string, and data is what is
retrieved from it.  This is transient, and disappears when you
restart Emacs.  This mostly exists to prevent
`doi-utils-update-field' from needing to download the data for
every field.")
|
||
|
||
|
||
(defun doi-utils-clear-cache ()
  "Reset `doi-utils-cache' to empty."
  (interactive)
  (setq doi-utils-cache nil))
|
||
|
||
|
||
(defun doi-utils-get-json-metadata (doi)
  "Try to get json metadata for DOI. Open the DOI in a browser if we do not get it."
  ;; `if-let*' binds data in scope of both branches of its expansion, so the
  ;; later (setq data ...) updates the same binding.
  (if-let* ((data (cdr (assoc doi doi-utils-cache))))
      ;; We have the data already, so we return it.
      data
    (let ((url-request-method "GET")
          ;; Ask dx.doi.org for citeproc json via content negotiation.
          (url-mime-accept-string "application/citeproc+json")
          (json-object-type 'plist)
          (json-data)
          (url (concat doi-utils-dx-doi-org-url doi)))
      (with-temp-buffer
        (url-insert
         (url-retrieve-synchronously url))
        (setq json-data (buffer-string))

        ;; Recognize the common failure pages before attempting to parse.
        (when (or (string-match "<title>Error: DOI Not Found</title>" json-data)
                  (string-match "Resource not found" json-data)
                  (string-match "Status *406" json-data)
                  (string-match "400 Bad Request" json-data))
          (browse-url (concat doi-utils-dx-doi-org-url doi))
          (error "Something went wrong. We got this response:
%s

Opening %s" json-data url))

        ;; Parse, memoize in `doi-utils-cache', and return the plist.
        (setq data (json-read-from-string json-data))
        (cl-pushnew (cons doi data) doi-utils-cache)
        data))))
|
||
|
||
|
||
(defun doi-utils-get-json-metadata-curl (doi)
  "Try to get json metadata for DOI. Open the DOI in a browser if we do not get it."
  ;; Alternative to `doi-utils-get-json-metadata' that shells out to curl.
  ;; Note: unlike that function, this one does not use `doi-utils-cache'.
  (let ((json-object-type 'plist)
        (json-data)
        (url (concat doi-utils-dx-doi-org-url doi)))
    (with-temp-buffer
      ;; --location follows the doi.org redirect; the Accept header asks
      ;; for citeproc json via content negotiation.
      (call-process "curl" nil t nil
                    "--location"
                    "--silent"
                    "--header"
                    "Accept: application/citeproc+json"
                    url)
      (setq json-data (buffer-string))
      (cond
       ;; Recognize the common failure pages before attempting to parse.
       ((or (string-match "<title>Error: DOI Not Found</title>" json-data)
            (string-match "Resource not found" json-data)
            (string-match "Status *406" json-data)
            (string-match "400 Bad Request" json-data))
        (browse-url url)
        (error "Something went wrong. We got this response:
%s
Opening %s" json-data url))
       ;; everything seems ok with the data
       (t
        (json-read-from-string json-data))))))
|
||
|
||
;; We can use that data to construct a bibtex entry. We do that by defining a
|
||
;; template, and filling it in. I wrote this template expansion code which
|
||
;; makes it easy to substitute values like %{} in emacs lisp.
|
||
|
||
|
||
(defun doi-utils-expand-template (s)
  "Expand a string template S containing %{} with the eval of its contents."
  (replace-regexp-in-string
   "%{\\([^}]+\\)}"
   (lambda (match)
     ;; match includes the %{ } wrapper; strip it, read and eval the sexp.
     (format "%s" (eval (read (substring match 2 -1)))))
   s))
|
||
|
||
|
||
;; Now we define a function that fills in that template from the metadata.
|
||
|
||
;; As different bibtex types share common keys, it is advantageous to separate
|
||
;; data extraction from json, and the formatting of the bibtex entry.
|
||
|
||
;; We use eval-and-compile because we use the three following forms in the
|
||
;; `doi-utils-def-bibtex-type' macro. Since the macro is expanded at compile
|
||
;; time, we need to ensure these defuns and defvars are evaluated at
|
||
;; compile-time.
|
||
(eval-and-compile
  ;; Table mapping a bibtex field name to the expression that extracts it
  ;; from the citeproc-json plist bound to `results'.  The expressions are
  ;; spliced into code by the `doi-utils-def-bibtex-type' macro at
  ;; expansion time.
  (defvar doi-utils-json-metadata-extract
    '((type (plist-get results :type))
      ;; Authors arrive either as :given/:family pairs or as a single :name.
      (author (mapconcat (lambda (x)
                           (message "%s" x)
                           (if (plist-get x :name)
                               (plist-get x :name)
                             (concat (plist-get x :given) " " (plist-get x :family))))
                         (plist-get results :author) " and "))
      (title (plist-get results :title))
      (subtitle (plist-get results :subtitle))
      (journal (plist-get results :container-title))
      (series (plist-get results :container-title))
      (publisher (plist-get results :publisher))
      (volume (plist-get results :volume))
      (issue (plist-get results :issue))
      (number (plist-get results :issue))
      ;; Year is the first element of the first :date-parts entry, falling
      ;; back to :approved (used e.g. by some thesis records).
      (year (or (elt (elt (plist-get (plist-get results :issued) :date-parts) 0) 0)
                (elt (elt (plist-get (plist-get results :approved) :date-parts) 0) 0)
                ))
      ;; Some dates don't have a month in them.
      (month (let ((date (elt
                          (plist-get (plist-get results :issued) :date-parts) 0)))
               (if (>= (length date) 2)
                   (elt date 1)
                 "-")))
      (pages (or (plist-get results :page)
                 (plist-get results :article-number)))
      (doi (plist-get results :DOI))
      (url (plist-get results :URL))
      (booktitle (plist-get results :container-title))
      (school (or (plist-get results :school)
                  (plist-get (plist-get results :institution) :name)))
      ;; I am not sure how general this is. This gets the first name.
      (institution (plist-get (car (plist-get results :institution)) :name))))

  ;; Next, we need to define the different bibtex types. Each type has a bibtex
  ;; type (for output) and the type as provided in the doi record. Finally, we
  ;; have to declare the fields we want to output.

  ;; List of generator functions, one per defined bibtex type.  Each takes
  ;; (type results) and returns a formatted entry string, or nil when the
  ;; type does not match.
  (defvar doi-utils-bibtex-type-generators nil)

  (defun doi-utils-concat-prepare (lst &optional acc)
    "Minimize the number of args passed to `concat' from LST.
Given a list LST of strings and other expressions, which are
intended to be passed to `concat', concat any subsequent strings,
minimising the number of arguments being passed to `concat'
without changing the results.  ACC is the list of additional
expressions."
    ;; Tail-recursive fold: adjacent string literals are merged; any
    ;; non-string expression is kept as-is and breaks the run.
    (cond ((null lst) (nreverse acc))
          ((and (stringp (car lst))
                (stringp (car acc)))
           (doi-utils-concat-prepare (cdr lst) (cons (concat (car acc) (car lst))
                                                     (cdr acc))))
          (t (doi-utils-concat-prepare (cdr lst) (cons (car lst) acc))))))
|
||
|
||
(defmacro doi-utils-def-bibtex-type (name matching-types &rest fields)
  "Define a BibTeX type identified by (symbol) NAME.
MATCHING-TYPES is a list of strings.  FIELDS are symbols that
match to retrieval expressions in
`doi-utils-json-metadata-extract'.  This type will only be used
when the `:type' parameter in the JSON metadata is contained in
MATCHING-TYPES."
  ;; Expands to a push of a generator closure onto
  ;; `doi-utils-bibtex-type-generators'.  The closure takes (type results)
  ;; and returns the formatted entry string, or nil when TYPE is not one of
  ;; MATCHING-TYPES.
  `(push (lambda (type results)
           (when
               ;; Only fire for one of MATCHING-TYPES.
               (or ,@(mapcar
                      (lambda (match-type)
                        `(string= type ,match-type)) matching-types))
             ;; Bind each requested field to the stringified value of its
             ;; extraction expression; unknown fields are a macro-time error.
             (let ,(mapcar (lambda (field)
                             (let ((field-expr
                                    (assoc field doi-utils-json-metadata-extract)))
                               (if field-expr
                                   ;; need to convert to string first
                                   `(,(car field-expr) (format "%s" ,(cadr field-expr)))
                                 (error "Unknown bibtex field type %s" field))))
                           fields)
               ;; Assemble "@type{,\n field = {value},\n ... }\n"; string
               ;; literals are pre-merged by `doi-utils-concat-prepare'.
               (concat
                ,@(doi-utils-concat-prepare
                   (org-ref--flatten-list
                    (list (concat "@" (symbol-name name) "{,\n")
                          ;; there seems to be some bug with mapcan,
                          ;; so we fall back to flatten
                          (mapcar (lambda (field)
                                    `(" " ,(symbol-name field) " = {" ,field "},\n"))
                                  fields)
                          "}\n")))))))
         doi-utils-bibtex-type-generators))
|
||
|
||
;; Concrete bibtex types.  Each call registers a generator mapping the
;; citeproc :type strings to a bibtex entry with the listed fields.
(doi-utils-def-bibtex-type article ("journal-article" "article-journal" "article")
                           author title journal year volume number pages doi url)

(doi-utils-def-bibtex-type inproceedings ("proceedings-article" "paper-conference")
                           author title booktitle year month pages doi url)

(doi-utils-def-bibtex-type book ("book" "edited-book")
                           author title series publisher year pages doi url)

(doi-utils-def-bibtex-type inbook ("chapter" "book-chapter" "reference-entry")
                           author title booktitle series publisher year pages doi url)

(doi-utils-def-bibtex-type phdthesis ("phdthesis" "thesis" "dissertation")
                           author title school publisher year)

(doi-utils-def-bibtex-type techreport ("report")
                           institution author title publisher year doi url)

;; this is what preprints in chemrxiv look like for now
(doi-utils-def-bibtex-type misc ("posted-content")
                           author title year doi url)
|
||
|
||
|
||
|
||
;; With the code generating the bibtex entry in place, we can glue it to the json retrieval code.
|
||
|
||
(defun doi-utils-doi-to-bibtex-string (doi)
  "Return a bibtex entry as a string for the DOI. Not all types are supported yet."
  (let* ((results (funcall doi-utils-metadata-function doi))
         (type (plist-get results :type))
         ;; The first generator whose type matches produces the entry.
         (entry (cl-loop for generator in doi-utils-bibtex-type-generators
                         thereis (funcall generator type results))))
    (if entry
        entry
      (message "%s not supported yet\n%S." type results))))
|
||
|
||
;; That is just the string for the entry. To be useful, we need a function that
|
||
;; inserts the string into a buffer. This function will insert the string at the
|
||
;; cursor, clean the entry, try to get the pdf.
|
||
|
||
(defun doi-utils-insert-bibtex-entry-from-doi (doi)
  "Insert and clean bibtex entry from a DOI."
  ;; Insert the raw entry and step inside its closing brace so the field
  ;; editing below operates on this entry.
  (insert (doi-utils-doi-to-bibtex-string doi))
  (backward-char)
  ;; set date added for the record
  (let ((ts (funcall doi-utils-timestamp-format-function)))
    (when ts
      (bibtex-set-field doi-utils-timestamp-field
                        ts)))
  ;; Normalize the entry (key, field order, etc.) via org-ref.
  (org-ref-clean-bibtex-entry)
  ;; Only file-visiting buffers can be saved.
  (when (buffer-file-name)
    (save-buffer)))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-add-bibtex-entry-from-doi (doi &optional bibfile)
  "Add DOI entry to end of a file in the current directory.
Pick the file ending with .bib or in .  If you have an active region that
starts like a DOI, that will be the initial prompt.  If no region
is selected and the first entry of the ‘kill-ring’ starts like a
DOI, then that is the initial prompt.  Otherwise, you have to type
or paste in a DOI.
Argument BIBFILE the bibliography to use."
  (interactive
   (list (read-string
          "DOI: "
          ;; now set initial input
          (doi-utils-maybe-doi-from-region-or-current-kill))))

  (unless bibfile
    (setq bibfile (completing-read "Bibfile: " (org-ref-possible-bibfiles))))
  ;; Wrap in save-window-excursion to restore your window arrangement after this
  ;; is done.
  (save-window-excursion
    (with-current-buffer
        (find-file-noselect bibfile)
      ;; Check if the doi already exists
      (goto-char (point-min))
      (if (re-search-forward (concat doi "\\_>") nil t)
          (message "%s is already in this file" doi)
        (goto-char (point-max))

        ;; Ensure the new entry is separated from the previous one by a
        ;; blank line.
        ;; NOTE(review): the LIMIT (min 3 (point)) bounds the backward
        ;; search at buffer position 3 rather than just before point --
        ;; looks intentional only as a cheap lower bound; confirm.
        (when (not (looking-back "\n\n" (min 3 (point))))
          (insert "\n\n"))

        (doi-utils-insert-bibtex-entry-from-doi doi)
        (save-buffer)))))
|
||
|
||
;; Short convenience name for `doi-utils-add-bibtex-entry-from-doi'.
(defalias 'doi-add-bibtex-entry 'doi-utils-add-bibtex-entry-from-doi
  "Alias function for convenience.")
|
||
|
||
(defun doi-utils-maybe-doi-from-region-or-current-kill ()
  "Try to get a DOI from the active region or current kill.
The active region is checked first, then the head of the kill
ring.  Each candidate may be a bare DOI, a doi.org/dx.doi.org
url, or a url built on `doi-utils-dx-doi-org-url'; any url prefix
is stripped from the returned DOI.  Return nil when nothing
matches."
  (let* ((the-active-region (if (region-active-p) ;; nil if no active region
                                (buffer-substring (region-beginning) (region-end))
                              nil))
         (the-current-kill (ignore-errors (current-kill 0 t))) ;; nil if empty kill ring
         ;; DOI urls
         ;; Ex: https://doi.org/10.1109/MALWARE.2014.6999410
         ;; Ex: https://dx.doi.org/10.1007/978-3-319-60876-1_10
         (doi-url-prefix-regexp "^https?://\\(dx\\.\\)?doi\\.org/")
         ;; https://www.crossref.org/blog/dois-and-matching-regular-expressions/
         (doi-regexp "10\\.[0-9]\\{4,9\\}/[-._;()/:A-Z0-9]+$"))
    ;; Try the candidates in priority order: region first, then kill.
    (catch 'doi
      (dolist (candidate (list the-active-region the-current-kill))
        (when (stringp candidate)
          (cond
           ;; Bare DOI, e.g. 10.1109/MALWARE.2014.6999410
           ((org-ref--string-match (concat "^" doi-regexp) candidate)
            (throw 'doi candidate))
           ;; doi.org / dx.doi.org url
           ((org-ref--string-match (concat doi-url-prefix-regexp doi-regexp) candidate)
            (throw 'doi (replace-regexp-in-string doi-url-prefix-regexp "" candidate)))
           ;; Url built on the customized `doi-utils-dx-doi-org-url'
           ((org-ref--string-match (regexp-quote doi-utils-dx-doi-org-url) candidate)
            (throw 'doi (replace-regexp-in-string
                         (regexp-quote doi-utils-dx-doi-org-url) "" candidate)))))))))
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-doi-to-org-bibtex (doi)
  "Convert a DOI to an ‘org-bibtex’ form and insert it at point."
  (interactive "sDOI: ")
  ;; Build and clean the entry in a temp buffer, killing the text so that
  ;; `org-bibtex-yank' can convert the killed bibtex into an org headline.
  (with-temp-buffer
    (insert (doi-utils-doi-to-bibtex-string doi))
    (bibtex-clean-entry)
    (kill-region (point-min) (point-max)))
  (org-bibtex-yank)
  ;; Demote the inserted headline twice.
  (org-metaright)
  (org-metaright))
|
||
|
||
;;* Updating bibtex entries
|
||
|
||
;; I wrote this code because it is pretty common for me to copy bibtex entries
|
||
;; from ASAP articles that are incomplete, e.g. no page numbers because it is
|
||
;; not in print yet. I wanted a convenient way to update an entry from its DOI.
|
||
;; Basically, we get the metadata, and update the fields in the entry.
|
||
|
||
;; There is not bibtex set field function, so I wrote this one.
|
||
|
||
|
||
;;;###autoload
|
||
(defun bibtex-set-field (field value &optional nodelim)
  "Set FIELD to VALUE in bibtex file.  create field if it does not exist.
Optional argument NODELIM see `bibtex-make-field'."
  (interactive "sfield: \nsvalue: ")
  (bibtex-beginning-of-entry)
  (let ((found))
    (if (setq found (bibtex-search-forward-field field t))
        ;; we found a field
        (progn
          (goto-char (car (cdr found)))
          ;; A nil VALUE leaves an existing field untouched.
          (when value
            (bibtex-kill-field)
            (bibtex-make-field field nil nil nodelim)
            (backward-char)
            (insert value)))
      ;; make a new field
      ;; NOTE(review): this branch does not guard against a nil VALUE;
      ;; (insert nil) would signal -- callers must pass a string here.
      (bibtex-beginning-of-entry)
      (forward-line) (beginning-of-line)
      (bibtex-next-field nil)
      (forward-char)
      (bibtex-make-field field nil nil nodelim)
      (backward-char)
      (insert value))))
|
||
|
||
|
||
(defun plist-get-keys (plist)
  "Return keys in a PLIST."
  (let (keys)
    ;; Walk the plist two cells at a time, collecting every key position.
    (while plist
      (push (car plist) keys)
      (setq plist (cddr plist)))
    (nreverse keys)))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-update-bibtex-entry-from-doi (doi)
  "Update fields in a bibtex entry from the DOI.
Every field will be updated, so previous change will be lost."
  (interactive
   (list
    ;; `replace-regexp-in-string' always returns a string (possibly ""),
    ;; so the previous (or ... (read-string ...)) never prompted.  Prompt
    ;; only when the entry has no doi field.
    (let ((field-doi (replace-regexp-in-string
                      "https?://\\(dx\\.\\)?doi\\.org/" ""
                      (bibtex-autokey-get-field "doi"))))
      (if (string= "" field-doi)
          (read-string "DOI: ")
        field-doi))))
  (let* ((results (funcall doi-utils-metadata-function doi))
         (type (plist-get results :type))
         ;; Authors come either as :given/:family pairs or a single :name.
         (author (mapconcat
                  (lambda (x)
                    (if (plist-get x :name)
                        (plist-get x :name)
                      (concat (plist-get x :given)
                              " " (plist-get x :family))))
                  (plist-get results :author) " and "))
         (title (plist-get results :title))
         (journal (plist-get results :container-title))
         ;; Year is the first element of the first :date-parts entry.
         (year (format "%s"
                       (elt
                        (elt
                         (plist-get
                          (plist-get results :issued) :date-parts) 0) 0)))
         (volume (plist-get results :volume))
         (number (or (plist-get results :issue) ""))
         (pages (or (plist-get results :page) ""))
         (url (or (plist-get results :URL) ""))
         (doi (plist-get results :DOI))
         mapping)

    ;; Map the json keys to the action that writes the corresponding bibtex
    ;; field.  Closures are used instead of the previous quoted forms +
    ;; `eval', which could not see these let-bound variables under the
    ;; file's lexical binding.
    (setq mapping
          (list (cons :author (lambda () (bibtex-set-field "author" author)))
                (cons :title (lambda () (bibtex-set-field "title" title)))
                (cons :container-title (lambda () (bibtex-set-field "journal" journal)))
                (cons :issued (lambda () (bibtex-set-field "year" year)))
                (cons :volume (lambda () (bibtex-set-field "volume" volume)))
                (cons :issue (lambda () (bibtex-set-field "number" number)))
                (cons :page (lambda () (bibtex-set-field "pages" pages)))
                (cons :DOI (lambda () (bibtex-set-field "doi" doi)))
                (cons :URL (lambda () (bibtex-set-field "url" url)))))

    ;; Run the action for every metadata key we have a mapping for; keys
    ;; without a mapping are ignored (as before).
    (mapc
     (lambda (key)
       (let ((action (cdr (assoc key mapping))))
         (when action
           (funcall action))))
     (plist-get-keys results)))

  (org-ref-clean-bibtex-entry))
|
||
|
||
|
||
;; A downside to updating an entry is it overwrites what you have already fixed.
|
||
;; So, we next develop a function to update the field at point.
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-update-field ()
  "Update the field at point in the bibtex entry.
Data is retrieved from the doi in the entry."
  (interactive)
  (let* ((doi (bibtex-autokey-get-field "doi"))
         (results (funcall doi-utils-metadata-function doi))
         (field (car (bibtex-find-text-internal nil nil ","))))
    (cond
     ((string= field "volume")
      (bibtex-set-field field (plist-get results :volume)))
     ((string= field "number")
      (bibtex-set-field field (plist-get results :issue)))
     ((string= field "pages")
      (bibtex-set-field field (or (plist-get results :page)
                                  (plist-get results :article-number))))
     ((string= field "year")
      ;; The citeproc metadata has no :year key; the year is the first
      ;; element of the first :date-parts entry under :issued, as in
      ;; `doi-utils-json-metadata-extract'.  The previous
      ;; (plist-get results :year) always returned nil.
      (bibtex-set-field field
                        (format "%s"
                                (elt (elt (plist-get
                                           (plist-get results :issued)
                                           :date-parts)
                                          0)
                                     0))))
     (t
      (message "%s not supported yet." field)))))
|
||
|
||
|
||
|
||
;;* DOI functions for WOS
|
||
|
||
;; I came across this API http://wokinfo.com/media/pdf/OpenURL-guide.pdf to make
|
||
;; links to the things I am interested in here. Based on that document, here are
|
||
;; three links based on a doi:10.1021/jp047349j that take you to different Web
|
||
;; Of Science (WOS) pages.
|
||
|
||
|
||
;; 1. go to article in WOS: http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:doi/10.1021/jp047349j
|
||
;; 2. citing articles: http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info%3Adoi%2F10.1021/jp047349j&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Asch_svc&svc.citing=yes
|
||
;; 3. related articles: http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info%3Adoi%2F10.1021/jp047349j&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Asch_svc&svc.related=yes
|
||
|
||
;; These are pretty easy to construct, so we can write functions that will
|
||
;; create them and open the url in our browser. There are some other options
|
||
;; that could be considered, but since we usually have a doi, it seems like the
|
||
;; best way to go for creating the links. Here are the functions.
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-wos (doi)
  "Open Web of Science entry for DOI."
  (interactive "sDOI: ")
  (browse-url
   (concat
    "http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:doi/"
    doi)))
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-wos-citing (doi)
  "Open Web of Science citing articles entry for DOI.
May be empty if none are found."
  (interactive "sDOI: ")
  ;; Build the OpenURL query, then hand it to the browser.
  (let ((url (concat
              "http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info%3Adoi%2F"
              doi
              "&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Asch_svc&svc.citing=yes")))
    (browse-url url)))
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-wos-related (doi)
  "Open Web of Science related articles page for DOI."
  (interactive "sDOI: ")
  ;; Build the OpenURL query, then hand it to the browser.
  (let ((url (concat
              "http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info%3Adoi%2F"
              doi
              "&svc_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Asch_svc&svc.related=yes")))
    (browse-url url)))
|
||
|
||
|
||
;;* DOI functions for ADS
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-ads (doi)
  "Open ADS entry for DOI."
  (interactive "sDOI: ")
  ;; The DOI is wrapped in url-encoded double quotes (%22).
  (browse-url
   (concat "https://ui.adsabs.harvard.edu/abs/%22" doi "%22")))
|
||
|
||
|
||
;;* A new doi link for org-mode
|
||
;; The idea is to add a menu to the doi link, so rather than just clicking to open the article, you can do other things.
|
||
;; 1. open doi
|
||
;; 2. open in wos
|
||
;; 3. open citing articles
|
||
;; 4. open related articles
|
||
;; 5. open bibtex entry
|
||
;; 6. get bibtex entry
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-open (doi)
  "Open DOI in browser."
  (interactive "sDOI: ")
  (let ((target (concat doi-utils-dx-doi-org-url doi)))
    (browse-url target)))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-open-bibtex (doi)
  "Search through variable `bibtex-completion-bibliography' for DOI.
Visit the first file containing DOI, leave point at the start of
the matching entry, and return that file name (nil if no file
matches)."
  (interactive "sDOI: ")
  (cl-loop for f in (org-ref-normalize-bibtex-completion-bibliography)
           when (progn (find-file f)
                       ;; Search from the top of the buffer.  Previously the
                       ;; search started from wherever point happened to be
                       ;; in an already-visited buffer, which could miss
                       ;; entries earlier in the file.
                       (goto-char (point-min))
                       (when (search-forward doi (point-max) t)
                         (bibtex-beginning-of-entry)))
           return f))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-crossref (doi)
  "Search DOI in CrossRef."
  (interactive "sDOI: ")
  (browse-url
   (concat "http://search.crossref.org/?q=" doi)))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-google-scholar (doi)
  "Google scholar the DOI."
  (interactive "sDOI: ")
  (browse-url
   (concat "http://scholar.google.com/scholar?q=" doi)))
|
||
|
||
|
||
;;;###autoload
|
||
(defun doi-utils-pubmed (doi)
  "Search Pubmed for the DOI."
  (interactive "sDOI: ")
  ;; The DOI is percent-encoded for use as a query term.
  (browse-url
   (concat "http://www.ncbi.nlm.nih.gov/pubmed/?term="
           (url-hexify-string doi))))
|
||
|
||
(declare-function org-element-property "org-element")
|
||
|
||
(defun doi-utils--context-doi ()
  "Return the :path (the DOI) of the org link element at point."
  (org-element-property :path (org-element-context)))
|
||
|
||
;; Transient menu shown when a doi: link is followed; every action
;; re-reads the DOI from the link at point via `doi-utils--context-doi'.
(transient-define-prefix doi-link-follow-menu ()
  "DOI actions."
  [["Actions"
    ("o" "open" (lambda () (interactive)
                  (doi-utils-open (doi-utils--context-doi))))
    ("w" "wos" (lambda () (interactive)
                 (doi-utils-wos (doi-utils--context-doi))))
    ("c" "wos citing articles" (lambda () (interactive)
                                 (doi-utils-wos-citing (doi-utils--context-doi))))
    ("r" "wos related articles" (lambda () (interactive)
                                  (doi-utils-wos-related (doi-utils--context-doi))))
    ("a" "ads" (lambda () (interactive)
                 (doi-utils-ads (doi-utils--context-doi))))
    ("s" "Google Scholar" (lambda () (interactive)
                            (doi-utils-google-scholar (doi-utils--context-doi))))
    ("f" "CrossRef" (lambda () (interactive)
                      (doi-utils-crossref (doi-utils--context-doi))))
    ("p" "Pubmed" (lambda () (interactive)
                    (doi-utils-pubmed (doi-utils--context-doi))))
    ("b" "open in bibtex" (lambda () (interactive)
                            (doi-utils-open-bibtex (doi-utils--context-doi))))
    ("g" "get bibtex entry" (lambda () (interactive)
                              (doi-utils-add-bibtex-entry-from-doi (doi-utils--context-doi))))
    ("q" "quit" transient-quit-one)]])
|
||
|
||
;; The hydra-era name; kept for backward compatibility since 3.1.
(define-obsolete-function-alias 'doi-link-follow/body
  #'doi-link-follow-menu "3.1")
|
||
|
||
|
||
;; Register the "doi" org link type.  Following pops up the action menu;
;; export renders an html anchor or a LaTeX \href (other backends get nil).
(org-link-set-parameters "doi"
                         :follow (lambda (_) (doi-link-follow-menu))
                         :export (lambda (doi desc format)
                                   (cond
                                    ((eq format 'html)
                                     (format "<a href=\"%s%s\">%s</a>"
                                             doi-utils-dx-doi-org-url
                                             doi
                                             (or desc (concat "doi:" doi))))
                                    ((eq format 'latex)
                                     (format "\\href{%s%s}{%s}"
                                             doi-utils-dx-doi-org-url
                                             doi
                                             (or desc (concat "doi:" doi)))))))
|
||
|
||
;;* Getting a doi for a bibtex entry missing one
|
||
|
||
;; Some bibtex entries do not have a DOI, maybe because they were entered by
|
||
;; hand, or copied from a source that did not have it available. Here we develop
|
||
;; some functions to help you find the DOI using Crossref.
|
||
|
||
;; Here is our example bibtex entry.
|
||
;; #+BEGIN_SRC bibtex
|
||
;; @article{deml-2014-oxide,
|
||
;; author = {Ann M. Deml and Vladan Stevanovi{\'c} and
|
||
;; Christopher L. Muhich and Charles B. Musgrave and
|
||
;; Ryan O'Hayre},
|
||
;; title = {Oxide Enthalpy of Formation and Band Gap Energy As
|
||
;; Accurate Descriptors of Oxygen Vacancy Formation
|
||
;; Energetics},
|
||
;; journal = {Energy Environ. Sci.},
|
||
;; volume = 7,
|
||
;; number = 6,
|
||
;; pages = 1996,
|
||
;; year = 2014,
|
||
;; doi = {10.1039/c3ee43874k,
|
||
;; url = {http://dx.doi.org/10.1039/c3ee43874k}},
|
||
|
||
;; }
|
||
|
||
|
||
;; The idea is to query Crossref in a way that is likely to give us a hit
|
||
;; relevant to the entry.
|
||
|
||
;; According to http://search.crossref.org/help/api we can send a query with a
|
||
;; free form citation that may give us something back. We do this to get a list
|
||
;; of candidates, which could be used to get the doi.
|
||
|
||
(declare-function org-ref-bib-citation "org-ref-bibtex")
|
||
;;;###autoload
(defun doi-utils-crossref-citation-query ()
  "Query Crossref with the citation of the bibtex entry at point.
Get a list of possible matches and choose one with completion.
The selected candidate's DOI is inserted into the entry's doi
field (stripped of any https://doi.org/ prefix), and, when the
entry has no url field, the doi url is inserted as the url.

Signals an error if the entry already has a doi field."
  (interactive)
  (bibtex-beginning-of-entry)
  (let* ((entry (bibtex-parse-entry)) ; also moves point past the entry
	 (raw-json-string)
	 (json-string)
	 (json-data)
	 (doi (bibtex-autokey-get-field "doi")))
    (unless (string= "" doi)
      (error "Entry already has a doi field"))

    (with-current-buffer
	(url-retrieve-synchronously
	 (concat
	  "http://search.crossref.org/dois?q="
	  (url-hexify-string (org-ref-bib-citation))))
      (save-excursion
	;; Strip the markup and decode the HTML entities Crossref embeds
	;; in the citations so they read cleanly in the completion list.
	(goto-char (point-min))
	(while (re-search-forward "<i>\\|</i>" nil t)
	  (replace-match ""))
	(goto-char (point-min))
	(while (re-search-forward "&amp;" nil t)
	  (replace-match "&"))
	(goto-char (point-min))
	(while (re-search-forward "&quot;" nil t)
	  (replace-match "\\\"" nil t)))
      (setq raw-json-string (buffer-substring url-http-end-of-headers (point-max)))
      ;; decode json string
      (setq json-string (decode-coding-string (encode-coding-string raw-json-string 'utf-8) 'utf-8))
      (setq json-data (json-read-from-string json-string)))

    (let* ((candidates (mapcar (lambda (x)
				 (cons
				  (concat
				   (cdr (assoc 'fullCitation x)))
				  (cdr (assoc 'doi x))))
			       json-data))
	   (doi (cdr (assoc (completing-read "DOI: " candidates) candidates))))
      (bibtex-make-field "doi" t)
      (backward-char)
      ;; crossref returns doi url, but I prefer only a doi for the doi field
      (insert (replace-regexp-in-string "^https?://\\(dx.\\)?doi.org/" "" doi))
      (when (string= "" (bibtex-autokey-get-field "url"))
	(bibtex-make-field "url" t)
	(backward-char)
	(insert doi)))))
|
||
|
||
|
||
|
||
;;* Debugging a DOI
|
||
|
||
;; I wrote this function to help debug a DOI. This function generates an
|
||
;; org-buffer with the doi, gets the json metadata, shows the bibtex entry, and
|
||
;; the pdf link for it.
|
||
(defun doi-utils-get-json (doi)
  "Return the citeproc JSON metadata for DOI as a string.
When dx.doi.org reports the DOI as unknown (\"Resource not
found\"), open its web page in a browser and signal an error
instead of returning."
  ;; The accept header asks dx.doi.org for citeproc JSON rather than a
  ;; redirect to the publisher page.
  (let* ((url-request-method "GET")
	 (url-mime-accept-string "application/citeproc+json")
	 (json-data (with-temp-buffer
		      (url-insert
		       (url-retrieve-synchronously
			(concat doi-utils-dx-doi-org-url doi)))
		      (buffer-string))))
    (if (string-match "Resource not found" json-data)
	(progn
	  (browse-url (concat doi-utils-dx-doi-org-url doi))
	  (error "Resource not found. Opening website"))
      json-data)))
|
||
|
||
|
||
;;;###autoload
(defun doi-utils-debug (doi)
  "Generate an org-buffer showing data about DOI.
Pops up the *debug-doi* buffer containing a doi link followed by
a pretty-printed dump of the citeproc JSON metadata fetched from
dx.doi.org."
  (interactive "sDOI: ")
  (switch-to-buffer "*debug-doi*")
  (erase-buffer)
  (org-mode)
  (insert (concat "doi:" doi) "\n\n")
  (let ((url-request-method "GET")
        (url-mime-accept-string "application/citeproc+json"))
    (insert "* JSON\n"
            ;; `pp' returns the pretty-printed string, which is what gets
            ;; inserted here.
            (pp (json-read-from-string
                 (with-temp-buffer
                   (url-insert
                    (url-retrieve-synchronously
                     (concat doi-utils-dx-doi-org-url doi)))
                   (buffer-string))))
            "\n\n"))
  (goto-char (point-min)))
|
||
|
||
;;* Adding a bibtex entry from a crossref query
|
||
|
||
;; The idea here is to perform a query on Crossref, get a completion buffer of
|
||
;; candidates, and select the entry(ies) you want to add to your bibtex file.
|
||
;; You can select a region, e.g. a free form citation, or set of words, or you
|
||
;; can type the query in by hand.
|
||
|
||
;;;###autoload
(defun doi-utils-add-entry-from-crossref-query (query bibtex-file)
  "Search Crossref with QUERY and use completion to select an entry to add to BIBTEX-FILE.
Interactively, QUERY defaults to the active region (with newlines
collapsed to spaces), and BIBTEX-FILE is chosen with completion
from the bib files in the current directory plus
`bibtex-completion-bibliography'.

Candidates are presented as \"title, authors, publisher, date.\"
and the selected candidate's DOI is handed to
`doi-utils-add-bibtex-entry-from-doi'."
  (interactive (list
		(read-string
		 "Query: "
		 ;; now set initial input
		 (cond
		  ;; If region is active assume we want it
		  ((region-active-p)
		   (replace-regexp-in-string
		    "\n" " "
		    (buffer-substring (region-beginning) (region-end))))
		  ;; type or paste it in
		  (t
		   nil)))
		(completing-read
		 "Bibfile: "
		 (append (org-ref--directory-files "." (lambda (f) (org-ref--file-ext-p f "bib")))
			 bibtex-completion-bibliography))))
  (let* ((json-data (with-temp-buffer
		      (url-insert
		       (url-retrieve-synchronously
			(concat
			 "https://api.crossref.org/works?query="
			 (url-hexify-string query))))
		      (json-read-from-string (buffer-string))))
	 ;; Build (display-string . doi) pairs from the Crossref hits.
	 (candidates (let-alist json-data
		       (cl-loop for item across .message.items
				collect (let-alist item
					  (cons (format "%s, %s, %s, %s."
							(string-join .title " ")
							(string-join
							 (cl-loop for author across .author collect
								  (let-alist author
								    (format "%s %s"
									    .given .family)))
							 ", ")
							.publisher
							.created.date-parts)
						.DOI)))))
	 (doi (cdr (assoc (completing-read "Choice: " candidates) candidates))))

    (with-current-buffer (find-file-noselect bibtex-file)
      ;; Crossref returns a doi url; strip the host so only the bare doi
      ;; goes into the new entry.
      (doi-utils-add-bibtex-entry-from-doi
       (replace-regexp-in-string
	"^https?://\\(dx.\\)?doi.org/" "" doi)
       bibtex-file)
      (save-buffer))))
|
||
|
||
|
||
;; Shorter, backwards-compatible name for the Crossref query command.
(defalias 'crossref-add-bibtex-entry #'doi-utils-add-entry-from-crossref-query
  "Alias function for convenience.")
|
||
|
||
;;* Convenience
|
||
|
||
(defun doi-utils-toggle-pdf-download ()
  "Toggle the setting of `doi-utils-download-pdf'.
I find this useful when downloading the pdfs slows down adding a
lot of references; then you just toggle it off."
  (interactive)
  ;; Flip the flag first, then report the new value in the echo area.
  (setq doi-utils-download-pdf (not doi-utils-download-pdf))
  (message "Setting doi-utils-download-pdf to %s" doi-utils-download-pdf))
|
||
|
||
;;* The end

(provide 'doi-utils)
;;; doi-utils.el ends here
|