Files
emacs/lisp/org-ref/openalex.el
2023-11-04 19:26:41 +01:00

460 lines
14 KiB
EmacsLisp

;;; openalex.el --- Org-ref interface to OpenAlex
;;; Commentary:
;; This is an elisp interface to OpenAlex (https://docs.openalex.org/) for org-ref.
;;
;; This provides functionality for the Work and Author API
;;
;; See
;; https://docs.openalex.org/how-to-use-the-api/rate-limits-and-authentication#the-polite-pool
;; for why we add email to the request.
(require 'dash)
(require 'request)
;;; Code:
(defun oa--response-parser ()
  "Read the JSON value at point in the current buffer as Lisp data.
JSON objects are returned as plists with keyword keys, JSON
arrays as lists, and JSON false as nil.  This function is passed
as the :parser argument of the `request' calls in this file."
  (let ((json-object-type 'plist)
        (json-key-type 'keyword)
        (json-array-type 'list)
        (json-false nil)
        (json-encoding-pretty-print nil))
    (json-read)))
;; * Work object
(defun oa--work (entity-id &optional filter)
  "Retrieve json data for a Work object for ENTITY-ID.
ENTITY-ID is an OpenAlex ID, DOI, Pubmed id, etc.
ENTITY-ID may also be a list of ids with a filter, joined into
one string (the callers in this file join them with \"|\").

If FILTER is non-nil it should be a string like
\"filter=openalex:\"; the request then goes to the list endpoint
\"/works?FILTER ENTITY-ID\" instead of \"/works/ENTITY-ID\".

When `user-mail-address' is set it is sent as the \"mailto\"
query parameter.

Return the parsed response as a plist, with the request URL
stored under :oa-url, or nil when the response carried no data.

https://docs.openalex.org/api-entities/works"
  (let* ((mailto (and user-mail-address
                      (concat "mailto=" user-mail-address)))
         (url (concat "https://api.openalex.org/works"
                      (if filter
                          ;; The filter already opens the query string with
                          ;; "?", so further parameters are joined with "&".
                          (concat "?" filter entity-id
                                  (and mailto (concat "&" mailto)))
                        (concat "/" entity-id
                                (and mailto (concat "?" mailto))))))
         (req (request url :sync t :parser 'oa--response-parser))
         (data (request-response-data req)))
    ;; this is for convenience to inspect data in a browser.
    (plist-put data :oa-url url)
    data))
;; * Viewing works
;;
;; This section provides a replacer and helper function to format org-entries
;; from the results returned in OpenAlex.
(defun oa--replacer (query object)
  "Replacer function for `s-format'.
QUERY is a string that is either a sexp for a function to
evaluate or a dot notation path to data in OBJECT.  If QUERY
starts with an open parenthesis it is read and evaluated, and the
value of that form is returned.  Otherwise the path is split on
dots and each part is looked up in turn as a keyword in OBJECT.

OBJECT is a plist, usually from a Work request.

For a path, return the value formatted as a string on a single
line, or \"Not found\" when the path leads to nil."
  (if (string-prefix-p "(" query)
      ;; NOTE(review): QUERY is run through `eval'.  The templates in this
      ;; file are fixed strings; never build QUERY from external data.
      (eval (read query))
    ;; just get data
    (dolist (field (split-string query "\\."))
      (setq object (plist-get object (intern-soft (concat ":" field)))))
    (if (null object)
        ;; Test the value itself: the formatted string is never nil, so
        ;; checking after `format' could not reach this fallback.
        "Not found"
      (replace-regexp-in-string
       ;; Real line breaks would split an org heading or property line.
       "[\n\r]+" " "
       ;; Kept from the original: drop a literal backslash followed by n.
       (string-replace "\\n" "" (format "%s" object))))))
;; ** help functions for complex data
;;
;; Some things like authors need to be constructed, and cannot just be looked
;; up. In other cases, I want logic, e.g. if data is there provide something,
;; and if not return an empty string. These functions do that work.
(defun oa--authors (wrk)
  "Build the author line for the work plist WRK.
Each element of the :authorships list in WRK becomes an org
elisp link that calls `oa-author' with the author's :id and is
labelled with the author's :display_name.  The links are joined
with a comma and a space."
  (mapconcat
   (lambda (authorship)
     (let ((person (plist-get authorship :author)))
       (format "[[elisp:(oa-author \"%s\")][%s]]"
               (plist-get person :id)
               (plist-get person :display_name))))
   (plist-get wrk :authorships)
   ", "))
;; I want some links if they can be made so the buffer is interactive. It might
;; be nice to integrate M-, navigation.
(defun oa--elisp-get-bibtex (wrk)
  "Return an org elisp link that fetches a bibtex entry for WRK.
The link calls `doi-add-bibtex-entry' with the :doi value of the
plist WRK.  Return an empty string when WRK has no :doi."
  (let ((doi (plist-get wrk :doi)))
    (if doi
        (format "[[elisp:(doi-add-bibtex-entry \"%s\")][Get bibtex entry]]" doi)
      "")))
(defun oa--elisp-get-oa-related (wrk)
  "Return an org elisp link that shows the related works of WRK.
The link calls `oa--related-works' with the :id of the plist WRK;
its label includes the number of entries in :related_works."
  (let ((work-id (plist-get wrk :id))
        (n-related (length (plist-get wrk :related_works))))
    (format "[[elisp:(progn (xref--push-markers) (oa--related-works \"%s\"))][Get related work (%s)]]"
            work-id
            n-related)))
(defun oa--elisp-get-oa-refs (wrk)
  "Return an org elisp link that shows the references of WRK.
The link calls `oa--referenced-works' with the :id of the plist
WRK; its label includes the number of entries in
:referenced_works."
  (let ((work-id (plist-get wrk :id))
        (n-refs (length (plist-get wrk :referenced_works))))
    (format "[[elisp:(progn (xref--push-markers) (oa--referenced-works \"%s\"))][Get references (%s)]]"
            work-id
            n-refs)))
(defun oa--elisp-get-oa-cited-by (wrk)
  "Return an org elisp link that shows the works citing WRK.
The link calls `oa--cited-by-works' with the :id of the plist
WRK; its label includes the :cited_by_count value."
  (let ((work-id (plist-get wrk :id))
        (n-citing (plist-get wrk :cited_by_count)))
    (format "[[elisp:(progn (xref--push-markers) (oa--cited-by-works \"%s\"))][Get cited by (%s)]]"
            work-id
            n-citing)))
(defun oa--works-entries (works)
  "Format every work in WORKS as an org entry and return the list.
WORKS is a plist from OpenAlex; the works are read from its
:results key.  Each entry is a level-two heading with a property
drawer, followed by links for the bibtex entry, references,
related works and citing works."
  (mapcar
   ;; The parameter must be called `wrk': the sexps inside the template are
   ;; evaluated by `oa--replacer' and refer to the variable by that name.
   (lambda (wrk)
     (s-format "** ${title}
:PROPERTIES:
:HOST: ${host_venue.display_name}
:YEAR: ${publication_year}
:CITED_BY_COUNT: ${cited_by_count}
:AUTHOR: ${(oa--authors wrk)}
:DOI: ${doi}
:OPENALEX: ${id}
:END:
${(oa--elisp-get-bibtex wrk)}
- ${(oa--elisp-get-oa-refs wrk)}
- ${(oa--elisp-get-oa-related wrk)}
- ${(oa--elisp-get-oa-cited-by wrk)}
"
               'oa--replacer wrk))
   (plist-get works :results)))
(defun oa--works-buffer (bufname header entries)
  "Fill the buffer named BUFNAME with HEADER and ENTRIES and show it.
The buffer is created if needed and its previous contents are
erased.  HEADER is inserted first, then a #+COLUMNS line and a
small table of sort links, then ENTRIES.

ENTRIES is a list of strings, one per org entry; they are joined
with newlines.  The buffer is put in `org-mode', point is moved
to the next visible heading after the start of the buffer, and
the buffer is selected with `pop-to-buffer'."
  (let ((results-buffer (get-buffer-create bufname)))
    (with-current-buffer results-buffer
      (erase-buffer)
      (insert header
              "#+COLUMNS: %25ITEM %YEAR %CITED_BY_COUNT
elisp:org-columns elisp:org-columns-quit
#+caption: Sort
| year | [[elisp:(oa-buffer-sort-year t)][old first]] | [[elisp:(oa-buffer-sort-year)][new first]] |
| cited by | [[elisp:(oa-buffer-sort-cited-by-count t)][low first]] | [[elisp:(oa-buffer-sort-cited-by-count)][high first]] |
")
      (insert (mapconcat #'identity entries "\n"))
      (org-mode)
      (goto-char (point-min))
      (org-next-visible-heading 1))
    ;; (display-buffer-in-side-window buf '((side . right)))
    (pop-to-buffer results-buffer)))
;; OpenAlex list requests are paged, apparently with 25 results per page; see
;; https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging
;; The next two functions do not follow pages. Instead they split the list of
;; ids into groups of 25 and make one filtered request per group.
(defun oa--related-works (entity-id)
  "Show a buffer listing the works related to ENTITY-ID.
The Work for ENTITY-ID is fetched with `oa--work'.  The ids in
its :related_works list are then requested 25 at a time through
the \"filter=openalex:\" query, and the formatted entries are
shown with `oa--works-buffer'."
  ;; The name `wrk' is referenced by a sexp in the header template below.
  (let* ((wrk (oa--work entity-id))
         (pending (plist-get wrk :related_works))
         (entries '()))
    (while pending
      (let* ((parts (-split-at 25 pending))
             ;; the ids requested in this round, joined as id1|id2|...
             (ids (mapconcat #'identity (car parts) "|")))
        (setq pending (cadr parts)
              entries (append entries
                              (oa--works-entries
                               (oa--work ids "filter=openalex:"))))))
    (oa--works-buffer
     "*OpenAlex - Related works*"
     (format "* OpenAlex - Related works for %s ([[%s][json]])
%s\n\n"
             entity-id
             (plist-get wrk :oa-url)
             (s-format ":PROPERTIES:
:TITLE: ${title}
:HOST: ${host_venue.display_name}
:AUTHOR: ${(oa--authors wrk)}
:DOI: ${doi}
:YEAR: ${publication_year}
:OPENALEX: ${id}
:END:" 'oa--replacer wrk))
     entries)))
(defun oa--referenced-works (entity-id)
  "Show a buffer listing the works referenced by ENTITY-ID.
The Work for ENTITY-ID is fetched with `oa--work'.  The ids in
its :referenced_works list are then requested 25 at a time
through the \"filter=openalex:\" query, and the formatted entries
are shown with `oa--works-buffer'."
  ;; The name `wrk' is referenced by a sexp in the header template below.
  (let* ((wrk (oa--work entity-id))
         (pending (plist-get wrk :referenced_works))
         (entries '()))
    (while pending
      (let* ((parts (-split-at 25 pending))
             ;; the ids requested in this round, joined as id1|id2|...
             (ids (mapconcat #'identity (car parts) "|")))
        (setq pending (cadr parts)
              entries (append entries
                              (oa--works-entries
                               (oa--work ids "filter=openalex:"))))))
    (oa--works-buffer
     "*OpenAlex - References*"
     (format "* OpenAlex - References from %s ([[%s][json]])
%s\n\n"
             entity-id
             (plist-get wrk :oa-url)
             (s-format ":PROPERTIES:
:TITLE: ${title}
:HOST: ${host_venue.display_name}
:AUTHOR: ${(oa--authors wrk)}
:DOI: ${doi}
:YEAR: ${publication_year}
:OPENALEX: ${id}
:END:" 'oa--replacer wrk))
     entries)))
;; This function is different than the previous two. First we follow a URL
;; provided by the data, and second, here we do follow pages.
(defun oa--cited-by-works (entity-id)
  "Show a buffer listing the works that cite ENTITY-ID.
The Work for ENTITY-ID is fetched with `oa--work' and the URL
under its :cited_by_api_url key is requested.  Further pages are
requested by appending \"&page=N\" to that URL until the :count
reported in the :meta of the first response is covered.  The
formatted entries are shown with `oa--works-buffer'."
  ;; The name `wrk' is referenced by a sexp in the header template below.
  (let* ((wrk (oa--work entity-id))
         (url (plist-get wrk :cited_by_api_url))
         (first-page (request-response-data
                      (request url
                        :sync t
                        :parser 'oa--response-parser)))
         (meta (plist-get first-page :meta))
         (count (plist-get meta :count))
         (per-page (plist-get meta :per_page))
         (entries (oa--works-entries first-page))
         (page 2))
    ;; Pages 1 .. PAGE-1 are already collected; continue while they do not
    ;; yet account for COUNT results.
    (while (< (* per-page (1- page)) count)
      (let ((next-page (request-response-data
                        (request (format "%s&page=%s" url page)
                          :sync t
                          :parser 'oa--response-parser))))
        (setq entries (append entries (oa--works-entries next-page))
              page (1+ page))))
    (oa--works-buffer
     "*OpenAlex - Cited by*"
     (format "* OpenAlex - %s Cited by ([[%s][json]])
%s"
             entity-id
             url
             (s-format ":PROPERTIES:
:TITLE: ${title}
:HOST: ${host_venue.display_name}
:AUTHOR: ${(oa--authors wrk)}
:DOI: ${doi}
:YEAR: ${publication_year}
:OPENALEX: ${id}
:END:\n\n" 'oa--replacer wrk))
     entries)))
;; ** buffer utilities for sorting entries
(defun oa-buffer-sort-year (&optional ascending)
  "Sort org entries by their YEAR property, newest first.
With prefix arg ASCENDING, sort oldest first instead.  An entry
without a YEAR property is sorted as year 0."
  (interactive "P")
  (let ((year-at-point
         (lambda ()
           (string-to-number (or (org-entry-get (point) "YEAR") "0")))))
    (org-sort-entries nil ?f year-at-point (if ascending #'< #'>))))
(defun oa-buffer-sort-cited-by-count (&optional ascending)
  "Sort org entries by their CITED_BY_COUNT property, highest first.
With prefix arg ASCENDING, sort lowest first instead.  An entry
without a CITED_BY_COUNT property is sorted as count 0."
  (interactive "P")
  (let ((count-at-point
         (lambda ()
           (string-to-number
            (or (org-entry-get (point) "CITED_BY_COUNT") "0")))))
    (org-sort-entries nil ?f count-at-point (if ascending #'< #'>))))
;; * Interactive versions for org-ref citations
(defun oa-related-works ()
  "Show the OpenAlex related works for the cite at point.
The DOI is taken from `org-ref-get-doi-at-point', prefixed with
\"doi:\" and passed to `oa--related-works'."
  (interactive)
  (let ((doi (org-ref-get-doi-at-point)))
    (oa--related-works (concat "doi:" doi))))
(defun oa-referenced-works ()
  "Show the OpenAlex references from the cite at point.
The DOI is taken from `org-ref-get-doi-at-point', prefixed with
\"doi:\" and passed to `oa--referenced-works'."
  (interactive)
  (let ((doi (org-ref-get-doi-at-point)))
    (oa--referenced-works (concat "doi:" doi))))
(defun oa-cited-by-works ()
  "Show the OpenAlex citing works for the cite at point.
The DOI is taken from `org-ref-get-doi-at-point', prefixed with
\"doi:\" and passed to `oa--cited-by-works'."
  (interactive)
  (let ((doi (org-ref-get-doi-at-point)))
    (oa--cited-by-works (concat "doi:" doi))))
;; Add the OpenAlex commands to `org-ref-citation-hydra' under an "OpenAlex"
;; column.  All heads are given in one `defhydra+' form so that they are added
;; in a single redefinition of the hydra.
;; NOTE(review): `defhydra+' appears to read the existing head list when the
;; macro is expanded, so separate forms expanded together (for example during
;; byte compilation) would not see each other's heads -- confirm against the
;; hydra version in use.
(defhydra+ org-ref-citation-hydra ()
  ("ar" oa-related-works "Related documents" :column "OpenAlex")
  ("ac" oa-cited-by-works "Cited by documents" :column "OpenAlex")
  ("af" oa-referenced-works "References from" :column "OpenAlex"))
;; * utilities
(defun oa-kill-buffers ()
  "Kill every buffer whose name starts with \"*OpenAlex\"."
  (interactive)
  (dolist (candidate (buffer-list))
    (when (string-prefix-p "*OpenAlex" (buffer-name candidate))
      (kill-buffer candidate))))
;; * Author object
(defun oa--author (entity-id &optional filter)
  "Retrieve json data for an Author object for ENTITY-ID.
ENTITY-ID is a string identifying the author; it is appended to
the authors endpoint as \"/authors/ENTITY-ID\".

If FILTER is non-nil it should be a query string prefix ending in
the filter key; the request then goes to
\"/authors?FILTER ENTITY-ID\" instead.

When `user-mail-address' is set it is sent as the \"mailto\"
query parameter.

Return the parsed response as a plist, with the request URL
stored under :oa-url, or nil when the response carried no data."
  (let* ((mailto (and user-mail-address
                      (concat "mailto=" user-mail-address)))
         (url (concat "https://api.openalex.org/authors"
                      (if filter
                          ;; The filter already opens the query string with
                          ;; "?", so further parameters are joined with "&".
                          (concat "?" filter entity-id
                                  (and mailto (concat "&" mailto)))
                        (concat "/" entity-id
                                (and mailto (concat "?" mailto))))))
         (req (request url :sync t :parser 'oa--response-parser))
         (data (request-response-data req)))
    ;; this is for convenience to inspect data in a browser.
    (plist-put data :oa-url url)
    data))
(defun oa-author-entries (works-data url)
  "Return org entries for all works reachable from URL.
WORKS-DATA is a parsed works response; only its :meta plist is
used, to compute the number of pages from :count and :per_page.
URL is the works request URL; every page, starting with page 1,
is requested by appending \"&page=N\" to it.

Return a list of strings, one org entry per work, in the order
the pages return them."
  (let* ((meta (plist-get works-data :meta))
         (per-page (plist-get meta :per_page))
         (count (plist-get meta :count))
         (full-pages (/ count per-page))
         ;; a partly filled last page needs one more request
         (pages (if (> (mod count per-page) 0)
                    (1+ full-pages)
                  full-pages))
         (entries '()))
    (dotimes (offset pages)
      (let* ((page-url (concat url (format "&page=%s" (1+ offset))))
             (page-data (request-response-data
                         (request page-url
                           :sync t
                           :parser 'oa--response-parser))))
        ;; The loop variable must be called `result': the sexps inside the
        ;; template are evaluated by `oa--replacer' and refer to it by name.
        (dolist (result (plist-get page-data :results))
          (push (s-format "** ${title}
:PROPERTIES:
:ID: ${id}
:DOI: ${ids.doi}
:YEAR: ${publication_year}
:HOST_VENUE: ${host_venue.display_name}
:AUTHORS: ${(oa--authors result)}
:CITED_BY_COUNT: ${cited_by_count}
:END:
${(oa--elisp-get-bibtex result)}
- ${(oa--elisp-get-oa-refs result)}
- ${(oa--elisp-get-oa-related result)}
- ${(oa--elisp-get-oa-cited-by result)}
" 'oa--replacer result)
                entries))))
    (nreverse entries)))
(defun oa-author (entity-id)
  "View Author for ENTITY-ID in an org-buffer.
The author is fetched with `oa--author' and the URL under its
:works_api_url key is requested for the first page of works.  The
buffer \"*OpenAlex - Author*\" is erased and filled with a
heading and property drawer for the author, a table of sort
links, and the entries from `oa-author-entries'.  The buffer is
then put in `org-mode' and selected with `pop-to-buffer'."
  (let* ((author-buffer (get-buffer-create "*OpenAlex - Author*"))
         (author (oa--author entity-id))
         (works-api-url (plist-get author :works_api_url))
         (first-page (request-response-data
                      (request works-api-url
                        :sync t
                        :parser 'oa--response-parser))))
    (with-current-buffer author-buffer
      (erase-buffer)
      (insert (s-format "* ${display_name} ([[${oa-url}][json]])
:PROPERTIES:
:ORCID: ${orcid}
:SCOPUS: ${ids.scopus}
:WORKS_COUNT: ${works_count}
:CITED_BY_COUNT: ${cited_by_count}
:INSTITUTION: ${last_known_institution.display_name}, ${last_known_institution.country_code}
:END:
#+COLUMNS: %25ITEM %YEAR %CITED_BY_COUNT
elisp:org-columns elisp:org-columns-quit
#+caption: Sort
| year | [[elisp:(oa-buffer-sort-year t)][old first]] | [[elisp:(oa-buffer-sort-year)][new first]] |
| cited by | [[elisp:(oa-buffer-sort-cited-by-count t)][low first]] | [[elisp:(oa-buffer-sort-cited-by-count)][high first]] |
"
                        'oa--replacer author))
      (insert (s-join "\n" (oa-author-entries first-page works-api-url)))
      (org-mode)
      (goto-char (point-min))
      (org-next-visible-heading 1))
    (pop-to-buffer author-buffer)))
(provide 'openalex)
;;; openalex.el ends here