1177 lines
36 KiB
EmacsLisp
1177 lines
36 KiB
EmacsLisp
;;; openalex.el --- Org-ref interface to OpenAlex -*- lexical-binding: t; -*-
|
|
|
|
;;; Commentary:
|
|
;; This is an elisp interface to OpenAlex (https://docs.openalex.org/) for org-ref.
|
|
;;
|
|
;; This provides functionality for the OpenAlex APIs.
|
|
;;
|
|
;; `oa-query' provides a general interface to all the endpoints with a filter.
|
|
;; It is not interactive though.
|
|
;;
|
|
;; `oa-author' provides an interactive way to search for an author and then see
|
|
;; a Google Scholar like org buffer with information about the author.
|
|
;;
|
|
;; `oa-fulltext-search' provides an interactive full text search of Works in
|
|
;; OpenAlex. You get an org-buffer of results, with links to subsequent pages of
|
|
;; results.
|
|
;;
|
|
;; `oa-coa' is an interactive command to generate the NSF COA form data for
|
|
;; coauthors.
|
|
;;
|
|
;; `oa-get-bibtex-entries' is an interactive command to download all bibtex
|
|
;; entries from headings in the current buffer with a DOI property.
|
|
;;
|
|
;; if you have an OpenAlex API you can set `oa-api-key' to use it. The
|
|
;; `user-mail-address' value will be added to the queries if it exists so you
|
|
;; will get the polite pool.
|
|
;;
|
|
;; This library extends the `org-ref-citation-menu' and adds keys to get to
|
|
;; cited by, references and related documents in OpenAlex.
|
|
|
|
(require 'request)
|
|
(require 'doi-utils)
|
|
(require 'org-ref-citation-links)
|
|
(require 'org-ref-utils)
|
|
|
|
(declare-function org-ref-possible-bibfiles "org-ref-core")
|
|
(declare-function org-ref-find-bibliography "org-ref-core")
|
|
(declare-function org-ref-get-bibtex-key-and-file "org-ref-core")
|
|
(declare-function bibtex-completion-show-entry "bibtex-completion")
|
|
(declare-function bibtex-completion-apa-format-reference "bibtex-completion")
|
|
(declare-function ivy-read "ivy")
|
|
(declare-function ivy-more-chars "ivy")
|
|
|
|
(defcustom oa-api-key
|
|
nil
|
|
"Your API key if you have one."
|
|
:group 'openalex
|
|
:type 'string)
|
|
|
|
;;; Code:
|
|
|
|
(defun oa--response-parser ()
|
|
"Parse the response from json to elisp."
|
|
(let ((json-array-type 'list)
|
|
(json-object-type 'plist)
|
|
(json-key-type 'keyword)
|
|
(json-false nil)
|
|
(json-encoding-pretty-print nil))
|
|
(json-read)))
|
|
|
|
|
|
(defun oa--params (&rest params)
|
|
"Build API request parameters, excluding nil values.
|
|
PARAMS should be an alist of (key . value) pairs.
|
|
The mailto and api_key parameters are added automatically."
|
|
(let ((result (when user-mail-address
|
|
`(("mailto" . ,user-mail-address)))))
|
|
;; Only add api_key if it's set and non-empty
|
|
(when (and oa-api-key (not (string-empty-p oa-api-key)))
|
|
(push `("api_key" . ,oa-api-key) result))
|
|
;; Add other params, filtering out nil values
|
|
(dolist (param params)
|
|
(when (cdr param)
|
|
(push param result)))
|
|
(nreverse result)))
|
|
|
|
|
|
(defun oa-get (data query &optional iterable)
|
|
"Get fields from DATA with QUERY.
|
|
QUERY is a dot notation string.
|
|
key1.key2.key3 represents a nested item.
|
|
key1.key2[].key3 represents key3 on all items in key1.key2.
|
|
|
|
Tested with up to two [] in the query.
|
|
|
|
Assumes data is in plist form."
|
|
(let* ((fields (and query (split-string query "\\.")))
|
|
(current-field (and fields (pop fields))))
|
|
|
|
(cond
|
|
;; return condition
|
|
((null query)
|
|
data)
|
|
|
|
;; query[] means get query then turn iteration on
|
|
((and (string-suffix-p "[]" current-field) (null iterable))
|
|
(setq current-field (substring current-field 0 -2))
|
|
(oa-get (plist-get data (intern-soft (concat ":" current-field)))
|
|
(when fields
|
|
(string-join fields "."))
|
|
t))
|
|
|
|
;; this means another level of iteration. You already have a collection. we
|
|
;; have to iterate over each one I think.
|
|
((and (string-suffix-p "[]" current-field) iterable)
|
|
(setq current-field (substring current-field 0 -2))
|
|
(cl-loop for item in data collect
|
|
(oa-get (plist-get item (intern-soft (concat ":" current-field)))
|
|
(when fields
|
|
(string-join fields "."))
|
|
t)))
|
|
|
|
;; single keyword, iterate over collection
|
|
(iterable
|
|
(oa-get (cl-loop for item in data collect
|
|
(plist-get item (intern-soft (concat ":" current-field))))
|
|
(when fields
|
|
(string-join fields "."))
|
|
t))
|
|
|
|
;; single keyword
|
|
(t
|
|
(oa-get (plist-get data (intern-soft (concat ":" current-field)))
|
|
(when fields
|
|
(string-join fields ".")))))))
|
|
|
|
|
|
;; * General query
|
|
(defun oa--query-all-data (endpoint &rest filter)
|
|
(let* ((page 1)
|
|
(url (concat "https://api.openalex.org/" endpoint))
|
|
(filter-string (string-join
|
|
(cl-loop for key in (plist-get-keys filter) collect
|
|
(concat (substring (symbol-name key) 1)
|
|
":"
|
|
(url-hexify-string
|
|
(plist-get filter key))))
|
|
","))
|
|
(params (oa--params `("filter" . ,filter-string)
|
|
`("page" . ,page)))
|
|
|
|
(req (request url :sync t :parser 'oa--response-parser :params params))
|
|
(data (request-response-data req))
|
|
(meta (plist-get data :meta))
|
|
(count (plist-get meta :count))
|
|
(per-page (plist-get meta :per_page))
|
|
(pages (ceiling (/ (float count) per-page)))
|
|
(results (plist-get data :results)))
|
|
(cl-loop for i from 2 to pages do
|
|
(setf (cdr (assoc "page" params)) i)
|
|
(setq req (request url :sync t :parser 'oa--response-parser :params params)
|
|
data (request-response-data req)
|
|
results (append results (plist-get data :results))))
|
|
results))
|
|
|
|
|
|
(defun oa-query (endpoint &rest filter)
|
|
"Run a query at ENDPOINT with FILTER.
|
|
ENDPOINT can be works, authors, sources, institutions, concepts,
|
|
publishers, or funders.
|
|
|
|
FILTER is a plist (:field value) where :field is a valid filter
|
|
field (see
|
|
https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/filter-entity-lists),
|
|
and value is what you want to filter on. The following logic is supported:
|
|
|
|
!value is negation
|
|
<value is less than
|
|
>value is greater than
|
|
value1+value2 is and within a field
|
|
value1|value2 is or within a field
|
|
|
|
Your email address will be added if `user-mail-address' is
|
|
non-nil, and `oa-api-key' if it is non-nil to the API url."
|
|
(let* ((page (if (plist-get filter :page)
|
|
(prog1
|
|
(string-to-number (plist-get filter :page))
|
|
(setq filter (org-plist-delete filter :page)))
|
|
1))
|
|
(base-url "https://api.openalex.org")
|
|
(url (concat base-url "/" endpoint "?filter="))
|
|
(filter-string (string-join
|
|
(cl-loop for key in (plist-get-keys filter) collect
|
|
(concat (substring (symbol-name key) 1)
|
|
":"
|
|
(url-hexify-string
|
|
(plist-get filter key))))
|
|
","))
|
|
|
|
(req (request url :sync t :parser 'oa--response-parser
|
|
:params (oa--params `("filter" . ,filter-string)
|
|
`("page" . ,page))))
|
|
(data (request-response-data req))
|
|
(meta (plist-get data :meta))
|
|
(count (plist-get meta :count))
|
|
(per-page (plist-get meta :per_page))
|
|
(pages (ceiling (/ (float count) per-page)))
|
|
(results (plist-get data :results))
|
|
(next-page (format "[[elisp:(oa-query \"%s\" %s :page \"%s\")][Next page: %s]]"
|
|
endpoint
|
|
(string-join (cl-loop for x in filter
|
|
collect
|
|
(if (keywordp x)
|
|
(format "%s" x)
|
|
(format "%S" x)))
|
|
" ")
|
|
(+ page 1)
|
|
(+ page 1)))
|
|
(buf (generate-new-buffer "*OpenAlex - Query*")))
|
|
|
|
(with-current-buffer buf
|
|
(erase-buffer)
|
|
(org-mode)
|
|
(insert (concat
|
|
(format "#+title: %s
|
|
** Results
|
|
:PROPERTIES:
|
|
:FILTER: %s
|
|
:COUNT: %s
|
|
:END:
|
|
|
|
%s
|
|
\n\n"
|
|
filter-string
|
|
filter-string
|
|
count
|
|
next-page)
|
|
(cond
|
|
((string= endpoint "works")
|
|
(string-join
|
|
(cl-loop for wrk in results collect
|
|
(org-ref--format-template "*** ${title}
|
|
:PROPERTIES:
|
|
:HOST: ${primary_location.source.display_name}
|
|
:YEAR: ${publication_year}
|
|
:CITED_BY_COUNT: ${cited_by_count}
|
|
:AUTHOR: ${authors}
|
|
:DOI: ${doi}
|
|
:OPENALEX: ${id}
|
|
:CREATED_DATE: ${created_date}
|
|
:END:
|
|
|
|
|
|
${get-bibtex}
|
|
|
|
- ${oa-refs}
|
|
- ${oa-related}
|
|
- ${oa-cited}
|
|
|
|
${abstract}
|
|
|
|
"
|
|
`(("title" . ,(oa--title wrk))
|
|
("primary_location.source.display_name" . ,(oa-get wrk "primary_location.source.display_name"))
|
|
("publication_year" . ,(oa-get wrk "publication_year"))
|
|
("cited_by_count" . ,(oa-get wrk "cited_by_count"))
|
|
("authors" . ,(oa--authors wrk))
|
|
("doi" . ,(oa-get wrk "doi"))
|
|
("id" . ,(oa-get wrk "id"))
|
|
("created_date" . ,(oa-get wrk "created_date"))
|
|
("get-bibtex" . ,(oa--elisp-get-bibtex wrk))
|
|
("oa-refs" . ,(oa--elisp-get-oa-refs wrk))
|
|
("oa-related" . ,(oa--elisp-get-oa-related wrk))
|
|
("oa-cited" . ,(oa--elisp-get-oa-cited-by wrk))
|
|
("abstract" . ,(oa--abstract wrk)))))
|
|
"\n"))
|
|
(t
|
|
(format "%s" results)
|
|
)))))
|
|
(pop-to-buffer buf)
|
|
(goto-char (point-min))))
|
|
|
|
|
|
;; * Work object
|
|
|
|
(defun oa--work (entity-id &optional filter)
|
|
"Retrieve json data for a Work object for ENTITY-ID.
|
|
ENTITY-ID is an OpenAlex ID, DOI, Pubmed id,etc.
|
|
|
|
ENTITY-ID may also be a list of ids with a filter.
|
|
|
|
If FILTER is non-nil it should be a string like \"filter=openalex:\"
|
|
|
|
https://docs.openalex.org/api-entities/works"
|
|
|
|
(let* ((url (concat "https://api.openalex.org/works"
|
|
;; This is hackier than I prefer, but sometimes entity-id
|
|
;; is nil, or starts with ? for a filter, and I couldn't
|
|
;; see a cleaner way to solve this. this function is used
|
|
;; in a lot of places.
|
|
(cond
|
|
((string-prefix-p "?" entity-id)
|
|
entity-id)
|
|
(t
|
|
(format "/%s" entity-id)))))
|
|
(req (request url :sync t :parser 'oa--response-parser
|
|
:params (oa--params `("filter" . ,filter))))
|
|
(data (request-response-data req)))
|
|
;; this is for convenience to inspect data in a browser, e.g. you can click
|
|
;; on the url in Emacs and it opens in a browser.
|
|
(plist-put data :oa-url url)
|
|
data))
|
|
|
|
|
|
;; * autocomplete works
|
|
|
|
(defun oa--works-candidates (query)
|
|
"Retrieve autocomplete works from OpenAlex."
|
|
(or
|
|
(ivy-more-chars)
|
|
(let* ((url "https://api.openalex.org/autocomplete/works")
|
|
(req (request url :sync t :parser 'oa--response-parser
|
|
:params (let ((params `(("q" . ,query))))
|
|
(when user-mail-address
|
|
(push `("mailto" . ,user-mail-address) params))
|
|
(nreverse params))))
|
|
(data (request-response-data req))
|
|
(results (plist-get data :results)))
|
|
(cl-loop for work in results collect
|
|
(propertize
|
|
(format "%s, %s"
|
|
(plist-get work :display_name)
|
|
(plist-get work :hint))
|
|
'oaid (plist-get work :id))))))
|
|
|
|
|
|
(defun oa-works ()
|
|
"Autocomplete works.
|
|
This doesn't seem as useful as it could be."
|
|
(interactive)
|
|
|
|
(ivy-read "Work: " #'oa--works-candidates
|
|
:dynamic-collection t
|
|
:action
|
|
'(1
|
|
("o" (lambda (candidate)
|
|
(browse-url (get-text-property 0 'oaid candidate)))
|
|
"Open in browser"))))
|
|
|
|
;; * Viewing works
|
|
|
|
|
|
;; ** help functions for complex data
|
|
;;
|
|
;; Some things like authors need to be constructed, and cannot just be looked
|
|
;; up. In other cases, I want logic, e.g. if data is there provide something,
|
|
;; and if not return an empty string. These functions do that work.
|
|
(defun oa--authors (wrk)
|
|
"Return an author string for WRK.
|
|
The string is a comma-separated list of links to author pages in OpenAlex."
|
|
(string-join (cl-loop for author in (plist-get wrk :authorships)
|
|
collect
|
|
(format "[[elisp:(oa--author-org \"%s\")][%s]]"
|
|
(plist-get
|
|
(plist-get author :author)
|
|
:id)
|
|
(plist-get
|
|
(plist-get author :author)
|
|
:display_name)))
|
|
", "))
|
|
|
|
|
|
(defun oa--title (wrk)
|
|
"Return a title from WRK with linebreaks removed."
|
|
(string-replace "\n" " " (plist-get wrk :title)))
|
|
|
|
|
|
;; I want some links if they can be made so the buffer is interactive. It might
|
|
;; be nice to integrate M-, navigation.
|
|
(defun oa--elisp-get-bibtex (wrk)
|
|
"Return a elisp link to get a bibtex entry for WRK if there is a doi."
|
|
(if-let* ((doi (plist-get wrk :doi)))
|
|
(format "[[elisp:(doi-add-bibtex-entry \"%s\")][Get bibtex entry]]" doi)
|
|
""))
|
|
|
|
|
|
(defun oa--elisp-get-oa-related (wrk)
|
|
"Return a elisp link to get related works for WRK."
|
|
(format "[[elisp:(progn (xref--push-markers (current-buffer) (point)) (oa--related-works \"%s\"))][Get related work (%s)]]"
|
|
(plist-get wrk :id)
|
|
(length (plist-get wrk :related_works))))
|
|
|
|
|
|
(defun oa--elisp-get-oa-refs (wrk)
|
|
"Return a elisp link to get references for WRK."
|
|
(format "[[elisp:(progn (xref--push-markers (current-buffer) (point)) (oa--referenced-works \"%s\"))][Get references (%s)]]"
|
|
(plist-get wrk :id)
|
|
(length (plist-get wrk :referenced_works))))
|
|
|
|
|
|
(defun oa--elisp-get-oa-cited-by (wrk)
|
|
"Return a elisp link to get works that cite WRK."
|
|
(format "[[elisp:(progn (xref--push-markers (current-buffer) (point)) (oa--cited-by-works \"%s\"))][Get cited by (%s)]]"
|
|
(plist-get wrk :id)
|
|
(plist-get wrk :cited_by_count)))
|
|
|
|
|
|
(defun oa--works-entries (works)
|
|
"Return a list of org-formatted entries in WORKS.
|
|
WORKS is a list of results from OpenAlex."
|
|
(cl-loop for wrk in (plist-get works :results)
|
|
collect
|
|
(org-ref--format-template "** ${title}
|
|
:PROPERTIES:
|
|
:HOST: ${primary_location.source.display_name}
|
|
:YEAR: ${publication_year}
|
|
:CITED_BY_COUNT: ${cited_by_count}
|
|
:AUTHOR: ${authors}
|
|
:DOI: ${doi}
|
|
:OPENALEX: ${id}
|
|
:END:
|
|
|
|
|
|
${get-bibtex}
|
|
|
|
- ${oa-refs}
|
|
- ${oa-related}
|
|
- ${oa-cited}
|
|
|
|
"
|
|
`(("title" . ,(oa-get wrk "title"))
|
|
("primary_location.source.display_name" . ,(oa-get wrk "primary_location.source.display_name"))
|
|
("publication_year" . ,(oa-get wrk "publication_year"))
|
|
("cited_by_count" . ,(oa-get wrk "cited_by_count"))
|
|
("authors" . ,(oa--authors wrk))
|
|
("doi" . ,(oa-get wrk "doi"))
|
|
("id" . ,(oa-get wrk "id"))
|
|
("get-bibtex" . ,(oa--elisp-get-bibtex wrk))
|
|
("oa-refs" . ,(oa--elisp-get-oa-refs wrk))
|
|
("oa-related" . ,(oa--elisp-get-oa-related wrk))
|
|
("oa-cited" . ,(oa--elisp-get-oa-cited-by wrk))))))
|
|
|
|
|
|
(defun oa--works-buffer (bufname header entries)
|
|
"Create an org-buffer with BUFNAME representing the results in WORKS.
|
|
HEADER is the first thing in the buffer
|
|
WORKS is usually a list of results from OpenAlex.
|
|
Argument ENTRIES A list of strings for each org entry."
|
|
(let ((buf (get-buffer-create bufname)))
|
|
|
|
(with-current-buffer buf
|
|
(erase-buffer)
|
|
(insert header)
|
|
(insert "#+COLUMNS: %25ITEM %YEAR %CITED_BY_COUNT
|
|
elisp:org-columns elisp:org-columns-quit
|
|
|
|
|
|
#+caption: Sort
|
|
| year | [[elisp:(oa-buffer-sort-year t)][old first]] | [[elisp:(oa-buffer-sort-year)][new first]] |
|
|
| cited by | [[elisp:(oa-buffer-sort-cited-by-count t)][low first]] | [[elisp:(oa-buffer-sort-cited-by-count)][high first]] |
|
|
|
|
")
|
|
(insert (string-join entries "\n"))
|
|
(org-mode)
|
|
(goto-char (point-min))
|
|
(org-next-visible-heading 1))
|
|
;; (display-buffer-in-side-window buf '((side . right)))
|
|
(pop-to-buffer buf)))
|
|
|
|
|
|
(defun oa--related-works (entity-id)
|
|
"Show the Related works buffer for ENTITY-ID."
|
|
(let* ((wrk (oa--work entity-id))
|
|
(related-work (plist-get wrk :related_works))
|
|
split
|
|
entries)
|
|
(while related-work
|
|
(setq split (org-ref--split-at 25 related-work)
|
|
related-work (nth 1 split))
|
|
|
|
;; split is what we process now
|
|
(setq entries (append entries
|
|
(oa--works-entries
|
|
(oa--work (format "?filter=openalex:%s" (string-join (nth 0 split) "|")))))))
|
|
|
|
(oa--works-buffer
|
|
"*OpenAlex - Related works*"
|
|
(format "* OpenAlex - Related works for %s ([[%s][json]])
|
|
%s\n\n"
|
|
entity-id
|
|
(plist-get wrk :oa-url)
|
|
(org-ref--format-template ":PROPERTIES:
|
|
:TITLE: ${title}
|
|
:HOST: ${primary_location.source.display_name}
|
|
:AUTHOR: ${authors}
|
|
:DOI: ${doi}
|
|
:YEAR: ${publication_year}
|
|
:OPENALEX: ${id}
|
|
:END:
|
|
|
|
Found ${nentries} results.
|
|
"
|
|
`(("title" . ,(oa-get wrk "title"))
|
|
("primary_location.source.display_name" . ,(oa-get wrk "primary_location.source.display_name"))
|
|
("authors" . ,(oa--authors wrk))
|
|
("doi" . ,(oa-get wrk "doi"))
|
|
("publication_year" . ,(oa-get wrk "publication_year"))
|
|
("id" . ,(oa-get wrk "id"))
|
|
("nentries" . ,(length entries)))))
|
|
entries)))
|
|
|
|
|
|
(defun oa--referenced-works (entity-id)
|
|
"Show the Referenced work for ENTITY-ID."
|
|
(let* ((wrk (oa--work entity-id))
|
|
(referenced-work (plist-get wrk :referenced_works))
|
|
split
|
|
(entries '()))
|
|
(while referenced-work
|
|
(setq split (org-ref--split-at 25 referenced-work)
|
|
referenced-work (nth 1 split))
|
|
;; split is what we process now
|
|
(setq entries (append entries
|
|
(oa--works-entries
|
|
(oa--work (format "?filter=openalex:%s"
|
|
(string-join (nth 0 split) "|")))))))
|
|
(oa--works-buffer
|
|
"*OpenAlex - References*"
|
|
(format "* OpenAlex - References from %s ([[%s][json]])
|
|
%s\n\n"
|
|
entity-id
|
|
(plist-get wrk :oa-url)
|
|
(org-ref--format-template ":PROPERTIES:
|
|
:TITLE: ${title}
|
|
:HOST: ${primary_location.source.display_name}
|
|
:AUTHOR: ${authors}
|
|
:DOI: ${doi}
|
|
:YEAR: ${publication_year}
|
|
:OPENALEX: ${id}
|
|
:END:
|
|
|
|
Found ${nentries} results.
|
|
"
|
|
`(("title" . ,(oa-get wrk "title"))
|
|
("primary_location.source.display_name" . ,(oa-get wrk "primary_location.source.display_name"))
|
|
("authors" . ,(oa--authors wrk))
|
|
("doi" . ,(oa-get wrk "doi"))
|
|
("publication_year" . ,(oa-get wrk "publication_year"))
|
|
("id" . ,(oa-get wrk "id"))
|
|
("nentries" . ,(length entries)))))
|
|
entries)))
|
|
|
|
|
|
;; This function is different than the previous two. First we follow a URL
|
|
;; provided by the data, and second, here we do follow pages.
|
|
(defun oa--cited-by-works (entity-id)
|
|
"Show the Cited by buffer for ENTITY-ID."
|
|
(let* ((wrk (oa--work entity-id))
|
|
(url (plist-get wrk :cited_by_api_url))
|
|
(cited-by-works (request-response-data
|
|
(request url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params))))
|
|
(count (plist-get (plist-get cited-by-works :meta) :count))
|
|
(per-page (plist-get (plist-get cited-by-works :meta) :per_page))
|
|
(entries '())
|
|
(page 2))
|
|
;; get first page
|
|
(setq entries (oa--works-entries cited-by-works))
|
|
(while (> count (* per-page (- page 1)))
|
|
(setq cited-by-works (request-response-data
|
|
(request url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params `("page" . ,page)))))
|
|
(setq entries (append entries (oa--works-entries cited-by-works)))
|
|
(cl-incf page))
|
|
|
|
(oa--works-buffer
|
|
"*OpenAlex - Cited by*"
|
|
(format "* OpenAlex - %s Cited by ([[%s][json]])
|
|
%s"
|
|
entity-id
|
|
url
|
|
(org-ref--format-template ":PROPERTIES:
|
|
:TITLE: ${title}
|
|
:HOST: ${primary_location.source.display_name}
|
|
:AUTHOR: ${authors}
|
|
:DOI: ${doi}
|
|
:YEAR: ${publication_year}
|
|
:OPENALEX: ${id}
|
|
:END:
|
|
|
|
Found ${nentries} results.
|
|
|
|
"
|
|
`(("title" . ,(oa-get wrk "title"))
|
|
("primary_location.source.display_name" . ,(oa-get wrk "primary_location.source.display_name"))
|
|
("authors" . ,(oa--authors wrk))
|
|
("doi" . ,(oa-get wrk "doi"))
|
|
("publication_year" . ,(oa-get wrk "publication_year"))
|
|
("id" . ,(oa-get wrk "id"))
|
|
("nentries" . ,(length entries)))))
|
|
entries)))
|
|
|
|
|
|
;; ** buffer utilities for sorting entries
|
|
|
|
(defun oa-buffer-sort-year (&optional ascending)
|
|
"Sort org headings by year in descending order (new to old).
|
|
With prefix arg ASCENDING, sort in ascending order (old to new)"
|
|
(interactive "P")
|
|
(if ascending
|
|
(org-sort-entries nil ?f
|
|
(lambda () (string-to-number (or (org-entry-get (point) "YEAR") "0")))
|
|
(lambda (y1 y2)
|
|
(< y1 y2)))
|
|
(org-sort-entries nil ?f
|
|
(lambda () (string-to-number (or (org-entry-get (point) "YEAR") "0")))
|
|
(lambda (y1 y2)
|
|
(> y1 y2))))
|
|
(org-fold-show-all))
|
|
|
|
|
|
(defun oa-buffer-sort-cited-by-count (&optional ascending)
|
|
"Sort org headings by cited by count in descending order high to low.
|
|
With prefix arg ASCENDING sort from low to high."
|
|
(interactive "P")
|
|
(if ascending
|
|
(org-sort-entries nil ?f
|
|
(lambda ()
|
|
(string-to-number
|
|
(or (org-entry-get (point) "CITED_BY_COUNT")
|
|
"0")))
|
|
#'<)
|
|
(org-sort-entries nil ?f
|
|
(lambda ()
|
|
(string-to-number
|
|
(or
|
|
(org-entry-get (point) "CITED_BY_COUNT")
|
|
"0")))
|
|
#'>))
|
|
(org-fold-show-all))
|
|
|
|
|
|
;; * Interactive versions for org-ref citations
|
|
|
|
(defun oa-related-works ()
|
|
"Open the side window for Related works on cite at point."
|
|
(interactive)
|
|
(oa--related-works (concat "https://doi.org/" (org-ref-get-doi-at-point))))
|
|
|
|
|
|
(defun oa-referenced-works ()
|
|
"Open the side window for References from the cite at point."
|
|
(interactive)
|
|
(oa--referenced-works (concat "doi:" (org-ref-get-doi-at-point))))
|
|
|
|
|
|
(defun oa-cited-by-works ()
|
|
"Open the side window for Citing works for the cite at point."
|
|
(interactive)
|
|
(oa--cited-by-works (concat "doi:" (org-ref-get-doi-at-point))))
|
|
|
|
|
|
(defun oa-open ()
|
|
"Open the cite at point in OpenAlex."
|
|
(interactive)
|
|
(let* ((url (concat
|
|
"https://api.openalex.org/works/https://doi.org/"
|
|
(org-ref-get-doi-at-point)))
|
|
(req (request url :sync t :parser 'oa--response-parser))
|
|
(data (request-response-data req)))
|
|
(browse-url (plist-get data :id))))
|
|
|
|
|
|
(with-eval-after-load 'org-ref-citation-links
|
|
(transient-append-suffix 'org-ref-citation-menu "u"
|
|
'("xa" "Open in OpenAlex" oa-open))
|
|
(transient-append-suffix 'org-ref-citation-menu "xa"
|
|
'("xr" "Related documents" oa-related-works))
|
|
(transient-append-suffix 'org-ref-citation-menu "xr"
|
|
'("xc" "Cited by documents" oa-cited-by-works))
|
|
(transient-append-suffix 'org-ref-citation-menu "xc"
|
|
'("xf" "References from" oa-referenced-works)))
|
|
|
|
|
|
;; * Author object
|
|
|
|
(defun oa--author (entity-id &optional filter)
|
|
"Get an Author object for ENTITY-ID.
|
|
FILTER is an optional string to add to the URL."
|
|
(let* ((url (concat "https://api.openalex.org/authors"
|
|
entity-id))
|
|
(req (request url :sync t :parser 'oa--response-parser
|
|
:params (oa--params `("filter" . ,filter))))
|
|
(data (request-response-data req)))
|
|
;; this is for convenience to inspect data in a browser.
|
|
(plist-put data :oa-url url)
|
|
data))
|
|
|
|
|
|
(defun oa--author-entries (works-data url)
|
|
"Get entries from WORKS-DATA."
|
|
(let* ((meta (plist-get works-data :meta))
|
|
(per-page (plist-get meta :per_page))
|
|
(count (plist-get meta :count))
|
|
(pages (/ count per-page))
|
|
(entries '()))
|
|
;; if there is a remainder we need to get the rest
|
|
(when (> (mod count per-page) 0) (cl-incf pages))
|
|
|
|
;; Now we have to loop through the pages
|
|
(cl-loop for i from 1 to pages
|
|
do
|
|
(setq works-data (request-response-data
|
|
(request url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params `("page" . ,i))))
|
|
entries (append entries
|
|
(cl-loop for result in (plist-get works-data :results)
|
|
collect
|
|
(org-ref--format-template "*** ${title}
|
|
:PROPERTIES:
|
|
:ID: ${id}
|
|
:DOI: ${ids.doi}
|
|
:PDF: ${primary_location.pdf_url}
|
|
:LANDING_PAGE: ${primary_location.landing_page_url}
|
|
:YEAR: ${publication_year}
|
|
:HOST_VENUE: ${primary_location.source.display_name}
|
|
:AUTHORS: ${authors}
|
|
:CITED_BY_COUNT: ${cited_by_count}
|
|
:END:
|
|
|
|
${get-bibtex}
|
|
|
|
- ${oa-refs}
|
|
- ${oa-related}
|
|
- ${oa-cited}
|
|
|
|
${abstract}
|
|
|
|
" `(("title" . ,(oa--title result))
|
|
("id" . ,(oa-get result "id"))
|
|
("ids.doi" . ,(oa-get result "ids.doi"))
|
|
("publication_year" . ,(oa-get result "publication_year"))
|
|
("primary_location.pdf_url" . ,(oa-get result "primary_location.pdf_url"))
|
|
("primary_location.landing_page_url" . ,(oa-get result "primary_location.landing_page_url"))
|
|
("primary_location.source.display_name" . ,(oa-get result "primary_location.source.display_name"))
|
|
("authors" . ,(oa--authors result))
|
|
("cited_by_count" . ,(oa-get result "cited_by_count"))
|
|
("get-bibtex" . ,(oa--elisp-get-bibtex result))
|
|
("oa-refs" . ,(oa--elisp-get-oa-refs result))
|
|
("oa-related" . ,(oa--elisp-get-oa-related result))
|
|
("oa-cited" . ,(oa--elisp-get-oa-cited-by result))
|
|
("abstract" . ,(oa--abstract result))))))))
|
|
entries))
|
|
|
|
|
|
(defun oa--abstract (wrk)
|
|
"Construct an abstract from a WRK."
|
|
(let* ((aii (plist-get wrk :abstract_inverted_index))
|
|
(word_index '())
|
|
sorted)
|
|
|
|
(cl-loop for (k v) on aii by 'cddr
|
|
do
|
|
(cl-loop for index in v
|
|
do
|
|
(push (list k index) word_index)))
|
|
|
|
(setq sorted (sort
|
|
word_index
|
|
(lambda (a b)
|
|
(<
|
|
(nth 1 a)
|
|
(nth 1 b)))))
|
|
|
|
(string-join (mapcar
|
|
(lambda (x)
|
|
(substring
|
|
(symbol-name (car x))
|
|
1))
|
|
sorted)
|
|
" ")))
|
|
|
|
|
|
(defun oa--author-candidates (query)
|
|
"Retrieve autocomplete authors from OpenAlex."
|
|
(or
|
|
(ivy-more-chars)
|
|
(let* ((url "https://api.openalex.org/autocomplete/authors")
|
|
(req (request url :sync t :parser 'oa--response-parser
|
|
:params (let ((params `(("q" . ,query))))
|
|
(when user-mail-address
|
|
(push `("mailto" . ,user-mail-address) params))
|
|
(nreverse params))))
|
|
(data (request-response-data req))
|
|
(results (plist-get data :results)))
|
|
(cl-loop for author in results collect
|
|
(propertize
|
|
(format "%s - %s"
|
|
(plist-get author :display_name)
|
|
(plist-get
|
|
author :hint))
|
|
'oaid (plist-get author :id))))))
|
|
|
|
|
|
(defun oa--format-institution (data)
|
|
"Format institution from DATA, handling nil values gracefully.
|
|
Extracts the first institution from last_known_institutions array."
|
|
(let* ((institutions (plist-get data :last_known_institutions))
|
|
(first-inst (and institutions (listp institutions) (car institutions)))
|
|
(name (and first-inst (plist-get first-inst :display_name)))
|
|
(country (and first-inst (plist-get first-inst :country_code))))
|
|
(cond
|
|
((and name country) (format "%s, %s" name country))
|
|
(name name)
|
|
(country country)
|
|
(t ""))))
|
|
|
|
|
|
(defun oa--counts-by-year (data)
|
|
"Get citation counts by year and make a graph.
|
|
DATA is an author from OpenAlex.
|
|
Requires gnuplot. Generates a temporary file."
|
|
(if (executable-find "gnuplot")
|
|
(let* ((pngfile (make-temp-file "oa-" nil ".png"))
|
|
(counts (sort
|
|
(cl-loop for i from 1 for item in (plist-get data :counts_by_year)
|
|
collect
|
|
(list
|
|
(plist-get item :year)
|
|
(plist-get item :cited_by_count)
|
|
(plist-get item :works_count)))
|
|
(lambda (a b)
|
|
(< (nth 0 a)
|
|
(nth 0 b)))))
|
|
(count-string (cl-loop for i from 1 for (year cites works) in counts
|
|
concat
|
|
(format "%s \"%s\" %s %s\n"
|
|
i year cites works)))
|
|
(gnuplot (format "set terminal \"png\" size 800,400
|
|
set output \"%s\"
|
|
$counts << EOD
|
|
%s
|
|
EOD
|
|
set boxwidth 0.5
|
|
set style fill solid noborder
|
|
set style line 1 lc rgb \"grey\"
|
|
set ylabel \"Citation count\"
|
|
set y2label \"Document count\"
|
|
set y2tics nomirror
|
|
set key left top
|
|
plot $counts using 1:3:xtic(2) with boxes lc rgb \"grey\" title \"Citations per year\", \"\" using 1:4 axes x1y2 with lines title \"Document count\"
|
|
"
|
|
pngfile
|
|
count-string))
|
|
(cmdfile (make-temp-file "gnuplot-cmds-" nil ".gpl"))
|
|
(shellcmd (format "gnuplot --persist -c \"%s\"" cmdfile)))
|
|
(with-temp-file cmdfile
|
|
(insert gnuplot))
|
|
(shell-command shellcmd)
|
|
(delete-file cmdfile)
|
|
(format "[[%s]]" pngfile))
|
|
"Gnuplot required to see citation graph. Please install it."))
|
|
|
|
|
|
(defun oa--author-org (entity-id)
|
|
"Generate an org-buffer for an author with ENTITY-ID."
|
|
(let* ((buf (get-buffer-create "*OpenAlex - Author*"))
|
|
(data (oa--author entity-id))
|
|
(citations-image (oa--counts-by-year data))
|
|
(works-count (plist-get data :works_count))
|
|
(works-url (plist-get data :works_api_url))
|
|
(works-data (request-response-data
|
|
(request works-url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params)))))
|
|
(with-current-buffer buf
|
|
(erase-buffer)
|
|
(insert (org-ref--format-template "* ${display_name} ([[${oa-url}][json]])
|
|
:PROPERTIES:
|
|
:OPENALEX: ${id}
|
|
:ORCID: ${orcid}
|
|
:SCOPUS: ${ids.scopus}
|
|
:WORKS_COUNT: ${works_count}
|
|
:CITED_BY_COUNT: ${cited_by_count}
|
|
:INSTITUTION: ${institution}
|
|
:END:
|
|
|
|
#+COLUMNS: %25ITEM %YEAR %CITED_BY_COUNT
|
|
elisp:org-columns elisp:org-columns-quit
|
|
|
|
${citations-image}
|
|
|
|
** Articles
|
|
|
|
#+caption: Sort
|
|
| year | [[elisp:(oa-buffer-sort-year t)][old first]] | [[elisp:(oa-buffer-sort-year)][new first]] |
|
|
| cited by | [[elisp:(oa-buffer-sort-cited-by-count t)][low first]] | [[elisp:(oa-buffer-sort-cited-by-count)][high first]] |
|
|
|
|
"
|
|
`(("display_name" . ,(oa-get data "display_name"))
|
|
("oa-url" . ,(oa-get data "oa-url"))
|
|
("id" . ,(oa-get data "id"))
|
|
("orcid" . ,(oa-get data "orcid"))
|
|
("ids.scopus" . ,(oa-get data "ids.scopus"))
|
|
("works_count" . ,(oa-get data "works_count"))
|
|
("cited_by_count" . ,(oa-get data "cited_by_count"))
|
|
("institution" . ,(oa--format-institution data))
|
|
("citations-image" . ,citations-image))))
|
|
(insert (string-join (oa--author-entries works-data works-url) "\n"))
|
|
|
|
(org-mode)
|
|
(goto-char (point-min))
|
|
(org-next-visible-heading 1))
|
|
(pop-to-buffer buf)))
|
|
|
|
|
|
(defun oa-author ()
|
|
"Get data and act on it for an author."
|
|
(interactive)
|
|
|
|
(ivy-read "Author: " #'oa--author-candidates
|
|
:dynamic-collection t
|
|
:action
|
|
'(1
|
|
("o" (lambda (candidate)
|
|
(oa--author-org (get-text-property 0 'oaid candidate)))
|
|
"Open org file")
|
|
("l" (lambda (candidate)
|
|
(insert (format "[[%s][%s]]"
|
|
(get-text-property 0 'oaid candidate)
|
|
candidate))))
|
|
("u" (lambda (candidate)
|
|
(browse-url (get-text-property 0 'oaid candidate)))
|
|
"Open in browser"))))
|
|
|
|
|
|
;; * Full text search
|
|
|
|
|
|
(defun oa-fulltext-search (query &optional page)
|
|
"Perform a fulltext search on QUERY.
|
|
PAGE is optional, and loads that page of results. Defaults to 1."
|
|
(interactive (list (read-string "Query: ")
|
|
nil))
|
|
(when (null page) (setq page 1))
|
|
(let* ((url "https://api.openalex.org/works")
|
|
(req (request url
|
|
:sync t
|
|
:parser #'oa--response-parser
|
|
:params (oa--params `("page" . ,page)
|
|
`("filter" . ,(format "fulltext.search:%s" query)))))
|
|
(data (request-response-data req))
|
|
(metadata (plist-get data :meta))
|
|
(count (plist-get metadata :count))
|
|
(per-page (plist-get metadata :per_page))
|
|
(npages (+ (/ count per-page) (if (= 0 (mod count per-page)) 0 1)))
|
|
(results (plist-get data :results))
|
|
(next-page (format "[[elisp:(oa-fulltext-search \"%s\" %s)][Next page: %s of %s]]"
|
|
query
|
|
(+ page 1)
|
|
(+ page 1)
|
|
npages))
|
|
(buf (get-buffer-create "*OpenAlex Full-text search*")))
|
|
|
|
(with-current-buffer buf
|
|
(erase-buffer)
|
|
(org-mode)
|
|
(insert (org-ref--format-template "#+title: Full-text search: ${query}
|
|
|
|
[[elisp:(oa-fulltext-search \"${query}\" ${page})]]"
|
|
`(("query" . ,query)
|
|
("page" . ,page))))
|
|
(insert (org-ref--format-template
|
|
"
|
|
${meta.count} results: Page ${meta.page} of ${s1} ${s2}
|
|
\n\n"
|
|
`(("meta.count" . ,count)
|
|
("meta.page" . ,(oa-get data "meta.page"))
|
|
("s1" . ,(format "%s" npages))
|
|
("s2" . ,(format "%s" next-page)))))
|
|
|
|
(insert
|
|
(cl-loop for result in results concat
|
|
(org-ref--format-template "* ${title}
|
|
:PROPERTIES:
|
|
:JOURNAL: ${primary_location.source.display_name}
|
|
:AUTHOR: ${authors}
|
|
:YEAR: ${publication_year}
|
|
:OPENALEX: ${id}
|
|
:DOI: ${ids.doi}
|
|
:END:
|
|
|
|
${get-bibtex}
|
|
|
|
- ${oa-refs}
|
|
- ${oa-related}
|
|
- ${oa-cited}
|
|
|
|
" `(("title" . ,(oa--title result))
|
|
("primary_location.source.display_name" . ,(oa-get result "primary_location.source.display_name"))
|
|
("authors" . ,(oa--authors result))
|
|
("publication_year" . ,(oa-get result "publication_year"))
|
|
("id" . ,(oa-get result "id"))
|
|
("ids.doi" . ,(oa-get result "ids.doi"))
|
|
("get-bibtex" . ,(oa--elisp-get-bibtex result))
|
|
("oa-refs" . ,(oa--elisp-get-oa-refs result))
|
|
("oa-related" . ,(oa--elisp-get-oa-related result))
|
|
("oa-cited" . ,(oa--elisp-get-oa-cited-by result))))))
|
|
|
|
(insert next-page)
|
|
|
|
(goto-char (point-min)))
|
|
(pop-to-buffer buf)))
|
|
|
|
|
|
;; * NSF Collaborators and Other Affiliations
|
|
(defun oa-coa (entity-id &optional COA-file)
|
|
"Get a list of collaborators for the past 5 years in tab-delimited form.
|
|
This is for Table 4 in the COA_template at
|
|
https://www.nsf.gov/bfa/dias/policy/coa/coa_template.xlsx.
|
|
|
|
ENTITY-ID is an identifier that OpenAlex can use. Used
|
|
interactively you can query and select an author.
|
|
|
|
If COA-FILE is non-nil write results to that file, otherwise save
|
|
to the clipboard. You should be able to paste the results
|
|
directly into Excel.
|
|
|
|
Results are sorted in alphaphabetical order by last name.
|
|
|
|
Caveats: OpenAlex provides the name in Firstname Initial Lastname
|
|
form. I assume this can be split on spaces, and the last word is
|
|
the last name. That is not always correct, so some manual name
|
|
fixing may be required.
|
|
|
|
The Institutions are not always reliable. I use the most recent
|
|
institution if an author is listed multiple times. Sometimes this
|
|
is empty, and sometimes an author has multiple institutions
|
|
listed.
|
|
|
|
There may be duplicates for people who have different names in
|
|
OpenAlex, e.g. missing initials, differences in spelling,
|
|
abbreviations, including having a period or not.
|
|
|
|
Your name will be included, you will need to delete this manually
|
|
in the Excel sheet.
|
|
|
|
This only gets the coauthors in publications known to OpenAlex.
|
|
Recently published papers are probably missing.
|
|
"
|
|
(interactive (list
|
|
(get-text-property 0 'oaid
|
|
(ivy-read "Author: " #'oa--author-candidates
|
|
:dynamic-collection t))
|
|
(when (y-or-n-p "Save to file?")
|
|
(read-file-name "File: "))))
|
|
|
|
(let* ((data (oa--author entity-id))
|
|
(works-url (plist-get data :works_api_url))
|
|
(works-data (request-response-data
|
|
(request works-url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params))))
|
|
(meta (plist-get works-data :meta))
|
|
(count (plist-get meta :count))
|
|
(per-page (plist-get meta :per_page))
|
|
(pages (/ count per-page))
|
|
(results (plist-get works-data :results))
|
|
(current-year (string-to-number (format-time-string "%Y" (current-time))))
|
|
(current-authors '()))
|
|
|
|
;; Now we need to accumulate the rest of the results from other pages
|
|
(when (> (mod count per-page) 0) (cl-incf pages))
|
|
|
|
(cl-loop for i from 2 to pages
|
|
do
|
|
(setq works-data (request-response-data
|
|
(request works-url
|
|
:sync t
|
|
:parser 'oa--response-parser
|
|
:params (oa--params `("page" . ,i))))
|
|
results (append results (plist-get works-data :results))))
|
|
|
|
;; Now results is a list of your publications. We need to iterate over each
|
|
;; one, and accumulate author information
|
|
(cl-loop for result in results do
|
|
(let ((year (plist-get result :publication_year)))
|
|
(when (> year (- current-year 5))
|
|
(cl-loop for authorship in (plist-get result :authorships) do
|
|
(let* ((author (plist-get authorship :author))
|
|
(name (plist-get author :display_name))
|
|
(name-parts (mapcar #'capitalize (split-string name)))
|
|
(name-string (concat (car (last name-parts)) ", "
|
|
(string-join (butlast name-parts) " ")))
|
|
|
|
(institutions (plist-get authorship :institutions))
|
|
(institution (plist-get (car institutions) :display_name)))
|
|
;; name, institution, contact info, last-active
|
|
;; we won't have contact info from openalex.
|
|
(push (list name-string institution "" year
|
|
(plist-get result :publication_date))
|
|
current-authors))))))
|
|
|
|
(setq current-authors (sort current-authors
|
|
(lambda (a b)
|
|
"Sort first on name, then on year in descending order"
|
|
(if (string= (nth 0 a) (nth 0 b))
|
|
(> (nth 3 a) (nth 3 b))
|
|
(string< (car a) (car b))))))
|
|
|
|
;; now filter for unique authors
|
|
(setq current-authors (cl-loop for group in (seq-group-by (lambda (x)
|
|
(car x))
|
|
current-authors)
|
|
collect (cadr group)))
|
|
|
|
;; Finally lets fix the year so Excel reads it correctly. I use the publication date
|
|
(setq current-authors (cl-loop for row in current-authors
|
|
collect
|
|
(list "A:"
|
|
(nth 0 row)
|
|
(nth 1 row)
|
|
(nth 2 row)
|
|
(nth 4 row))))
|
|
|
|
(if COA-file
|
|
(with-temp-file COA-file
|
|
(cl-loop for row in current-authors do
|
|
(insert (string-join (mapcar (lambda (x)
|
|
(format "%s" (or x "")))
|
|
row)
|
|
"\t")
|
|
"\n")))
|
|
|
|
|
|
(kill-new (mapconcat (lambda (row)
|
|
(concat (string-join (mapcar (lambda (x)
|
|
(format "%s" (or x "")))
|
|
row)
|
|
"\t")
|
|
"\n"))
|
|
current-authors))
|
|
(message "COA data on the clipboard."))))
|
|
|
|
|
|
|
|
;; * utilities
|
|
|
|
(defun oa-kill-buffers ()
|
|
"Kill OpenAlex buffers."
|
|
(interactive)
|
|
(cl-loop for buf in (buffer-list)
|
|
do
|
|
(when (string-prefix-p "*OpenAlex" (buffer-name buf))
|
|
(kill-buffer buf))))
|
|
|
|
|
|
(defun oa-get-bibtex-entries ()
|
|
"Download all the bibtex entries in the buffer.
|
|
Operates on headings with a DOI property."
|
|
(interactive)
|
|
(let ((bibfile (completing-read "Bibfile: " (org-ref-possible-bibfiles))))
|
|
(org-map-entries
|
|
(lambda ()
|
|
(kill-new (org-entry-get (point) "DOI"))
|
|
(doi-utils-add-bibtex-entry-from-doi
|
|
(doi-utils-maybe-doi-from-region-or-current-kill)
|
|
bibfile))
|
|
"DOI<>\"\"")))
|
|
|
|
|
|
(provide 'openalex)
|
|
|
|
;;; openalex.el ends here
|
|
|
|
;; Local Variables:
|
|
;; byte-compile-warnings: (not docstrings-wide)
|
|
;; End:
|