pkg update and first config fix
org-brain not working, add org-roam
This commit is contained in:
396
lisp/org-roam-bibtex/orb-anystyle.el
Normal file
396
lisp/org-roam-bibtex/orb-anystyle.el
Normal file
@@ -0,0 +1,396 @@
|
||||
;;; orb-anystyle.el --- Orb Roam BibTeX: Elisp interface to Anystyle -*- lexical-binding: t -*-
|
||||
|
||||
;; Copyright © 2020-2022 Mykhailo Shevchuk
|
||||
|
||||
;; Author: Mykhailo Shevchuk <mail@mshevchuk.com>
|
||||
;; URL: https://github.com/org-roam/org-roam-bibtex
|
||||
|
||||
;; This file is NOT part of GNU Emacs.
|
||||
|
||||
;; This program is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 3, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; This program is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License along with
|
||||
;; this program; see the file LICENSE. If not, visit
|
||||
;; <https://www.gnu.org/licenses/>.
|
||||
|
||||
;; N.B. This file contains code snippets adopted from other
|
||||
;; open-source projects. These snippets are explicitly marked as such
|
||||
;; in place. They are not subject to the above copyright and
|
||||
;; authorship claims.
|
||||
|
||||
;;; Commentary:
|
||||
;;
|
||||
|
||||
;;; Code:
|
||||
;; * Library requires
|
||||
|
||||
(require 'orb-core)
|
||||
|
||||
(eval-when-compile
|
||||
(require 'subr-x)
|
||||
(require 'cl-macs))
|
||||
|
||||
;; * Customize definitions
|
||||
|
||||
(defcustom orb-anystyle-executable "anystyle"
  "Program name of anystyle or a path to its executable.
This is the default value of the :exec argument of `orb-anystyle'."
  :group 'orb-anystyle
  :type '(choice (const "anystyle")
                 (file :tag "Path to executable" :must-match t)))
|
||||
|
||||
(defcustom orb-anystyle-pdfinfo-executable nil
  "Location of the pdfinfo executable handed over to anystyle.
A nil value leaves it to anystyle to find pdfinfo in the system path."
  :group 'orb-anystyle
  :type '(choice
          (file :tag "Path to executable")
          (const nil)))
|
||||
|
||||
(defcustom orb-anystyle-pdftotext-executable nil
  "Location of the pdftotext executable handed over to anystyle.
A nil value leaves it to anystyle to find pdftotext in the system path."
  :group 'orb-anystyle
  :type '(choice
          (file :tag "Path to executable")
          (const nil)))
|
||||
|
||||
(defcustom orb-anystyle-parser-model nil
  "Custom parser model file for anystyle.
This is the default value of the :parser-model argument of
`orb-anystyle'.  Nil selects the built-in model."
  :group 'orb-anystyle
  :type '(choice
          (file :tag "Path to file" :must-match t)
          (const :tag "Built-in" nil)))
|
||||
|
||||
(defcustom orb-anystyle-finder-model nil
  "Custom finder model file for anystyle.
This is the default value of the :finder-model argument of
`orb-anystyle'.  Nil selects the built-in model."
  :group 'orb-anystyle
  :type '(choice
          (file :tag "Path to file" :must-match t)
          (const :tag "Built-in" nil)))
|
||||
|
||||
;; --crop is currently broken upstream

(defcustom orb-anystyle-find-crop nil
  "Crop value in pt handed over to `anystyle find'.
Either a single integer, a cons cell of two integers, or nil for
no cropping."
  :group 'orb-anystyle
  :type '(choice (integer :tag "Top and bottom")
                 (cons :tag "Top, bottom, left and right"
                       (integer :tag "Top and bottom")
                       (integer :tag "Left and right"))
                 (const :tag "Do not crop" nil)))
|
||||
|
||||
(defcustom orb-anystyle-find-solo nil
  "Whether `anystyle find' gets the `--solo' flag.
Non-nil means pass the flag.  This is the default value of the
:solo argument of `orb-anystyle'."
  :group 'orb-anystyle
  :type '(choice (const :tag "Yes" t)
                 (const :tag "No" nil)))
|
||||
|
||||
(defcustom orb-anystyle-find-layout nil
  "Whether `anystyle find' gets the `--layout' flag.
Non-nil means pass the flag.  This is the default value of the
:layout argument of `orb-anystyle'."
  :group 'orb-anystyle
  :type '(choice (const :tag "Yes" t)
                 (const :tag "No" nil)))
|
||||
|
||||
(defcustom orb-anystyle-default-buffer "*Orb Anystyle Output*"
  "Name of the buffer that receives anystyle output by default.
This is the default value of the :buffer argument of `orb-anystyle'."
  :group 'orb-anystyle
  :type 'string)
|
||||
|
||||
(defcustom orb-anystyle-user-directory
  (concat (file-name-as-directory user-emacs-directory) "anystyle")
  "Directory where anystyle user files are kept.
Defaults to the \"anystyle\" entry inside `user-emacs-directory'."
  :group 'orb-anystyle
  :type 'directory)
|
||||
|
||||
(defcustom orb-anystyle-parser-training-set
  (concat (file-name-as-directory orb-anystyle-user-directory) "core.xml")
  "XML file containing parser training data.
Defaults to \"core.xml\" inside `orb-anystyle-user-directory'.
The `train' command of `orb-anystyle' uses the directory of this
file as the default location of its output model."
  :type '(file :must-match t)
  ;; Same customization group as every other option in this file.
  :group 'orb-anystyle)
|
||||
|
||||
(defcustom orb-anystyle-finder-training-set
  (f-join (file-name-as-directory orb-anystyle-user-directory) "ttx/")
  "Directory containing finder training data (.ttx files).
Defaults to the \"ttx\" entry inside `orb-anystyle-user-directory'."
  :type 'directory
  ;; Same customization group as every other option in this file.
  :group 'orb-anystyle)
|
||||
|
||||
;; * Main functions
|
||||
|
||||
;;;###autoload
(cl-defun orb-anystyle (command
                        &key (exec orb-anystyle-executable)
                        verbose help version adapter
                        ((:finder-model fmodel) orb-anystyle-finder-model)
                        ((:parser-model pmodel) orb-anystyle-parser-model)
                        (pdfinfo orb-anystyle-pdfinfo-executable)
                        (pdftotext orb-anystyle-pdftotext-executable)
                        format stdout overwrite
                        (crop orb-anystyle-find-crop)
                        (solo orb-anystyle-find-solo)
                        (layout orb-anystyle-find-layout)
                        input output
                        (buffer orb-anystyle-default-buffer))
  "Run anystyle COMMAND with `shell-command'.
COMMAND is the first, positional argument.  The remaining
arguments are keyword arguments with the following recognized keys:

Anystyle CLI options
==========
1) EXEC      :exec      => string (valid executable)
   - default value can be set through `orb-anystyle-executable'

2) COMMAND              => symbol or string
   - valid values: find parse help check license train
   - may be nil when HELP or VERSION is non-nil

3) Global options can be passed with the following keys.

   FMODEL    :finder-model => string (valid file path)
   PMODEL    :parser-model => string (valid file path)
   PDFINFO   :pdfinfo      => string (valid executable)
   PDFTOTEXT :pdftotext    => string (valid executable)
   ADAPTER   :adapter      => anything
   STDOUT    :stdout       => boolean
   HELP      :help         => boolean
   VERBOSE   :verbose      => boolean
   VERSION   :version      => boolean
   OVERWRITE :overwrite    => boolean
   FORMAT    :format       => string, symbol or list of unquoted symbols

   - FORMAT must be one or more output formats accepted by anystyle commands:
     parse => bib csl json ref txt xml
     find  => bib csl json ref txt ttx xml
   - string must be space- or comma-separated, additional spaces are
     ignored
   - every requested format must be accepted by COMMAND

   Default values for some of these options can be set globally via
   the following variables: `orb-anystyle-finder-model',
   `orb-anystyle-parser-model', `orb-anystyle-pdfinfo-executable',
   `orb-anystyle-pdftotext-executable'.

4) Command options can be passed with the following keys:

   CROP      :crop         => integer or cons cell of integers
   LAYOUT    :layout       => boolean
   SOLO      :solo         => boolean

   - Command options are ignored for commands other than find
   - anystyle help -c flag is not supported

   Default values for these options can be set globally via the
   following variables: `orb-anystyle-find-crop',
   `orb-anystyle-find-layout', `orb-anystyle-find-solo'.

5) INPUT     :input   => string (file path)
   - for the help command: the name of an anystyle command

6) OUTPUT    :output  => string (file path)

`shell-command'-related options
==========

7) BUFFER    :buffer  => buffer-or-name

   - `shell-command''s OUTPUT-BUFFER
   - can be a cons cell (OUTPUT-BUFFER . ERROR-BUFFER)
   - when nil, defaults to `orb-anystyle-default-buffer'

The train command is started asynchronously with
`start-process-shell-command'; every other command runs through
`shell-command'.  The value of that call is returned.

Signal a `user-error' when the executable is not found, when
COMMAND, INPUT, FORMAT or CROP is invalid, or when a required
INPUT is missing.

anystyle CLI command synopsis:
anystyle [global options] command [command options] [arguments...].

Homepage: https://anystyle.io
Github: https://github.com/inukshuk/anystyle-cli
Courtesy of its authors."
  (declare (indent 1))
  (let* ((commands '(find parse check train help license))
         (exec (executable-find exec))
         ;; An explicit `:buffer nil' bypasses the keyword default, so
         ;; apply the documented fallback here as well.
         (buffer (or buffer orb-anystyle-default-buffer))
         (buf (if (consp buffer) buffer (list buffer)))
         ;; '(a b c) => "a,b,c"
         (to-string (lambda (items)
                      (mapconcat (lambda (item) (format "%s" item))
                                 items ",")))
         (anystyle-run
          (lambda (str)
            (if (eq command 'train)
                ;; train can take minutes, so run it in a sub-process
                (start-process-shell-command "anystyle" (car buf) str)
              (shell-command str (car buf) (cdr buf)))))
         global-options command-options anystyle)
    ;; executable is a must
    (unless exec
      (user-error "Anystyle executable not found! \
Install anystyle-cli before running Orb PDF Scrapper"))
    ;; we process :version and :help before checking command
    ;; since with this global flag command is not required
    (cond
     ;; help flag takes priority
     (help
      (setq global-options " --help"
            command-options ""
            input nil
            output nil))
     ;; anystyle ignores everything with --version flag except the
     ;; --help flag, which we've just resolved above
     (version
      ;; the leading space separates the flag from the executable
      (setq global-options " --version"
            command nil
            command-options ""
            input nil
            output nil))
     ;; otherwise command is a must
     ((not command)
      (user-error "Anystyle command required: \
find, parse, check, train, help or license")))
    (when (stringp command)
      (setq command (intern command)))
    ;; command must be a valid command; it can only be nil here when
    ;; :help or :version was given, in which case none is needed
    (unless (or (null command) (memq command commands))
      (user-error "Invalid command %s. Valid commands are \
find, parse, check, train, help and license" command))
    ;;
    ;; command specific arguments
    (cl-case command
      (help
       (when (stringp input)
         (setq input (intern input)))
       (unless (or (and global-options
                        (string= global-options " --help"))
                   (memq input commands))
         (user-error "Invalid input %s. Valid input for 'anystyle help': \
find, parse, check, train, help or license" input)))
      (license
       (setq input nil
             output nil
             global-options ""
             command-options ""))
      (check
       (setq output nil))
      (find
       ;; pdfinfo and pdftotext must be present in the system
       (when (and pdfinfo (not (executable-find pdfinfo)))
         (user-error "Executable not found: pdfinfo, %s" pdfinfo))
       (when (and pdftotext (not (executable-find pdftotext)))
         (user-error "Executable not found: pdftotext, %s" pdftotext))
       (setq global-options
             (orb-format "%s" global-options
                         " --pdfinfo=\"%s\"" pdfinfo
                         " --pdftotext=\"%s\"" pdftotext))
       ;; Command options
       ;; N.B. Help command accepts a command option -c but it's totally
       ;; irrelevant for us:
       ;;
       ;; [COMMAND OPTIONS]
       ;; -c - List commands one per line, to assist with shell completion
       ;; so we do not implement it
       ;;
       ;; :crop value should be integer; if no value was explicitly supplied,
       ;; use the default from `orb-anystyle-find-crop'
       (when crop
         (unless (consp crop)
           (setq crop (list crop)))
         (let ((x (car crop))
               (y (or (cdr crop) 0)))
           (unless (and (integerp x)
                        (integerp y))
             (user-error "Invalid value %s,%s. Number expected" x y))
           (setq crop (format "%s,%s" x y))))
       ;; parse only accepts --[no]-layout, so we ignore the rest
       ;; append command options to command
       (setq command-options
             (orb-format " --crop=%s" crop
                         " --layout" (cons layout " --no-layout")
                         " --solo" (cons solo " --no-solo"))))
      (train
       (unless output
         (setq output
               (concat (or (file-name-directory orb-anystyle-parser-training-set)
                           (file-name-as-directory orb-anystyle-user-directory))
                       "parser.mod")))))
    ;; Arguments relevant for more than one command
    ;;
    ;; find, parse:
    ;; format option should be one of accepted types if present
    (when (and (memq command '(find parse))
               format)
      (when (stringp format)
        (setq format
              (mapcar #'intern
                      (split-string (string-trim format)
                                    "[, ]" t " "))))
      (unless (listp format)
        (setq format (list format)))
      (let ((accepted-formats
             (cl-case command
               (find '(bib csl json ref txt ttx xml))
               (parse '(bib csl json ref txt xml))))
            invalid-formats)
        ;; collect every requested format the command does not accept
        (dolist (fmt format)
          (unless (memq fmt accepted-formats)
            (push fmt invalid-formats)))
        (when invalid-formats
          (user-error
           "Invalid format(s) %s. Valid formats for command %s: %s"
           (funcall to-string (nreverse invalid-formats))
           command
           (funcall to-string accepted-formats)))
        ;; convert format to a comma-separated string and append
        ;; it to global options
        (setq global-options
              (orb-format "%s" global-options
                          " -f %s" (funcall to-string format)))))
    ;; find, parse, check accept
    ;; finder and parser models
    (when (memq command '(find parse check))
      (when (and fmodel (not (f-exists? fmodel)))
        ;; `display-warning' takes a ready-made message, not a format
        ;; string with arguments
        (display-warning 'org-roam-bibtex
                         (format "Finder model file not found: %s, \
using the default one" fmodel))
        (setq fmodel nil))
      (when (and pmodel (not (f-exists? pmodel)))
        (display-warning 'org-roam-bibtex
                         (format "Parser model file not found: %s, \
using the default one" pmodel))
        (setq pmodel nil))
      (setq global-options (orb-format "%s" global-options
                                       " -F \"%s\"" fmodel
                                       " -P \"%s\"" pmodel)))
    ;; find, train, parse and check:
    ;; 1) require input, which should be a valid path
    ;; 2) something called ruby adapter, probably a right place here
    ;; 3) --verbose, --stdout, --overwrite if non-nil
    (when (memq command '(find train parse check))
      (unless input
        (user-error "Input required for command %s" command))
      (unless (and (stringp input) (f-exists? input))
        (user-error "Invalid input file or directory %s" input))
      (setq global-options
            (orb-format
             "%s" global-options
             " --verbose" (cons verbose " --no-verbose")
             ;; this flag does nothing for check
             " --stdout" (cons stdout " --no-stdout")
             " --adapter=\"%s\"" adapter
             " --overwrite" (cons overwrite " --no-overwrite"))))
    ;; Set arguments and run the program
    ;;
    ;; NOTE(review): arguments are wrapped in plain double quotes rather
    ;; than passed through `shell-quote-argument'; paths containing
    ;; double quotes or `$' are not escaped.
    (setq anystyle (orb-format "%s" exec
                               "%s" global-options
                               " %s" command
                               "%s" command-options
                               ;; for help, input is a command name (a
                               ;; symbol), not a file to be resolved
                               " \"%s\"" (cond
                                          ((null input) nil)
                                          ((eq command 'help) input)
                                          (t (file-truename input)))
                               " \"%s\"" (when output (file-truename output))))
    (funcall anystyle-run anystyle)))
|
||||
|
||||
(provide 'orb-anystyle)
|
||||
;;; orb-anystyle.el ends here
|
||||
;; Local Variables:
|
||||
;; coding: utf-8
|
||||
;; fill-column: 79
|
||||
;; End:
|
||||
Reference in New Issue
Block a user