`string-fill` requires emacs 28.1
[org-tag-eldoc.git] / org-tag-eldoc-wikipedia.el
blobf5eac60745bd3859a64f533052b68b21341d86b8
1 ;;; org-tag-eldoc-wikipedia.el --- Web Scraping on Wikipedia -*- lexical-binding: t; -*-
2 ;; -*- coding: utf-8 -*-
4 ;; Copyright (C) 2024-2025 Christopher M. Miles, all rights reserved.
6 ;;; Commentary:
8 ;; search API: https://wikipedia.org/w/index.php?search=[query]
9 ;; tag URL: https://en.wikipedia.org/wiki/Computer_science
11 ;;; Code:
13 (require 'url)
14 (require 'url-http) ; for `url-http-end-of-headers'
15 (require 'request)
16 (require 'dom)
17 (require 'elquery nil t) ; optionally use elquery.el to replace dom.el.
18 (require 'org-tag-eldoc-common)
(defun org-tag-eldoc-wikipedia--request (tag)
  "Send HTTP GET request to get TAG explanation data from Wikipedia.

TAG is capitalized with `capitalize' and percent-encoded before it
is used as the `title' query parameter.

The response body is parsed with `libxml-parse-html-region' when
that function is defined, otherwise with elquery when that feature
is loaded.  On success the text of the matching paragraph is stored
in `org-tag-eldoc--explanation'.  On a request error, or a 404/500
status code, nothing is stored.

Return the value of the `request' call."
  (let* ((request-backend 'url-retrieve) ; use `url-retrieve' backend for proxy.
         (request-message-level -1)
         (url-proxy-services org-tag-eldoc-request-proxy)
         ;; Decide once which HTML parser is used, so that the :parser and
         ;; :success callbacks always agree on the shape of the parsed data.
         (use-libxml (fboundp 'libxml-parse-html-region))
         (use-elquery (and (not use-libxml) (featurep 'elquery)))
         ;; TAG may contain characters that are special inside a URL
         ;; (e.g. `#', `%', `&'), so encode it before building the query.
         (url (format "https://en.wikipedia.org/w/index.php?title=%s"
                      (url-hexify-string (capitalize tag)))))
    (request url
      :type "GET"
      :parser (lambda ()
                (cond
                 (use-libxml ; convert HTML -> Elisp alist structure
                  (libxml-parse-html-region (point-min) (point-max)))
                 (use-elquery ; convert HTML -> elquery object structure
                  (elquery-read-buffer (current-buffer)))))
      :success (cl-function
                (lambda (&key data &allow-other-keys)
                  ;; DEBUG: (setq request-result data)
                  (cond
                   (use-libxml
                    ;; for `libxml' parser
                    (setq org-tag-eldoc--explanation
                          (org-tag-eldoc-common--format-explanation
                           (dom-texts
                            (dom-search
                             (dom-by-class data "mw-content-ltr\\ mw-parser-output")
                             ;; Keep only `p' nodes whose attribute list
                             ;; (second element of the node) is empty.
                             (lambda (node)
                               (and (eq (car node) 'p)
                                    (null (cadr node)))))))))
                   (use-elquery
                    ;; for `elquery' parser
                    (setq org-tag-eldoc--explanation
                          (org-tag-eldoc-common--format-explanation
                           (elquery-text
                            (car (elquery-$ "#mw-content-text > div.mw-content-ltr.mw-parser-output > p"
                                            data)))))))))
      ;; Request errors are ignored: no explanation is stored.
      :error (lambda (&rest _) nil)
      :status-code '((404 . (lambda (&rest _) nil))
                     (500 . (lambda (&rest _) nil))))))
57 ;; (with-current-buffer (url-retrieve-synchronously url)
58 ;; (let ((dom (progn
59 ;; (goto-char url-http-end-of-headers)
60 ;; (libxml-parse-html-region (point) (point-max)))))
61 ;; ;; DEBUG: (setq request-result dom)
62 ;; (org-tag-eldoc-common--format-explanation
63 ;; (cond
64 ;; ((featurep 'dom)
65 ;; (dom-texts
66 ;; (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
67 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node)))))))
68 ;; ((featurep 'elquery)
69 ;; (elquery-text (car (elquery-$ "#mw-content-text > div.mw-content-ltr.mw-parser-output > p" data))))))))
72 ;;; TEST: https://en.wikipedia.org/w/index.php?title=Computer_science
73 ;; (org-tag-eldoc-wikipedia--request "computer_science")
75 ;;; TEST
76 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
77 ;; (with-current-buffer
78 ;; (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
79 ;; (let ((dom (progn
80 ;; (goto-char url-http-end-of-headers)
81 ;; (libxml-parse-html-region (point) (point-max)))))
82 ;; (setq request-result dom)
83 ;; (setq org-tag-eldoc--explanation
84 ;; (dom-texts
85 ;; (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
86 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))))))
88 ;;; TEST
89 ;; (dom-texts
90 ;; (dom-search (dom-by-class request-result "mw-content-ltr\\ mw-parser-output")
91 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))
93 ;;; TEST
94 ;;; this example works fine.
95 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
96 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=GitHub")
97 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
98 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
99 ;; (lambda (node) (eq (car node) 'p)))))
100 ;; (setq node-github paragraph-node)
101 ;; (dom-texts paragraph-node)))))
103 ;; `node-github' ; looks fine
105 ;;; this example failed on `dom-texts'.
106 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
107 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
108 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
109 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
110 ;; (lambda (node) (eq (car node) 'p)))))
111 ;; (setq node-computer-science paragraph-node)
112 ;; (dom-texts paragraph-node)))))
114 ;; `node-computer-science' ; looks fine
;;; this example works with a new `dom-search' rule.
117 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
118 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
119 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
120 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
121 ;; (lambda (node) (and (eq (cl-first node) 'p)
122 ;; (null (cl-second node)))))))
123 ;; (setq node-computer-science paragraph-node)
124 ;; (dom-texts paragraph-node)))))
(defun org-tag-eldoc-wikipedia-query (tag)
  "Look up TAG on Wikipedia and return its explanation.

When `org-tag-eldoc--explanation' already holds a string, return
that string without sending a request.  Otherwise call
`org-tag-eldoc-wikipedia--request' for TAG, wait up to one second,
then pass TAG and the current value of `org-tag-eldoc--explanation'
to `org-tag-eldoc-database-save' and return that function's result."
  (cond
   ((stringp org-tag-eldoc--explanation)
    org-tag-eldoc--explanation)
   (t
    (org-tag-eldoc-wikipedia--request tag)
    ;; Presumably gives the request callback time to fill in
    ;; `org-tag-eldoc--explanation' before it is saved.
    ;; NOTE(review): the value is saved even if it is still not a
    ;; string after the wait -- confirm that is intended.
    (sit-for 1.0)
    (org-tag-eldoc-database-save tag org-tag-eldoc--explanation))))
136 (provide 'org-tag-eldoc-wikipedia)
138 ;;; org-tag-eldoc-wikipedia.el ends here