cache (tag . explanation) in `org-tag-eldoc-tag-explanations-alist` only when explana...
[org-tag-eldoc.git] / org-tag-eldoc-wikipedia.el
blob795938b127f0d8e3e044953ae7e9779dcce47506
1 ;;; org-tag-eldoc-wikipedia.el --- Web Scraping on Wikipedia -*- lexical-binding: t; -*-
2 ;; -*- coding: utf-8 -*-
4 ;; Copyright (C) 2024-2025 Christopher M. Miles, all rights reserved.
6 ;;; Commentary:
8 ;; search API: https://wikipedia.org/w/index.php?search=[query]
9 ;; tag URL: https://en.wikipedia.org/wiki/Computer_science
11 ;;; Code:
13 (require 'url)
14 (require 'url-http) ; for `url-http-end-of-headers'
15 (require 'request)
16 (require 'dom)
17 (require 'elquery nil t) ; optionally use elquery.el to replace dom.el.
18 (require 'org-tag-eldoc-common)
(defun org-tag-eldoc-wikipedia--request (tag)
  "Fetch the English Wikipedia page for TAG and extract its lead paragraph.

TAG is capitalized, percent-encoded and requested synchronously from
https://en.wikipedia.org/w/index.php?title=TAG with
`url-proxy-services' bound to `org-tag-eldoc-request-proxy'.

The text of the first <p> element without attributes found under the
element whose class matches `mw-content-ltr mw-parser-output' is
passed to `org-tag-eldoc-common--format-explanation', and the value of
that call is returned.  When the page cannot be retrieved, has no HTTP
header boundary, or contains no such paragraph, the empty string is
passed instead."
  (let ((request-backend 'url-retrieve) ; use `url-retrieve' backend for proxy.
        (url-proxy-services org-tag-eldoc-request-proxy))
    (let* ((url (format "https://en.wikipedia.org/w/index.php?title=%s"
                        ;; Percent-encode the title so that tags containing
                        ;; `#', `%' or `&' cannot corrupt the query string.
                        (url-hexify-string (capitalize tag))))
           (buffer (url-retrieve-synchronously url))
           (text ""))
      ;; `url-retrieve-synchronously' returns nil when the fetch fails.
      (when (buffer-live-p buffer)
        (unwind-protect
            (with-current-buffer buffer
              ;; Without a header boundary there is no body to parse.
              (when (bound-and-true-p url-http-end-of-headers)
                (goto-char url-http-end-of-headers)
                (let ((dom (libxml-parse-html-region (point) (point-max))))
                  ;; DEBUG: (setq request-result dom)
                  (setq text
                        (dom-texts
                         (dom-search
                          (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
                          ;; Only <p> nodes with an empty attribute list.
                          (lambda (node)
                            (and (eq (car node) 'p)
                                 (null (cadr node))))))))))
          ;; Always release the response buffer; it is of no further use.
          (kill-buffer buffer)))
      (org-tag-eldoc-common--format-explanation text))))
40 ;;; TEST: https://en.wikipedia.org/w/index.php?title=Computer_science
41 ;; (org-tag-eldoc-wikipedia--request "computer_science")
43 ;;; TEST
44 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
45 ;; (with-current-buffer
46 ;; (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
47 ;; (let ((dom (progn
48 ;; (goto-char url-http-end-of-headers)
49 ;; (libxml-parse-html-region (point) (point-max)))))
50 ;; (setq request-result dom)
51 ;; (setq org-tag-eldoc--explanation
52 ;; (dom-texts
53 ;; (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
54 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))))))
56 ;;; TEST
57 ;; (dom-texts
58 ;; (dom-search (dom-by-class request-result "mw-content-ltr\\ mw-parser-output")
59 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))
61 ;;; TEST
62 ;;; this example works fine.
63 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
64 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=GitHub")
65 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
66 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
67 ;; (lambda (node) (eq (car node) 'p)))))
68 ;; (setq node-github paragraph-node)
69 ;; (dom-texts paragraph-node)))))
71 ;; `node-github' ; looks fine
73 ;;; this example failed on `dom-texts'.
74 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
75 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
76 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
77 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
78 ;; (lambda (node) (eq (car node) 'p)))))
79 ;; (setq node-computer-science paragraph-node)
80 ;; (dom-texts paragraph-node)))))
82 ;; `node-computer-science' ; looks fine
84 ;;; this example works with a new `dom-search' rule.
85 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
86 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
87 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
88 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
89 ;; (lambda (node) (and (eq (cl-first node) 'p)
90 ;; (null (cl-second node)))))))
91 ;; (setq node-computer-science paragraph-node)
92 ;; (dom-texts paragraph-node)))))
(defun org-tag-eldoc-wikipedia-query (tag)
  "Look up the explanation of TAG, fetching it from Wikipedia if needed.

`org-tag-eldoc-database-query' is called for TAG first.  If
`org-tag-eldoc--explanation' holds a string afterwards, that string is
returned.  Otherwise `org-tag-eldoc-wikipedia--request' is called for
TAG, and after a pause of up to one second the value of
`org-tag-eldoc-database-save' for TAG and `org-tag-eldoc--explanation'
is returned."
  (org-tag-eldoc-database-query tag)
  (cond
   ;; An explanation string is already available: use it as is.
   ((stringp org-tag-eldoc--explanation)
    org-tag-eldoc--explanation)
   ;; Otherwise fetch from Wikipedia and store the result.
   ;; NOTE(review): the request's return value is discarded, so this
   ;; presumably relies on the request updating
   ;; `org-tag-eldoc--explanation' as a side effect -- confirm.
   (t
    (org-tag-eldoc-wikipedia--request tag)
    (sit-for 1.0)
    (org-tag-eldoc-database-save tag org-tag-eldoc--explanation))))
105 (provide 'org-tag-eldoc-wikipedia)
107 ;;; org-tag-eldoc-wikipedia.el ends here