Fix string strip properties function error
[org-tag-eldoc.git] / org-tag-eldoc-wikipedia.el
blob7442b8333f6204644c008671bad208af9f9bdb75
1 ;;; org-tag-eldoc-wikipedia.el --- Web Scraping on Wikipedia -*- lexical-binding: t; -*-
2 ;; -*- coding: utf-8 -*-
4 ;; Copyright (C) 2024-2025 Christopher M. Miles, all rights reserved.
6 ;;; Commentary:
8 ;; search API: https://wikipedia.org/w/index.php?search=[query]
9 ;; tag URL: https://en.wikipedia.org/wiki/Computer_science
11 ;;; Code:
13 (require 'url)
14 (require 'url-http) ; for `url-http-end-of-headers'
15 (require 'request)
16 (require 'dom)
17 (require 'elquery nil t) ; optionally use elquery.el to replace dom.el.
18 (require 'org-tag-eldoc-common)
(defun org-tag-eldoc-wikipedia--request (tag)
  "Send an HTTP GET request to Wikipedia to fetch the explanation of TAG.

TAG is passed through `capitalize' and then percent-encoded with
`url-hexify-string' before it is placed in the `title' query
parameter, so tags containing reserved or non-ASCII characters
\(for example \"C#\" or \"C++\") still yield a valid URL.

The response body is parsed with `libxml-parse-html-region' when
that function is defined, otherwise with elquery when that feature
is loaded.  The success handler uses the same test to decide how to
read the parsed data, extracts the text of a <p> element from the
\"mw-content-ltr mw-parser-output\" container, runs it through
`org-tag-eldoc-common--format-explanation' and stores the result in
`org-tag-eldoc--explanation'.

On a request error a message is displayed and
`org-tag-eldoc--explanation' is left untouched.

Return the value of the `request' call."
  (let ((request-backend 'url-retrieve) ; use `url-retrieve' backend for proxy.
        (url-proxy-services org-tag-eldoc-request-proxy)
        ;; Percent-encode the title so characters such as "#", "+", "&"
        ;; or multibyte text cannot corrupt the query string.
        (url (format "https://en.wikipedia.org/w/index.php?title=%s"
                     (url-hexify-string (capitalize tag)))))
    (request url
      :type "GET"
      :parser (lambda ()
                (cond
                 ;; convert HTML -> Elisp alist structure
                 ((fboundp 'libxml-parse-html-region)
                  (libxml-parse-html-region (point-min) (point-max)))
                 ;; convert HTML -> elquery object structure
                 ((featurep 'elquery)
                  (elquery-read-buffer (current-buffer)))))
      :success (cl-function
                (lambda (&key data &allow-other-keys)
                  ;; Dispatch on the SAME conditions as :parser so DATA is
                  ;; always read with the library that produced it.
                  (cond
                   ;; DATA was produced by the `libxml' parser.
                   ((fboundp 'libxml-parse-html-region)
                    (setq org-tag-eldoc--explanation
                          (org-tag-eldoc-common--format-explanation
                           (dom-texts
                            (dom-search
                             (dom-by-class data "mw-content-ltr\\ mw-parser-output")
                             ;; Keep only <p> nodes that carry no attributes.
                             (lambda (node)
                               (and (eq (car node) 'p)
                                    (null (cadr node)))))))))
                   ;; DATA was produced by the `elquery' parser.
                   ((featurep 'elquery)
                    (let ((text (elquery-text
                                 (car (elquery-$ "#mw-content-text > div.mw-content-ltr.mw-parser-output > p" data)))))
                      ;; Format only real text; a missing paragraph keeps the
                      ;; explanation nil, as before.
                      (setq org-tag-eldoc--explanation
                            (and (stringp text)
                                 (org-tag-eldoc-common--format-explanation text))))))))
      :error (cl-function
              (lambda (&key error-thrown &allow-other-keys)
                (message "[org-tag-eldoc] (Wikipedia) request error %s!" error-thrown)
                nil))
      ;; `ignore' accepts any arguments and returns nil.
      :status-code '((404 . ignore)
                     (500 . ignore)))))
56 ;; (with-current-buffer (url-retrieve-synchronously url)
57 ;; (let ((dom (progn
58 ;; (goto-char url-http-end-of-headers)
59 ;; (libxml-parse-html-region (point) (point-max)))))
60 ;; ;; DEBUG: (setq request-result dom)
61 ;; (org-tag-eldoc-common--format-explanation
62 ;; (cond
63 ;; ((featurep 'dom)
64 ;; (dom-texts
65 ;; (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
66 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node)))))))
67 ;; ((featurep 'elquery)
68 ;; (elquery-text (car (elquery-$ "#mw-content-text > div.mw-content-ltr.mw-parser-output > p" data))))))))
71 ;;; TEST: https://en.wikipedia.org/w/index.php?title=Computer_science
72 ;; (org-tag-eldoc-wikipedia--request "computer_science")
74 ;;; TEST
75 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
76 ;; (with-current-buffer
77 ;; (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
78 ;; (let ((dom (progn
79 ;; (goto-char url-http-end-of-headers)
80 ;; (libxml-parse-html-region (point) (point-max)))))
81 ;; (setq request-result dom)
82 ;; (setq org-tag-eldoc--explanation
83 ;; (dom-texts
84 ;; (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
85 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))))))
87 ;;; TEST
88 ;; (dom-texts
89 ;; (dom-search (dom-by-class request-result "mw-content-ltr\\ mw-parser-output")
90 ;; (lambda (node) (and (eq (cl-first node) 'p) (null (cl-second node))))))
92 ;;; TEST
93 ;;; this example works fine.
94 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
95 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=GitHub")
96 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
97 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
98 ;; (lambda (node) (eq (car node) 'p)))))
99 ;; (setq node-github paragraph-node)
100 ;; (dom-texts paragraph-node)))))
102 ;; `node-github' ; looks fine
104 ;;; this example failed on `dom-texts'.
105 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
106 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
107 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
108 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
109 ;; (lambda (node) (eq (car node) 'p)))))
110 ;; (setq node-computer-science paragraph-node)
111 ;; (dom-texts paragraph-node)))))
113 ;; `node-computer-science' ; looks fine
115 ;;; this example works with a new `dom-search' rule.
116 ;; (let ((url-proxy-services org-tag-eldoc-request-proxy))
117 ;; (with-current-buffer (url-retrieve-synchronously "https://en.wikipedia.org/w/index.php?title=Computer_science")
118 ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
119 ;; (let ((paragraph-node (dom-search (dom-by-class dom "mw-content-ltr\\ mw-parser-output")
120 ;; (lambda (node) (and (eq (cl-first node) 'p)
121 ;; (null (cl-second node)))))))
122 ;; (setq node-computer-science paragraph-node)
123 ;; (dom-texts paragraph-node)))))
(defun org-tag-eldoc-wikipedia-query (tag)
  "Look up TAG on Wikipedia.

When `org-tag-eldoc--explanation' already holds a string, return it
without sending a request.  Otherwise call
`org-tag-eldoc-wikipedia--request' for TAG, wait up to one second
with `sit-for', then pass TAG and the current value of
`org-tag-eldoc--explanation' to `org-tag-eldoc-database-save' and
return whatever that call returns."
  (cond
   ;; An explanation string is already available: reuse it.
   ((stringp org-tag-eldoc--explanation)
    org-tag-eldoc--explanation)
   (t
    (org-tag-eldoc-wikipedia--request tag)
    ;; Pause before reading the variable the request's success handler
    ;; sets.  NOTE(review): presumably the request is asynchronous --
    ;; confirm that one second is always enough.
    (sit-for 1.0)
    (org-tag-eldoc-database-save tag org-tag-eldoc--explanation))))
135 (provide 'org-tag-eldoc-wikipedia)
137 ;;; org-tag-eldoc-wikipedia.el ends here