Update TODO
[dueringa_WikiWalker.git] / src / CurlWikiGrabber.cpp
blob17509a2aa2ba4be49781c53e1f3a4367dcb57019
1 //! \file CurlWikiGrabber.cpp
3 #include "CurlWikiGrabber.h"
5 #include <cassert>
6 #include <cstdlib>
8 #include <curl/curl.h>
10 #include "WalkerException.h"
12 namespace WikiWalker
//! libcurl write callback that appends the received bytes to a std::string.
//!
//! \param ptr      start of the received data (not null-terminated)
//! \param size     size of one data item
//! \param nmemb    number of data items
//! \param userdata pointer to the std::string collecting the data
//! \return the number of bytes consumed, i.e. size * nmemb on success.
//!         0 is returned if \p userdata is null or appending fails, which
//!         makes libcurl abort the transfer when data was pending.
static size_t write_callback(char* ptr,
                             size_t size,
                             size_t nmemb,
                             void* userdata)
{
  const size_t byteCount = size * nmemb;
  auto* target           = static_cast<std::string*>(userdata);

  if(target == nullptr) {
    return 0;
  }

  try {
    target->append(ptr, byteCount);
  } catch(...) {
    // This function is invoked from libcurl's C code, so an exception
    // (e.g. std::bad_alloc) must not unwind through it. Reporting a short
    // count lets libcurl fail the transfer with a write error instead.
    return 0;
  }

  return byteCount;
}
23 CurlWikiGrabber::CurlWikiGrabber()
25 int error = curl_global_init(CURL_GLOBAL_ALL);
27 if(error != 0) {
28 throw WalkerException("CURL init failed");
32 CurlWikiGrabber::~CurlWikiGrabber()
34 curl_global_cleanup();
37 //! \todo Curl return code checking
38 std::string CurlWikiGrabber::grabUrl(const std::string& url) const
40 CURL* handle = curl_easy_init();
42 if(nullptr == handle) {
43 throw WalkerException("error initiating curl");
46 CURLcode crv = curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
47 assert(crv == 0);
48 crv = curl_easy_setopt(handle,
49 CURLOPT_USERAGENT,
50 "WikiWalker / github.com/dueringa/WikiWalker");
51 assert(crv == 0);
52 crv = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, write_callback);
53 assert(crv == 0);
54 // allow redirects
55 crv = curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1);
56 assert(crv == 0);
57 crv = curl_easy_setopt(handle, CURLOPT_ACCEPT_ENCODING, "gzip");
58 assert(crv == 0);
60 const char* certbundle = std::getenv("CURL_CA_BUNDLE");
62 if(skipSslVerificationState_) {
63 // hostname verification
64 crv = curl_easy_setopt(handle, CURLOPT_SSL_VERIFYHOST, 0);
65 assert(crv == 0);
66 // check against CA
67 crv = curl_easy_setopt(handle, CURLOPT_SSL_VERIFYPEER, 0);
68 assert(crv == 0);
69 if(CURL_AT_LEAST_VERSION(7, 41, 0)) {
70 // ignore status
71 crv = curl_easy_setopt(handle, CURLOPT_SSL_VERIFYSTATUS, 0);
72 assert(crv == 0);
74 } else if(certbundle != nullptr) {
75 crv = curl_easy_setopt(handle, CURLOPT_CAINFO, certbundle);
76 assert(crv == 0);
79 std::string gotContent;
80 crv = curl_easy_setopt(handle, CURLOPT_WRITEDATA, &gotContent);
81 assert(crv == 0);
83 gotContent = "";
84 crv = curl_easy_perform(handle);
86 if(crv != 0) {
87 const char* err = curl_easy_strerror(crv);
88 std::string text(err);
89 throw WalkerException(text);
92 long httpcode = 0;
93 crv = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &httpcode);
94 assert(crv == 0);
96 curl_easy_cleanup(handle);
98 handle = nullptr;
100 if(httpcode != 200) {
101 throw WalkerException("Error getting article. HTTP error.");
104 return gotContent;
107 void CurlWikiGrabber::skipSslVerification(bool state)
109 skipSslVerificationState_ = state;
111 } // namespace WikiWalker
113 // note to self: API
114 // https://en.wikipedia.org/w/api.php
115 // /w/api.php?action=query&format=json&prop=links&plnamespace=0&titles=<title>
116 // maybe &pllimit=100