1 //! \file CurlWikiGrabber.cpp
3 #include "CurlWikiGrabber.h"
10 #include "WalkerException.h"
//! libcurl write callback that appends the received bytes to a std::string.
//!
//! \param ptr      start of the received data (not null-terminated)
//! \param size     size of a single data element
//! \param nmemb    number of data elements
//! \param userdata pointer to the std::string registered via CURLOPT_WRITEDATA
//! \return the number of bytes consumed. Returning anything other than
//!         size * nmemb makes libcurl abort the transfer with a write error.
static size_t write_callback(char* ptr,
                             size_t size,
                             size_t nmemb,
                             void* userdata)
{
  const size_t byteCount = size * nmemb;

  // Without a target buffer there is nowhere to store the data.
  if(userdata == nullptr) {
    return 0;
  }

  try {
    static_cast<std::string*>(userdata)->append(ptr, byteCount);
  } catch(...) {
    // Exceptions must not unwind through libcurl's C stack frames;
    // report the failure through the return value instead.
    return 0;
  }

  return byteCount;
}
23 CurlWikiGrabber::CurlWikiGrabber()
25 int error
= curl_global_init(CURL_GLOBAL_ALL
);
28 throw WalkerException("CURL init failed");
32 CurlWikiGrabber::~CurlWikiGrabber()
34 curl_global_cleanup();
37 //! \todo Curl return code checking
38 std::string
CurlWikiGrabber::grabUrl(const std::string
& url
) const
40 CURL
* handle
= curl_easy_init();
42 if(nullptr == handle
) {
43 throw WalkerException("error initiating curl");
46 CURLcode crv
= curl_easy_setopt(handle
, CURLOPT_URL
, url
.c_str());
48 crv
= curl_easy_setopt(handle
,
50 "WikiWalker / github.com/dueringa/WikiWalker");
52 crv
= curl_easy_setopt(handle
, CURLOPT_WRITEFUNCTION
, write_callback
);
55 crv
= curl_easy_setopt(handle
, CURLOPT_FOLLOWLOCATION
, 1);
57 crv
= curl_easy_setopt(handle
, CURLOPT_ACCEPT_ENCODING
, "gzip");
60 const char* certbundle
= std::getenv("CURL_CA_BUNDLE");
62 if(skipSslVerificationState_
) {
63 // hostname verification
64 crv
= curl_easy_setopt(handle
, CURLOPT_SSL_VERIFYHOST
, 0);
67 crv
= curl_easy_setopt(handle
, CURLOPT_SSL_VERIFYPEER
, 0);
69 if(CURL_AT_LEAST_VERSION(7, 41, 0)) {
71 crv
= curl_easy_setopt(handle
, CURLOPT_SSL_VERIFYSTATUS
, 0);
74 } else if(certbundle
!= nullptr) {
75 crv
= curl_easy_setopt(handle
, CURLOPT_CAINFO
, certbundle
);
79 std::string gotContent
;
80 crv
= curl_easy_setopt(handle
, CURLOPT_WRITEDATA
, &gotContent
);
84 crv
= curl_easy_perform(handle
);
87 const char* err
= curl_easy_strerror(crv
);
88 std::string
text(err
);
89 throw WalkerException(text
);
93 crv
= curl_easy_getinfo(handle
, CURLINFO_RESPONSE_CODE
, &httpcode
);
96 curl_easy_cleanup(handle
);
100 if(httpcode
!= 200) {
101 throw WalkerException("Error getting article. HTTP error.");
107 void CurlWikiGrabber::skipSslVerification(bool state
)
109 skipSslVerificationState_
= state
;
111 } // namespace WikiWalker
114 // https://en.wikipedia.org/w/api.php
115 // /w/api.php?action=query&format=json&prop=links&plnamespace=0&titles=<title>
116 // maybe &pllimit=100