[videodb] remove unused seasons table from episode_view
[xbmc.git] / xbmc / addons / Scraper.cpp
blob23e4b4fcf9f7bc73d01905431f7d26de34259a17
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
9 #include "Scraper.h"
11 #include "FileItem.h"
12 #include "FileItemList.h"
13 #include "ServiceBroker.h"
14 #include "URL.h"
15 #include "Util.h"
16 #include "addons/AddonManager.h"
17 #include "addons/addoninfo/AddonInfo.h"
18 #include "addons/addoninfo/AddonType.h"
19 #include "addons/settings/AddonSettings.h"
20 #include "filesystem/CurlFile.h"
21 #include "filesystem/Directory.h"
22 #include "filesystem/File.h"
23 #include "filesystem/PluginDirectory.h"
24 #include "guilib/LocalizeStrings.h"
25 #include "music/Album.h"
26 #include "music/Artist.h"
27 #include "music/MusicDatabase.h"
28 #include "music/infoscanner/MusicAlbumInfo.h"
29 #include "music/infoscanner/MusicArtistInfo.h"
30 #include "settings/AdvancedSettings.h"
31 #include "settings/SettingsComponent.h"
32 #include "settings/SettingsValueFlatJsonSerializer.h"
33 #include "utils/CharsetConverter.h"
34 #include "utils/JSONVariantWriter.h"
35 #include "utils/ScraperParser.h"
36 #include "utils/ScraperUrl.h"
37 #include "utils/StringUtils.h"
38 #include "utils/URIUtils.h"
39 #include "utils/XMLUtils.h"
40 #include "utils/log.h"
41 #include "video/VideoDatabase.h"
43 #include <algorithm>
44 #include <sstream>
46 #include <fstrcmp.h>
48 using namespace XFILE;
49 using namespace KODI;
50 using namespace MUSIC_GRABBER;
52 namespace ADDON
55 typedef struct
57 const char *name;
58 CONTENT_TYPE type;
59 int pretty;
60 } ContentMapping;
// Lookup table driving both TranslateContent() overloads.
// "music" is accepted as an alias for "albums" on string -> enum lookup;
// enum -> string lookup always returns the first matching entry.
static const ContentMapping content[] = {{"unknown", CONTENT_NONE, 231},
                                         {"albums", CONTENT_ALBUMS, 132},
                                         {"music", CONTENT_ALBUMS, 132},
                                         {"artists", CONTENT_ARTISTS, 133},
                                         {"movies", CONTENT_MOVIES, 20342},
                                         {"tvshows", CONTENT_TVSHOWS, 20343},
                                         {"musicvideos", CONTENT_MUSICVIDEOS, 20389}};
70 std::string TranslateContent(const CONTENT_TYPE &type, bool pretty /*=false*/)
72 for (const ContentMapping& map : content)
74 if (type == map.type)
76 if (pretty && map.pretty)
77 return g_localizeStrings.Get(map.pretty);
78 else
79 return map.name;
82 return "";
85 CONTENT_TYPE TranslateContent(const std::string &string)
87 for (const ContentMapping& map : content)
89 if (string == map.name)
90 return map.type;
92 return CONTENT_NONE;
95 AddonType ScraperTypeFromContent(const CONTENT_TYPE& content)
97 switch (content)
99 case CONTENT_ALBUMS:
100 return AddonType::SCRAPER_ALBUMS;
101 case CONTENT_ARTISTS:
102 return AddonType::SCRAPER_ARTISTS;
103 case CONTENT_MOVIES:
104 return AddonType::SCRAPER_MOVIES;
105 case CONTENT_MUSICVIDEOS:
106 return AddonType::SCRAPER_MUSICVIDEOS;
107 case CONTENT_TVSHOWS:
108 return AddonType::SCRAPER_TVSHOWS;
109 default:
110 return AddonType::UNKNOWN;
114 // if the XML root is <error>, throw CScraperError with enclosed <title>/<message> values
115 static void CheckScraperError(const TiXmlElement *pxeRoot)
117 if (!pxeRoot || StringUtils::CompareNoCase(pxeRoot->Value(), "error"))
118 return;
119 std::string sTitle;
120 std::string sMessage;
121 XMLUtils::GetString(pxeRoot, "title", sTitle);
122 XMLUtils::GetString(pxeRoot, "message", sMessage);
123 throw CScraperError(sTitle, sMessage);
126 CScraper::CScraper(const AddonInfoPtr& addonInfo, AddonType addonType)
127 : CAddon(addonInfo, addonType)
129 m_requiressettings = addonInfo->Type(addonType)->GetValue("@requiressettings").asBoolean();
131 CDateTimeSpan persistence;
132 std::string tmp = addonInfo->Type(addonType)->GetValue("@cachepersistence").asString();
133 if (!tmp.empty())
134 m_persistence.SetFromTimeString(tmp);
136 switch (addonType)
138 case AddonType::SCRAPER_ALBUMS:
139 m_pathContent = CONTENT_ALBUMS;
140 break;
141 case AddonType::SCRAPER_ARTISTS:
142 m_pathContent = CONTENT_ARTISTS;
143 break;
144 case AddonType::SCRAPER_MOVIES:
145 m_pathContent = CONTENT_MOVIES;
146 break;
147 case AddonType::SCRAPER_MUSICVIDEOS:
148 m_pathContent = CONTENT_MUSICVIDEOS;
149 break;
150 case AddonType::SCRAPER_TVSHOWS:
151 m_pathContent = CONTENT_TVSHOWS;
152 break;
153 default:
154 break;
157 m_isPython = URIUtils::GetExtension(addonInfo->Type(addonType)->LibPath()) == ".py";
160 bool CScraper::Supports(const CONTENT_TYPE &content) const
162 return Type() == ScraperTypeFromContent(content);
165 bool CScraper::SetPathSettings(CONTENT_TYPE content, const std::string &xml)
167 m_pathContent = content;
168 if (!LoadSettings(false, false))
169 return false;
171 if (xml.empty())
172 return true;
174 CXBMCTinyXML doc;
175 doc.Parse(xml);
176 return SettingsFromXML(doc, false);
179 std::string CScraper::GetPathSettings()
181 if (!LoadSettings(false, true))
182 return "";
184 std::stringstream stream;
185 CXBMCTinyXML doc;
186 SettingsToXML(doc);
187 if (doc.RootElement())
188 stream << *doc.RootElement();
190 return stream.str();
193 void CScraper::ClearCache()
195 std::string strCachePath = URIUtils::AddFileToFolder(CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_cachePath, "scrapers");
197 // create scraper cache dir if needed
198 if (!CDirectory::Exists(strCachePath))
199 CDirectory::Create(strCachePath);
201 strCachePath = URIUtils::AddFileToFolder(strCachePath, ID());
202 URIUtils::AddSlashAtEnd(strCachePath);
204 if (CDirectory::Exists(strCachePath))
206 CFileItemList items;
207 CDirectory::GetDirectory(strCachePath, items, "", DIR_FLAG_DEFAULTS);
208 for (int i = 0; i < items.Size(); ++i)
210 // wipe cache
211 if (items[i]->m_dateTime + m_persistence <= CDateTime::GetCurrentDateTime())
212 CFile::Delete(items[i]->GetDynPath());
215 else
216 CDirectory::Create(strCachePath);
// returns a vector of strings: the first is the XML output by the function; the rest
// is XML output by chained functions, possibly recursively
// the CCurlFile object is passed in so that URL fetches can be canceled from other threads
// throws CScraperError abort on internal failures (e.g., parse errors)
std::vector<std::string> CScraper::Run(const std::string &function,
                                       const CScraperUrl &scrURL,
                                       CCurlFile &http,
                                       const std::vector<std::string> *extras)
{
  if (!Load())
    throw CScraperError();

  // Execute the scraper function itself; empty output is treated as failure
  // (silently for the two probe-style functions that may legitimately fail).
  std::string strXML = InternalRun(function, scrURL, http, extras);
  if (strXML.empty())
  {
    if (function != "NfoUrl" && function != "ResolveIDToUrl")
      CLog::Log(LOGERROR, "{}: Unable to parse web site", __FUNCTION__);
    throw CScraperError();
  }

  CLog::Log(LOGDEBUG, "scraper: {} returned {}", function, strXML);

  CXBMCTinyXML doc;
  /* all data was converted to UTF-8 before being processed by scraper */
  doc.Parse(strXML, TIXML_ENCODING_UTF8);
  if (!doc.RootElement())
  {
    CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
    throw CScraperError();
  }

  std::vector<std::string> result;
  result.push_back(strXML);
  TiXmlElement *xchain = doc.RootElement()->FirstChildElement();
  // skip children of the root element until <url> or <chain>
  while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
    xchain = xchain->NextSiblingElement();
  // Recursively execute each chained function, appending its output.
  while (xchain)
  {
    // <chain|url function="...">param</>
    const char *szFunction = xchain->Attribute("function");
    if (szFunction)
    {
      CScraperUrl scrURL2;
      std::vector<std::string> extras;
      // for <chain>, pass the contained text as a parameter; for <url>, as URL content
      if (strcmp(xchain->Value(), "chain") == 0)
      {
        if (xchain->FirstChild())
          extras.emplace_back(xchain->FirstChild()->Value());
      }
      else
        scrURL2.ParseAndAppendUrl(xchain);
      // Fix for empty chains. $$1 would still contain the
      // previous value as there is no child of the xml node.
      // since $$1 will always either contain the data from an
      // url or the parameters to a chain, we can safely clear it here
      // to fix this issue
      m_parser.m_param[0].clear();
      // RunNoThrow: a failed chain link contributes nothing but does not abort.
      std::vector<std::string> result2 = RunNoThrow(szFunction, scrURL2, http, &extras);
      result.insert(result.end(), result2.begin(), result2.end());
    }
    xchain = xchain->NextSiblingElement();
    // continue to skip past non-<url> or <chain> elements
    while (xchain && strcmp(xchain->Value(), "url") && strcmp(xchain->Value(), "chain"))
      xchain = xchain->NextSiblingElement();
  }

  return result;
}
290 // just like Run, but returns an empty list instead of throwing in case of error
291 // don't use in new code; errors should be handled appropriately
292 std::vector<std::string> CScraper::RunNoThrow(const std::string &function,
293 const CScraperUrl &url,
294 XFILE::CCurlFile &http,
295 const std::vector<std::string> *extras)
297 std::vector<std::string> vcs;
300 vcs = Run(function, url, http, extras);
302 catch (const CScraperError &sce)
304 assert(sce.FAborted()); // the only kind we should get
306 return vcs;
// Fetch every input URL into consecutive parser parameter slots ($$1, $$2,
// ...), append any extra parameters after them, then run the named scraper
// function. Returns "" when any URL fetch fails or yields no data.
// NOTE(review): no bounds check against the capacity of m_parser.m_param —
// assumes the URL + extras count fits the parser's parameter array; confirm.
std::string CScraper::InternalRun(const std::string &function,
                                  const CScraperUrl &scrURL,
                                  CCurlFile &http,
                                  const std::vector<std::string> *extras)
{
  // walk the list of input URLs and fetch each into parser parameters
  const auto& urls = scrURL.GetUrls();
  size_t i;
  for (i = 0; i < urls.size(); ++i)
  {
    if (!CScraperUrl::Get(urls[i], m_parser.m_param[i], http, ID()) ||
        m_parser.m_param[i].empty())
      return "";
  }
  // put the 'extra' parameters into the parser parameter list too
  if (extras)
  {
    for (size_t j = 0; j < extras->size(); ++j)
      m_parser.m_param[j + i] = (*extras)[j];
  }

  return m_parser.Parse(function, this);
}
333 std::string CScraper::GetPathSettingsAsJSON()
335 static const std::string EmptyPathSettings = "{}";
337 if (!LoadSettings(false, true))
338 return EmptyPathSettings;
340 CSettingsValueFlatJsonSerializer jsonSerializer;
341 auto json = jsonSerializer.SerializeValues(GetSettings()->GetSettingsManager());
342 if (json.empty())
343 return EmptyPathSettings;
345 return json;
// Load the scraper's XML definition and merge in any scraper-library
// dependencies. Python scrapers (and already-loaded ones) succeed trivially.
// Returns the cached load state; failure of a non-optional dependency fails
// the whole load.
bool CScraper::Load()
{
  if (m_fLoaded || m_isPython)
    return true;

  bool result = m_parser.Load(LibPath());
  if (result)
  {
    //! @todo this routine assumes that deps are a single level, and assumes the dep is installed.
    //! 1. Does it make sense to have recursive dependencies?
    //! 2. Should we be checking the dep versions or do we assume it is ok?
    auto deps = GetDependencies();
    auto itr = deps.begin();
    while (itr != deps.end())
    {
      // the metadata platform dependency is implicit — skip it
      if (itr->id == "xbmc.metadata")
      {
        ++itr;
        continue;
      }
      AddonPtr dep;

      bool bOptional = itr->optional;

      if (CServiceBroker::GetAddonMgr().GetAddon((*itr).id, dep, ADDON::OnlyEnabled::CHOICE_YES))
      {
        // merge scraper-library XML into the parser's document set
        CXBMCTinyXML doc;
        if (dep->Type() == AddonType::SCRAPER_LIBRARY && doc.LoadFile(dep->LibPath()))
          m_parser.AddDocument(&doc);
      }
      else
      {
        // a missing non-optional dependency aborts the load
        if (!bOptional)
        {
          result = false;
          break;
        }
      }
      ++itr;
    }
  }

  if (!result)
    CLog::Log(LOGWARNING, "failed to load scraper XML from {}", LibPath());
  return m_fLoaded = result;
}
395 bool CScraper::IsInUse() const
397 if (Supports(CONTENT_ALBUMS) || Supports(CONTENT_ARTISTS))
398 { // music scraper
399 CMusicDatabase db;
400 if (db.Open() && db.ScraperInUse(ID()))
401 return true;
403 else
404 { // video scraper
405 CVideoDatabase db;
406 if (db.Open() && db.ScraperInUse(ID()))
407 return true;
409 return false;
// True when the scraper declares itself a no-op (XML scrapers only;
// Python scrapers are never no-ops). Throws CScraperError if the scraper
// definition cannot be loaded.
bool CScraper::IsNoop()
{
  if (!Load())
    throw CScraperError();

  return !m_isPython && m_parser.IsNoop();
}
// pass in contents of .nfo file; returns URL (possibly empty if none found)
// and may populate strId, or throws CScraperError on error
CScraperUrl CScraper::NfoUrl(const std::string &sNfoContent)
{
  CScraperUrl scurlRet;

  if (IsNoop())
    return scurlRet;

  // Python scrapers: invoke the plugin's NfoUrl action; the first returned
  // item's path is the resolved URL.
  if (m_isPython)
  {
    std::stringstream str;
    str << "plugin://" << ID() << "?action=NfoUrl&nfo=" << CURL::Encode(sNfoContent)
        << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());

    CFileItemList items;
    if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
      return scurlRet;

    if (items.Size() == 0)
      return scurlRet;
    if (items.Size() > 1)
      CLog::Log(LOGWARNING, "{}: scraper returned multiple results; using first", __FUNCTION__);

    CScraperUrl::SUrlEntry surl;
    surl.m_type = CScraperUrl::UrlType::General;
    surl.m_url = items[0]->GetDynPath();
    scurlRet.AppendUrl(surl);
    return scurlRet;
  }

  // scraper function takes contents of .nfo file, returns XML (see below)
  std::vector<std::string> vcsIn;
  vcsIn.push_back(sNfoContent);
  CScraperUrl scurl;
  CCurlFile fcurl;
  std::vector<std::string> vcsOut = Run("NfoUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "{}: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (size_t i = 0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid xml with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() is assuming well formed xml
       with start and end-tags we're not able to use it.
       Check for the desired Elements instead.
      */
      TiXmlElement* pxeUrl = nullptr;
      TiXmlElement* pId = nullptr;
      // <details> wraps <url>/<id>; otherwise look for loose top-level elements
      if (!strcmp(doc.RootElement()->Value(), "details"))
      {
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.SetId(pId->FirstChild()->ValueStr());

      // a <url function="..."> is a chained call, not a direct result — skip it
      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseAndAppendUrl(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseAndAppendUrl(doc.RootElement());
      else
        continue;
      break; // first usable result wins
    }
  }
  return scurlRet;
}
// Resolve an external identifier (e.g. an IMDb id) to a scraper URL.
// Returns an empty CScraperUrl when the id cannot be resolved; may throw
// CScraperError via CheckScraperError on a scraper-reported error.
// NOTE(review): parsing mirrors NfoUrl() — keep the two in sync.
CScraperUrl CScraper::ResolveIDToUrl(const std::string &externalID)
{
  CScraperUrl scurlRet;

  // Python scrapers: invoke the plugin's resolveid action and parse the
  // resolved item's path.
  if (m_isPython)
  {
    std::stringstream str;
    str << "plugin://" << ID() << "?action=resolveid&key=" << CURL::Encode(externalID)
        << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());

    CFileItem item("resolve me", false);

    if (XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
      scurlRet.ParseFromData(item.GetDynPath());

    return scurlRet;
  }

  // scraper function takes an external ID, returns XML (see below)
  std::vector<std::string> vcsIn;
  vcsIn.push_back(externalID);
  CScraperUrl scurl;
  CCurlFile fcurl;
  std::vector<std::string> vcsOut = Run("ResolveIDToUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty() || vcsOut[0].empty())
    return scurlRet;
  if (vcsOut.size() > 1)
    CLog::Log(LOGWARNING, "{}: scraper returned multiple results; using first", __FUNCTION__);

  // parse returned XML: either <error> element on error, blank on failure,
  // or <url>...</url> or <url>...</url><id>...</id> on success
  for (size_t i = 0; i < vcsOut.size(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(vcsOut[i], TIXML_ENCODING_UTF8);
    CheckScraperError(doc.RootElement());

    if (doc.RootElement())
    {
      /*
       NOTE: Scrapers might return invalid xml with some loose
       elements (eg. '<url>http://some.url</url><id>123</id>').
       Since XMLUtils::GetString() is assuming well formed xml
       with start and end-tags we're not able to use it.
       Check for the desired Elements instead.
      */
      TiXmlElement* pxeUrl = nullptr;
      TiXmlElement* pId = nullptr;
      // <details> wraps <url>/<id>; otherwise look for loose top-level elements
      if (!strcmp(doc.RootElement()->Value(), "details"))
      {
        pxeUrl = doc.RootElement()->FirstChildElement("url");
        pId = doc.RootElement()->FirstChildElement("id");
      }
      else
      {
        pId = doc.FirstChildElement("id");
        pxeUrl = doc.FirstChildElement("url");
      }
      if (pId && pId->FirstChild())
        scurlRet.SetId(pId->FirstChild()->ValueStr());

      // a <url function="..."> is a chained call, not a direct result — skip it
      if (pxeUrl && pxeUrl->Attribute("function"))
        continue;

      if (pxeUrl)
        scurlRet.ParseAndAppendUrl(pxeUrl);
      else if (!strcmp(doc.RootElement()->Value(), "url"))
        scurlRet.ParseAndAppendUrl(doc.RootElement());
      else
        continue;
      break; // first usable result wins
    }
  }
  return scurlRet;
}
585 static bool RelevanceSortFunction(const CScraperUrl &left, const CScraperUrl &right)
587 return left.GetRelevance() > right.GetRelevance();
// Primary template: convert a plugin-returned CFileItem into a typed
// search result. Only the explicit specializations below are defined.
template<class T>
static T FromFileItem(const CFileItem &item);
593 template<>
594 CScraperUrl FromFileItem<CScraperUrl>(const CFileItem &item)
596 CScraperUrl url;
598 url.SetTitle(item.GetLabel());
599 if (item.HasProperty("relevance"))
600 url.SetRelevance(item.GetProperty("relevance").asDouble());
601 CScraperUrl::SUrlEntry surl;
602 surl.m_type = CScraperUrl::UrlType::General;
603 surl.m_url = item.GetDynPath();
604 url.AppendUrl(surl);
606 return url;
609 template<>
610 CMusicAlbumInfo FromFileItem<CMusicAlbumInfo>(const CFileItem &item)
612 CMusicAlbumInfo info;
613 const std::string& sTitle = item.GetLabel();
614 std::string sArtist = item.GetProperty("album.artist").asString();
615 std::string sAlbumName;
616 if (!sArtist.empty())
617 sAlbumName = StringUtils::Format("{} - {}", sArtist, sTitle);
618 else
619 sAlbumName = sTitle;
621 CScraperUrl url;
622 url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));
624 info = CMusicAlbumInfo(sTitle, sArtist, sAlbumName, url);
625 if (item.HasProperty("relevance"))
626 info.SetRelevance(item.GetProperty("relevance").asFloat());
628 if (item.HasProperty("album.releasestatus"))
629 info.GetAlbum().strReleaseStatus = item.GetProperty("album.releasestatus").asString();
630 if (item.HasProperty("album.type"))
631 info.GetAlbum().strType = item.GetProperty("album.type").asString();
632 if (item.HasProperty("album.year"))
633 info.GetAlbum().strReleaseDate = item.GetProperty("album.year").asString();
634 if (item.HasProperty("album.label"))
635 info.GetAlbum().strLabel = item.GetProperty("album.label").asString();
636 info.GetAlbum().art = item.GetArt();
638 return info;
641 template<>
642 CMusicArtistInfo FromFileItem<CMusicArtistInfo>(const CFileItem &item)
644 CMusicArtistInfo info;
645 const std::string& sTitle = item.GetLabel();
647 CScraperUrl url;
648 url.AppendUrl(CScraperUrl::SUrlEntry(item.GetDynPath()));
650 info = CMusicArtistInfo(sTitle, url);
651 if (item.HasProperty("artist.genre"))
652 info.GetArtist().genre = StringUtils::Split(item.GetProperty("artist.genre").asString(),
653 CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
654 if (item.HasProperty("artist.disambiguation"))
655 info.GetArtist().strDisambiguation = item.GetProperty("artist.disambiguation").asString();
656 if (item.HasProperty("artist.type"))
657 info.GetArtist().strType = item.GetProperty("artist.type").asString();
658 if (item.HasProperty("artist.gender"))
659 info.GetArtist().strGender = item.GetProperty("artist.gender").asString();
660 if (item.HasProperty("artist.born"))
661 info.GetArtist().strBorn = item.GetProperty("artist.born").asString();
663 return info;
666 template<class T>
667 static std::vector<T> PythonFind(const std::string &ID,
668 const std::map<std::string, std::string> &additionals)
670 std::vector<T> result;
671 CFileItemList items;
672 std::stringstream str;
673 str << "plugin://" << ID << "?action=find";
674 for (const auto &it : additionals)
675 str << "&" << it.first << "=" << CURL::Encode(it.second);
677 if (XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
679 for (const auto& it : items)
680 result.emplace_back(std::move(FromFileItem<T>(*it)));
683 return result;
686 static std::string FromString(const CFileItem &item, const std::string &key)
688 return item.GetProperty(key).asString();
691 static std::vector<std::string> FromArray(const CFileItem &item, const std::string &key, int sep)
693 return StringUtils::Split(item.GetProperty(key).asString(),
694 sep ? CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_videoItemSeparator
695 : CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
698 static void ParseThumbs(CScraperUrl &scurl,
699 const CFileItem &item,
700 int nThumbs,
701 const std::string &tag)
703 for (int i = 0; i < nThumbs; ++i)
705 std::stringstream prefix;
706 prefix << tag << i + 1;
707 std::string url = FromString(item, prefix.str() + ".url");
708 std::string aspect = FromString(item, prefix.str() + ".aspect");
709 std::string preview = FromString(item, prefix.str() + ".preview");
710 scurl.AddParsedUrl(url, aspect, preview);
714 static std::string ParseFanart(const CFileItem &item, int nFanart, const std::string &tag)
716 std::string result;
717 TiXmlElement fanart("fanart");
718 for (int i = 0; i < nFanart; ++i)
720 std::stringstream prefix;
721 prefix << tag << i + 1;
722 std::string url = FromString(item, prefix.str() + ".url");
723 std::string preview = FromString(item, prefix.str() + ".preview");
724 TiXmlElement thumb("thumb");
725 thumb.SetAttribute("preview", preview);
726 TiXmlText text(url);
727 thumb.InsertEndChild(text);
728 fanart.InsertEndChild(thumb);
730 result << fanart;
732 return result;
// Primary template: extract typed details from a plugin-returned item into
// the out-parameter; returns success. Only the explicit specializations
// below are defined.
template<class T>
static bool DetailsFromFileItem(const CFileItem&, T&);
// Fill a CAlbum from a Python scraper's "album.*" item properties.
// Multi-valued fields (artists, thumbs) are encoded as a count property
// plus numbered sub-properties ("album.artist1.name", ...). Always true.
template<>
bool DetailsFromFileItem<CAlbum>(const CFileItem& item, CAlbum& album)
{
  album.strAlbum = item.GetLabel();
  album.strMusicBrainzAlbumID = FromString(item, "album.musicbrainzid");
  album.strReleaseGroupMBID = FromString(item, "album.releasegroupid");

  int nArtists = item.GetProperty("album.artists").asInteger32();
  album.artistCredits.reserve(nArtists);
  for (int i = 0; i < nArtists; ++i)
  {
    // artist credits are numbered from 1
    std::stringstream prefix;
    prefix << "album.artist" << i + 1;
    CArtistCredit artistCredit;
    artistCredit.SetArtist(FromString(item, prefix.str() + ".name"));
    artistCredit.SetMusicBrainzArtistID(FromString(item, prefix.str() + ".musicbrainzid"));
    album.artistCredits.push_back(artistCredit);
  }

  album.strArtistDesc = FromString(item, "album.artist_description");
  album.genre = FromArray(item, "album.genre", 0);
  album.styles = FromArray(item, "album.styles", 0);
  album.moods = FromArray(item, "album.moods", 0);
  album.themes = FromArray(item, "album.themes", 0);
  album.bCompilation = item.GetProperty("album.compilation").asBoolean();
  album.strReview = FromString(item, "album.review");
  album.strReleaseDate = FromString(item, "album.releasedate");
  if (album.strReleaseDate.empty())
    album.strReleaseDate = FromString(item, "album.year"); // fall back to bare year
  album.strOrigReleaseDate = FromString(item, "album.originaldate");
  album.strLabel = FromString(item, "album.label");
  album.strType = FromString(item, "album.type");
  album.strReleaseStatus = FromString(item, "album.releasestatus");
  album.fRating = item.GetProperty("album.rating").asFloat();
  album.iUserrating = item.GetProperty("album.user_rating").asInteger32();
  album.iVotes = item.GetProperty("album.votes").asInteger32();

  /* Scrapers fetch a list of possible art but do not set the current images used because art
     selection depends on other preferences so is handled by CMusicInfoScanner
     album.art = item.GetArt();
  */

  int nThumbs = item.GetProperty("album.thumbs").asInteger32();
  ParseThumbs(album.thumbURL, item, nThumbs, "album.thumb");
  return true;
}
// Fill a CArtist from a Python scraper's "artist.*" item properties.
// Discography, video links, thumbs and (deprecated) fanarts use a count
// property plus numbered sub-properties. Always returns true.
template<>
bool DetailsFromFileItem<CArtist>(const CFileItem& item, CArtist& artist)
{
  artist.strArtist = item.GetLabel();
  artist.strMusicBrainzArtistID = FromString(item, "artist.musicbrainzid");
  artist.strDisambiguation = FromString(item, "artist.disambiguation");
  artist.strType = FromString(item, "artist.type");
  artist.strGender = FromString(item, "artist.gender");
  artist.genre = FromArray(item, "artist.genre", 0);
  artist.styles = FromArray(item, "artist.styles", 0);
  artist.moods = FromArray(item, "artist.moods", 0);
  artist.yearsActive = FromArray(item, "artist.years_active", 0);
  artist.instruments = FromArray(item, "artist.instruments", 0);
  artist.strBorn = FromString(item, "artist.born");
  artist.strFormed = FromString(item, "artist.formed");
  artist.strBiography = FromString(item, "artist.biography");
  artist.strDied = FromString(item, "artist.died");
  artist.strDisbanded = FromString(item, "artist.disbanded");

  /* Scrapers fetch a list of possible art but do not set the current images used because art
     selection depends on other preferences so is handled by CMusicInfoScanner
     artist.art = item.GetArt();
  */

  // discography: numbered from 1 ("artist.album1.title", ...)
  int nAlbums = item.GetProperty("artist.albums").asInteger32();
  artist.discography.reserve(nAlbums);
  for (int i = 0; i < nAlbums; ++i)
  {
    std::stringstream prefix;
    prefix << "artist.album" << i + 1;
    CDiscoAlbum discoAlbum;
    discoAlbum.strAlbum = FromString(item, prefix.str() + ".title");
    discoAlbum.strYear = FromString(item, prefix.str() + ".year");
    discoAlbum.strReleaseGroupMBID = FromString(item, prefix.str() + ".musicbrainzreleasegroupid");
    artist.discography.emplace_back(discoAlbum);
  }

  // music video links: numbered from 1 ("artist.videolink1.title", ...)
  const int numvideolinks = item.GetProperty("artist.videolinks").asInteger32();
  if (numvideolinks > 0)
  {
    artist.videolinks.reserve(numvideolinks);
    for (int i = 1; i <= numvideolinks; ++i)
    {
      std::stringstream prefix;
      prefix << "artist.videolink" << i;
      ArtistVideoLinks videoLink;
      videoLink.title = FromString(item, prefix.str() + ".title");
      videoLink.mbTrackID = FromString(item, prefix.str() + ".mbtrackid");
      videoLink.videoURL = FromString(item, prefix.str() + ".url");
      videoLink.thumbURL = FromString(item, prefix.str() + ".thumb");
      artist.videolinks.emplace_back(std::move(videoLink));
    }
  }

  int nThumbs = item.GetProperty("artist.thumbs").asInteger32();
  ParseThumbs(artist.thumbURL, item, nThumbs, "artist.thumb");

  // Support deprecated fanarts property, add to artist.thumbURL
  int nFanart = item.GetProperty("artist.fanarts").asInteger32();
  if (nFanart > 0)
  {
    CFanart fanart;
    fanart.m_xml = ParseFanart(item, nFanart, "artist.fanart");
    fanart.Unpack();
    for (unsigned int i = 0; i < fanart.GetNumFanarts(); i++)
      artist.thumbURL.AddParsedUrl(fanart.GetImageURL(i), "fanart", fanart.GetPreviewURL(i));
  }
  return true;
}
855 template<>
856 bool DetailsFromFileItem<CVideoInfoTag>(const CFileItem& item, CVideoInfoTag& tag)
858 if (item.HasVideoInfoTag())
860 tag = *item.GetVideoInfoTag();
861 return true;
863 return false;
866 template<class T>
867 static bool PythonDetails(const std::string& ID,
868 const std::string& key,
869 const std::string& url,
870 const std::string& action,
871 const std::string& pathSettings,
872 const std::unordered_map<std::string, std::string>& uniqueIDs,
873 T& result)
875 CVariant ids;
876 for (const auto& [identifierType, identifier] : uniqueIDs)
877 ids[identifierType] = identifier;
878 std::string uids;
879 CJSONVariantWriter::Write(ids, uids, true);
880 std::stringstream str;
881 str << "plugin://" << ID << "?action=" << action << "&" << key << "=" << CURL::Encode(url);
882 str << "&pathSettings=" << CURL::Encode(pathSettings);
883 if (!uniqueIDs.empty())
884 str << "&uniqueIDs=" << CURL::Encode(uids);
886 CFileItem item(url, false);
888 if (!XFILE::CPluginDirectory::GetPluginResult(str.str(), item, false))
889 return false;
891 return DetailsFromFileItem(item, result);
// Convenience overload for callers with no unique ids to forward.
template<class T>
static bool PythonDetails(const std::string& ID,
                          const std::string& key,
                          const std::string& url,
                          const std::string& action,
                          const std::string& pathSettings,
                          T& result)
{
  return PythonDetails(ID, key, url, action, pathSettings,
                       std::unordered_map<std::string, std::string>{}, result);
}
// fetch list of matching movies sorted by relevance (may be empty);
// throws CScraperError on error; first called with fFirst set, then unset if first try fails
std::vector<CScraperUrl> CScraper::FindMovie(XFILE::CCurlFile &fcurl,
                                             const std::string &movieTitle, int movieYear,
                                             bool fFirst)
{
  // prepare parameters for URL creation
  std::string sTitle, sYear;
  if (movieYear < 0)
  {
    // no year supplied — derive title/year from the (file)name
    std::string sTitleYear;
    CUtil::CleanString(movieTitle, sTitle, sTitleYear, sYear, true /*fRemoveExt*/, fFirst);
  }
  else
  {
    sTitle = movieTitle;
    sYear = std::to_string( movieYear );
  }

  CLog::Log(LOGDEBUG,
            "{}: Searching for '{}' using {} scraper "
            "(path: '{}', content: '{}', version: '{}')",
            __FUNCTION__, sTitle, Name(), Path(), ADDON::TranslateContent(Content()),
            Version().asString());

  std::vector<CScraperUrl> vcscurl;
  if (IsNoop())
    return vcscurl;

  // retry pass: loosen the title by treating dashes as spaces
  if (!fFirst)
    StringUtils::Replace(sTitle, '-', ' ');

  // Python scrapers: delegate to the plugin "find" action.
  if (m_isPython)
  {
    std::map<std::string, std::string> additionals{{"title", sTitle}};
    if (!sYear.empty())
      additionals.insert({"year", sYear});
    additionals.emplace("pathSettings", GetPathSettingsAsJSON());
    return PythonFind<CScraperUrl>(ID(), additionals);
  }

  // convert the title to the scraper's expected encoding before URL-encoding
  std::vector<std::string> vcsIn(1);
  g_charsetConverter.utf8To(SearchStringEncoding(), sTitle, vcsIn[0]);
  vcsIn[0] = CURL::Encode(vcsIn[0]);
  if (fFirst && !sYear.empty())
    vcsIn.push_back(sYear);

  // request a search URL from the title/filename/etc.
  CScraperUrl scurl;
  std::vector<std::string> vcsOut = Run("CreateSearchUrl", scurl, fcurl, &vcsIn);
  if (vcsOut.empty())
  {
    CLog::Log(LOGDEBUG, "{}: CreateSearchUrl failed", __FUNCTION__);
    throw CScraperError();
  }
  scurl.ParseFromData(vcsOut[0]);

  // do the search, and parse the result into a list
  vcsIn.clear();
  vcsIn.push_back(scurl.GetFirstThumbUrl());
  vcsOut = Run("GetSearchResults", scurl, fcurl, &vcsIn);

  bool fSort(true);
  std::set<std::string> stsDupeCheck;
  bool fResults(false);
  for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
  {
    CXBMCTinyXML doc;
    doc.Parse(*i, TIXML_ENCODING_UTF8);
    if (!doc.RootElement())
    {
      CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
      continue; // might have more valid results later
    }

    CheckScraperError(doc.RootElement());

    TiXmlHandle xhDoc(&doc);
    TiXmlHandle xhResults = xhDoc.FirstChild("results");
    if (!xhResults.Element())
      continue;
    fResults = true; // even if empty

    // we need to sort if returned results don't specify 'sorted="yes"'
    if (fSort)
    {
      const char *sorted = xhResults.Element()->Attribute("sorted");
      if (sorted != nullptr)
        fSort = !StringUtils::EqualsNoCase(sorted, "yes");
    }

    // one <entity> per candidate: <title>, <url>(s), optional <id>/<year>/<language>
    for (TiXmlElement *pxeMovie = xhResults.FirstChild("entity").Element(); pxeMovie;
         pxeMovie = pxeMovie->NextSiblingElement())
    {
      TiXmlNode *pxnTitle = pxeMovie->FirstChild("title");
      TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
      if (pxnTitle && pxnTitle->FirstChild() && pxeLink && pxeLink->FirstChild())
      {
        CScraperUrl scurlMovie;
        auto title = pxnTitle->FirstChild()->ValueStr();
        std::string id;
        if (XMLUtils::GetString(pxeMovie, "id", id))
          scurlMovie.SetId(id);

        for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
          scurlMovie.ParseAndAppendUrl(pxeLink);

        // calculate the relevance of this hit
        std::string sCompareTitle = scurlMovie.GetTitle();
        StringUtils::ToLower(sCompareTitle);
        std::string sMatchTitle = sTitle;
        StringUtils::ToLower(sMatchTitle);

        /*
         * Identify the best match by performing a fuzzy string compare on the search term and
         * the result. Additionally, use the year (if available) to further refine the best match.
         * An exact match scores 1, a match off by a year scores 0.5 (release dates can vary between
         * countries), otherwise it scores 0.
         */
        std::string sCompareYear;
        XMLUtils::GetString(pxeMovie, "year", sCompareYear);

        double yearScore = 0;
        if (!sYear.empty() && !sCompareYear.empty())
          yearScore =
              std::max(0.0, 1 - 0.5 * abs(atoi(sYear.c_str()) - atoi(sCompareYear.c_str())));

        scurlMovie.SetRelevance(fstrcmp(sMatchTitle.c_str(), sCompareTitle.c_str()) + yearScore);

        // reconstruct a title for the user
        if (!sCompareYear.empty())
          title += StringUtils::Format(" ({})", sCompareYear);

        std::string sLanguage;
        if (XMLUtils::GetString(pxeMovie, "language", sLanguage) && !sLanguage.empty())
          title += StringUtils::Format(" ({})", sLanguage);

        // filter for dupes from naughty scrapers
        if (stsDupeCheck.insert(scurlMovie.GetFirstThumbUrl() + " " + title).second)
        {
          scurlMovie.SetTitle(title);
          vcscurl.push_back(scurlMovie);
        }
      }
    }
  }

  if (!fResults)
    throw CScraperError(); // scraper aborted

  if (fSort)
    std::stable_sort(vcscurl.begin(), vcscurl.end(), RelevanceSortFunction);

  return vcscurl;
}
1062 // find album by artist, using fcurl for web fetches
1063 // returns a list of albums (empty if no match or failure)
1064 std::vector<CMusicAlbumInfo> CScraper::FindAlbum(CCurlFile &fcurl,
1065 const std::string &sAlbum,
1066 const std::string &sArtist)
1068 CLog::Log(LOGDEBUG,
1069 "{}: Searching for '{} - {}' using {} scraper "
1070 "(path: '{}', content: '{}', version: '{}')",
1071 __FUNCTION__, sArtist, sAlbum, Name(), Path(), ADDON::TranslateContent(Content()),
1072 Version().asString());
1074 std::vector<CMusicAlbumInfo> vcali;
1075 if (IsNoop())
1076 return vcali;
1078 if (m_isPython)
1079 return PythonFind<CMusicAlbumInfo>(ID(),
1080 {{"title", sAlbum}, {"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});
1082 // scraper function is given the album and artist as parameters and
1083 // returns an XML <url> element parseable by CScraperUrl
1084 std::vector<std::string> extras(2);
1085 g_charsetConverter.utf8To(SearchStringEncoding(), sAlbum, extras[0]);
1086 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[1]);
1087 extras[0] = CURL::Encode(extras[0]);
1088 extras[1] = CURL::Encode(extras[1]);
1089 CScraperUrl scurl;
1090 std::vector<std::string> vcsOut = RunNoThrow("CreateAlbumSearchUrl", scurl, fcurl, &extras);
1091 if (vcsOut.size() > 1)
1092 CLog::Log(LOGWARNING, "{}: scraper returned multiple results; using first", __FUNCTION__);
1094 if (vcsOut.empty() || vcsOut[0].empty())
1095 return vcali;
1096 scurl.ParseFromData(vcsOut[0]);
1098 // the next function is passed the contents of the returned URL, and returns
1099 // an empty string on failure; on success, returns XML matches in the form:
1100 // <results>
1101 // <entity>
1102 // <title>...</title>
1103 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
1104 // <artist>...</artist>
1105 // <year>...</year>
1106 // <relevance [scale="..."]>...</relevance> (scale defaults to 1; score is divided by it)
1107 // </entity>
1108 // ...
1109 // </results>
1110 vcsOut = RunNoThrow("GetAlbumSearchResults", scurl, fcurl);
1112 // parse the returned XML into a vector of album objects
1113 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1115 CXBMCTinyXML doc;
1116 doc.Parse(*i, TIXML_ENCODING_UTF8);
1117 TiXmlHandle xhDoc(&doc);
1119 for (TiXmlElement *pxeAlbum = xhDoc.FirstChild("results").FirstChild("entity").Element();
1120 pxeAlbum; pxeAlbum = pxeAlbum->NextSiblingElement())
1122 std::string sTitle;
1123 if (XMLUtils::GetString(pxeAlbum, "title", sTitle) && !sTitle.empty())
1125 std::string sArtist;
1126 std::string sAlbumName;
1127 if (XMLUtils::GetString(pxeAlbum, "artist", sArtist) && !sArtist.empty())
1128 sAlbumName = StringUtils::Format("{} - {}", sArtist, sTitle);
1129 else
1130 sAlbumName = sTitle;
1132 std::string sYear;
1133 if (XMLUtils::GetString(pxeAlbum, "year", sYear) && !sYear.empty())
1134 sAlbumName = StringUtils::Format("{} ({})", sAlbumName, sYear);
1136 // if no URL is provided, use the URL we got back from CreateAlbumSearchUrl
1137 // (e.g., in case we only got one result back and were sent to the detail page)
1138 TiXmlElement *pxeLink = pxeAlbum->FirstChildElement("url");
1139 CScraperUrl scurlAlbum;
1140 if (!pxeLink)
1141 scurlAlbum.ParseFromData(scurl.GetData());
1142 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1143 scurlAlbum.ParseAndAppendUrl(pxeLink);
1145 if (!scurlAlbum.HasUrls())
1146 continue;
1148 CMusicAlbumInfo ali(sTitle, sArtist, sAlbumName, scurlAlbum);
1150 TiXmlElement *pxeRel = pxeAlbum->FirstChildElement("relevance");
1151 if (pxeRel && pxeRel->FirstChild())
1153 const char *szScale = pxeRel->Attribute("scale");
1154 float flScale = szScale ? float(atof(szScale)) : 1;
1155 ali.SetRelevance(float(atof(pxeRel->FirstChild()->Value())) / flScale);
1158 vcali.push_back(ali);
1162 return vcali;
1165 // find artist, using fcurl for web fetches
1166 // returns a list of artists (empty if no match or failure)
1167 std::vector<CMusicArtistInfo> CScraper::FindArtist(CCurlFile &fcurl, const std::string &sArtist)
1169 CLog::Log(LOGDEBUG,
1170 "{}: Searching for '{}' using {} scraper "
1171 "(file: '{}', content: '{}', version: '{}')",
1172 __FUNCTION__, sArtist, Name(), Path(), ADDON::TranslateContent(Content()),
1173 Version().asString());
1175 std::vector<CMusicArtistInfo> vcari;
1176 if (IsNoop())
1177 return vcari;
1179 if (m_isPython)
1180 return PythonFind<CMusicArtistInfo>(ID(),
1181 {{"artist", sArtist}, {"pathSettings", GetPathSettingsAsJSON()}});
1183 // scraper function is given the artist as parameter and
1184 // returns an XML <url> element parseable by CScraperUrl
1185 std::vector<std::string> extras(1);
1186 g_charsetConverter.utf8To(SearchStringEncoding(), sArtist, extras[0]);
1187 extras[0] = CURL::Encode(extras[0]);
1188 CScraperUrl scurl;
1189 std::vector<std::string> vcsOut = RunNoThrow("CreateArtistSearchUrl", scurl, fcurl, &extras);
1191 if (vcsOut.empty() || vcsOut[0].empty())
1192 return vcari;
1193 scurl.ParseFromData(vcsOut[0]);
1195 // the next function is passed the contents of the returned URL, and returns
1196 // an empty string on failure; on success, returns XML matches in the form:
1197 // <results>
1198 // <entity>
1199 // <title>...</title>
1200 // <year>...</year>
1201 // <genre>...</genre>
1202 // <disambiguation>...</disambiguation>
1203 // <url>...</url> (with the usual CScraperUrl decorations like post or spoof)
1204 // </entity>
1205 // ...
1206 // </results>
1207 vcsOut = RunNoThrow("GetArtistSearchResults", scurl, fcurl);
1209 // parse the returned XML into a vector of artist objects
1210 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1212 CXBMCTinyXML doc;
1213 doc.Parse(*i, TIXML_ENCODING_UTF8);
1214 if (!doc.RootElement())
1216 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1217 return vcari;
1219 TiXmlHandle xhDoc(&doc);
1220 for (TiXmlElement *pxeArtist = xhDoc.FirstChild("results").FirstChild("entity").Element();
1221 pxeArtist; pxeArtist = pxeArtist->NextSiblingElement())
1223 TiXmlNode *pxnTitle = pxeArtist->FirstChild("title");
1224 if (pxnTitle && pxnTitle->FirstChild())
1226 CScraperUrl scurlArtist;
1228 TiXmlElement *pxeLink = pxeArtist->FirstChildElement("url");
1229 if (!pxeLink)
1230 scurlArtist.ParseFromData(scurl.GetData());
1231 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1232 scurlArtist.ParseAndAppendUrl(pxeLink);
1234 if (!scurlArtist.HasUrls())
1235 continue;
1237 CMusicArtistInfo ari(pxnTitle->FirstChild()->Value(), scurlArtist);
1238 std::string genre;
1239 XMLUtils::GetString(pxeArtist, "genre", genre);
1240 if (!genre.empty())
1241 ari.GetArtist().genre =
1242 StringUtils::Split(genre, CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_musicItemSeparator);
1243 XMLUtils::GetString(pxeArtist, "disambiguation", ari.GetArtist().strDisambiguation);
1244 XMLUtils::GetString(pxeArtist, "year", ari.GetArtist().strBorn);
1246 vcari.push_back(ari);
1250 return vcari;
1253 // fetch list of episodes from URL (from video database)
1254 VIDEO::EPISODELIST CScraper::GetEpisodeList(XFILE::CCurlFile& fcurl, const CScraperUrl& scurl)
1256 VIDEO::EPISODELIST vcep;
1257 if (!scurl.HasUrls())
1258 return vcep;
1260 CLog::Log(LOGDEBUG,
1261 "{}: Searching '{}' using {} scraper "
1262 "(file: '{}', content: '{}', version: '{}')",
1263 __FUNCTION__, scurl.GetFirstThumbUrl(), Name(), Path(),
1264 ADDON::TranslateContent(Content()), Version().asString());
1266 if (m_isPython)
1268 std::stringstream str;
1269 str << "plugin://" << ID()
1270 << "?action=getepisodelist&url=" << CURL::Encode(scurl.GetFirstThumbUrl())
1271 << "&pathSettings=" << CURL::Encode(GetPathSettingsAsJSON());
1273 CFileItemList items;
1274 if (!XFILE::CDirectory::GetDirectory(str.str(), items, "", DIR_FLAG_DEFAULTS))
1275 return vcep;
1277 for (int i = 0; i < items.Size(); ++i)
1279 VIDEO::EPISODE ep;
1280 const auto& tag = *items[i]->GetVideoInfoTag();
1281 ep.strTitle = tag.m_strTitle;
1282 ep.iSeason = tag.m_iSeason;
1283 ep.iEpisode = tag.m_iEpisode;
1284 ep.cDate = tag.m_firstAired;
1285 ep.iSubepisode = items[i]->GetProperty("video.sub_episode").asInteger();
1286 CScraperUrl::SUrlEntry surl;
1287 surl.m_type = CScraperUrl::UrlType::General;
1288 surl.m_url = items[i]->GetURL().Get();
1289 ep.cScraperUrl.AppendUrl(surl);
1290 vcep.push_back(ep);
1293 return vcep;
1296 std::vector<std::string> vcsIn;
1297 vcsIn.push_back(scurl.GetFirstThumbUrl());
1298 std::vector<std::string> vcsOut = RunNoThrow("GetEpisodeList", scurl, fcurl, &vcsIn);
1300 // parse the XML response
1301 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1303 CXBMCTinyXML doc;
1304 doc.Parse(*i);
1305 if (!doc.RootElement())
1307 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1308 continue;
1311 TiXmlHandle xhDoc(&doc);
1312 for (TiXmlElement *pxeMovie = xhDoc.FirstChild("episodeguide").FirstChild("episode").Element();
1313 pxeMovie; pxeMovie = pxeMovie->NextSiblingElement())
1315 VIDEO::EPISODE ep;
1316 TiXmlElement *pxeLink = pxeMovie->FirstChildElement("url");
1317 std::string strEpNum;
1318 if (pxeLink && XMLUtils::GetInt(pxeMovie, "season", ep.iSeason) &&
1319 XMLUtils::GetString(pxeMovie, "epnum", strEpNum) && !strEpNum.empty())
1321 CScraperUrl &scurlEp(ep.cScraperUrl);
1322 size_t dot = strEpNum.find('.');
1323 ep.iEpisode = atoi(strEpNum.c_str());
1324 ep.iSubepisode = (dot != std::string::npos) ? atoi(strEpNum.substr(dot + 1).c_str()) : 0;
1325 std::string title;
1326 if (!XMLUtils::GetString(pxeMovie, "title", title) || title.empty())
1327 title = g_localizeStrings.Get(10005); // Not available
1328 scurlEp.SetTitle(title);
1329 std::string id;
1330 if (XMLUtils::GetString(pxeMovie, "id", id))
1331 scurlEp.SetId(id);
1333 for (; pxeLink && pxeLink->FirstChild(); pxeLink = pxeLink->NextSiblingElement("url"))
1334 scurlEp.ParseAndAppendUrl(pxeLink);
1336 // date must be the format of yyyy-mm-dd
1337 ep.cDate.SetValid(false);
1338 std::string sDate;
1339 if (XMLUtils::GetString(pxeMovie, "aired", sDate) && sDate.length() == 10)
1341 tm tm;
1342 if (strptime(sDate.c_str(), "%Y-%m-%d", &tm))
1343 ep.cDate.SetDate(1900 + tm.tm_year, tm.tm_mon + 1, tm.tm_mday);
1345 vcep.push_back(ep);
1350 return vcep;
1353 // takes URL; returns true and populates video details on success, false otherwise
1354 bool CScraper::GetVideoDetails(XFILE::CCurlFile& fcurl,
1355 const std::unordered_map<std::string, std::string>& uniqueIDs,
1356 const CScraperUrl& scurl,
1357 bool fMovie /*else episode*/,
1358 CVideoInfoTag& video)
1360 CLog::Log(LOGDEBUG,
1361 "{}: Reading {} '{}' using {} scraper "
1362 "(file: '{}', content: '{}', version: '{}')",
1363 __FUNCTION__, fMovie ? MediaTypeMovie : MediaTypeEpisode, scurl.GetFirstThumbUrl(),
1364 Name(), Path(), ADDON::TranslateContent(Content()), Version().asString());
1366 video.Reset();
1368 if (m_isPython)
1369 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1370 fMovie ? "getdetails" : "getepisodedetails", GetPathSettingsAsJSON(),
1371 uniqueIDs, video);
1373 std::string sFunc = fMovie ? "GetDetails" : "GetEpisodeDetails";
1374 std::vector<std::string> vcsIn;
1375 vcsIn.push_back(scurl.GetId());
1376 vcsIn.push_back(scurl.GetFirstThumbUrl());
1377 std::vector<std::string> vcsOut = RunNoThrow(sFunc, scurl, fcurl, &vcsIn);
1379 // parse XML output
1380 bool fRet(false);
1381 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1383 CXBMCTinyXML doc;
1384 doc.Parse(*i, TIXML_ENCODING_UTF8);
1385 if (!doc.RootElement())
1387 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1388 continue;
1391 TiXmlHandle xhDoc(&doc);
1392 TiXmlElement *pxeDetails = xhDoc.FirstChild("details").Element();
1393 if (!pxeDetails)
1395 CLog::Log(LOGERROR, "{}: Invalid XML file (want <details>)", __FUNCTION__);
1396 continue;
1398 video.Load(pxeDetails, true /*fChain*/);
1399 fRet = true; // but don't exit in case of chaining
1401 return fRet;
1404 // takes a URL; returns true and populates album on success, false otherwise
1405 bool CScraper::GetAlbumDetails(CCurlFile &fcurl, const CScraperUrl &scurl, CAlbum &album)
1407 CLog::Log(LOGDEBUG,
1408 "{}: Reading '{}' using {} scraper "
1409 "(file: '{}', content: '{}', version: '{}')",
1410 __FUNCTION__, scurl.GetFirstThumbUrl(), Name(), Path(),
1411 ADDON::TranslateContent(Content()), Version().asString());
1413 if (m_isPython)
1414 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1415 "getdetails", GetPathSettingsAsJSON(), album);
1417 std::vector<std::string> vcsOut = RunNoThrow("GetAlbumDetails", scurl, fcurl);
1419 // parse the returned XML into an album object (see CAlbum::Load for details)
1420 bool fRet(false);
1421 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1423 CXBMCTinyXML doc;
1424 doc.Parse(*i, TIXML_ENCODING_UTF8);
1425 if (!doc.RootElement())
1427 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1428 return false;
1430 fRet = album.Load(doc.RootElement(), i != vcsOut.begin());
1432 return fRet;
1435 // takes a URL (one returned from FindArtist), the original search string, and
1436 // returns true and populates artist on success, false on failure
1437 bool CScraper::GetArtistDetails(CCurlFile &fcurl,
1438 const CScraperUrl &scurl,
1439 const std::string &sSearch,
1440 CArtist &artist)
1442 if (!scurl.HasUrls())
1443 return false;
1445 CLog::Log(LOGDEBUG,
1446 "{}: Reading '{}' ('{}') using {} scraper "
1447 "(file: '{}', content: '{}', version: '{}')",
1448 __FUNCTION__, scurl.GetFirstThumbUrl(), sSearch, Name(), Path(),
1449 ADDON::TranslateContent(Content()), Version().asString());
1451 if (m_isPython)
1452 return PythonDetails(ID(), "url", scurl.GetFirstThumbUrl(),
1453 "getdetails", GetPathSettingsAsJSON(), artist);
1455 // pass in the original search string for chaining to search other sites
1456 std::vector<std::string> vcIn;
1457 vcIn.push_back(sSearch);
1458 vcIn[0] = CURL::Encode(vcIn[0]);
1460 std::vector<std::string> vcsOut = RunNoThrow("GetArtistDetails", scurl, fcurl, &vcIn);
1462 // ok, now parse the xml file
1463 bool fRet(false);
1464 for (std::vector<std::string>::const_iterator i = vcsOut.begin(); i != vcsOut.end(); ++i)
1466 CXBMCTinyXML doc;
1467 doc.Parse(*i, TIXML_ENCODING_UTF8);
1468 if (!doc.RootElement())
1470 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1471 return false;
1474 fRet = artist.Load(doc.RootElement(), i != vcsOut.begin());
1476 return fRet;
1479 bool CScraper::GetArtwork(XFILE::CCurlFile &fcurl, CVideoInfoTag &details)
1481 if (!details.HasUniqueID())
1482 return false;
1484 CLog::Log(LOGDEBUG,
1485 "{}: Reading artwork for '{}' using {} scraper "
1486 "(file: '{}', content: '{}', version: '{}')",
1487 __FUNCTION__, details.GetUniqueID(), Name(), Path(), ADDON::TranslateContent(Content()),
1488 Version().asString());
1490 if (m_isPython)
1491 return PythonDetails(ID(), "id", details.GetUniqueID(),
1492 "getartwork", GetPathSettingsAsJSON(), details);
1494 std::vector<std::string> vcsIn;
1495 CScraperUrl scurl;
1496 vcsIn.push_back(details.GetUniqueID());
1497 std::vector<std::string> vcsOut = RunNoThrow("GetArt", scurl, fcurl, &vcsIn);
1499 bool fRet(false);
1500 for (std::vector<std::string>::const_iterator it = vcsOut.begin(); it != vcsOut.end(); ++it)
1502 CXBMCTinyXML doc;
1503 doc.Parse(*it, TIXML_ENCODING_UTF8);
1504 if (!doc.RootElement())
1506 CLog::Log(LOGERROR, "{}: Unable to parse XML", __FUNCTION__);
1507 return false;
1509 fRet = details.Load(doc.RootElement(), it != vcsOut.begin());
1511 return fRet;