src/torrent_info.cpp

   1 /*
   2
   3 Copyright (c) 2003-2008, Arvid Norberg
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions
   8 are met:
   9
  10     * Redistributions of source code must retain the above copyright
  11       notice, this list of conditions and the following disclaimer.
  12     * Redistributions in binary form must reproduce the above copyright
  13       notice, this list of conditions and the following disclaimer in
  14       the documentation and/or other materials provided with the distribution.
  15     * Neither the name of the author nor the names of its
  16       contributors may be used to endorse or promote products derived
  17       from this software without specific prior written permission.
  18
  19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29 POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 #include "libtorrent/pch.hpp"
  34
  35 #include <ctime>
  36 #include <iostream>
  37 #include <fstream>
  38 #include <iomanip>
  39 #include <iterator>
  40 #include <algorithm>
  41 #include <set>
  42
  43 #ifdef _MSC_VER
  44 #pragma warning(push, 1)
  45 #endif
  46
  47 #include <boost/lexical_cast.hpp>
  48 #include <boost/filesystem/path.hpp>
  49 #include <boost/filesystem.hpp>
  50 #include <boost/bind.hpp>
  51
  52 #ifdef _MSC_VER
  53 #pragma warning(pop)
  54 #endif
  55
  56 #include "libtorrent/torrent_info.hpp"
  57 #include "libtorrent/bencode.hpp"
  58 #include "libtorrent/hasher.hpp"
  59 #include "libtorrent/entry.hpp"
  60 #include "libtorrent/file.hpp"
  61
  62 namespace gr = boost::gregorian;
  63
  64 using namespace libtorrent;
  65
  66 namespace
  67 {
  68
  69         namespace fs = boost::filesystem;
  70
  71         void convert_to_utf8(std::string& str, unsigned char chr)
  72         {
  73                 str += 0xc0 | ((chr & 0xff) >> 6);
  74                 str += 0x80 | (chr & 0x3f);
  75         }
  76
  77         void verify_encoding(file_entry& target)
  78         {
  79                 std::string tmp_path;
  80                 std::string file_path = target.path.string();
  81                 bool valid_encoding = true;
  82                 for (std::string::iterator i = file_path.begin()
  83                         , end(file_path.end()); i != end; ++i)
  84                 {
  85                         // valid ascii-character
  86                         if ((*i & 0x80) == 0)
  87                         {
  88                                 tmp_path += *i;
  89                                 continue;
  90                         }
  91
  92                         if (std::distance(i, end) < 2)
  93                         {
  94                                 convert_to_utf8(tmp_path, *i);
  95                                 valid_encoding = false;
  96                                 continue;
  97                         }
  98
  99                         // valid 2-byte utf-8 character
 100                         if ((i[0] & 0xe0) == 0xc0
 101                                 && (i[1] & 0xc0) == 0x80)
 102                         {
 103                                 tmp_path += i[0];
 104                                 tmp_path += i[1];
 105                                 i += 1;
 106                                 continue;
 107                         }
 108
 109                         if (std::distance(i, end) < 3)
 110                         {
 111                                 convert_to_utf8(tmp_path, *i);
 112                                 valid_encoding = false;
 113                                 continue;
 114                         }
 115
 116                         // valid 3-byte utf-8 character
 117                         if ((i[0] & 0xf0) == 0xe0
 118                                 && (i[1] & 0xc0) == 0x80
 119                                 && (i[2] & 0xc0) == 0x80)
 120                         {
 121                                 tmp_path += i[0];
 122                                 tmp_path += i[1];
 123                                 tmp_path += i[2];
 124                                 i += 2;
 125                                 continue;
 126                         }
 127
 128                         if (std::distance(i, end) < 4)
 129                         {
 130                                 convert_to_utf8(tmp_path, *i);
 131                                 valid_encoding = false;
 132                                 continue;
 133                         }
 134
 135                         // valid 4-byte utf-8 character
 136                         if ((i[0] & 0xf0) == 0xe0
 137                                 && (i[1] & 0xc0) == 0x80
 138                                 && (i[2] & 0xc0) == 0x80
 139                                 && (i[3] & 0xc0) == 0x80)
 140                         {
 141                                 tmp_path += i[0];
 142                                 tmp_path += i[1];
 143                                 tmp_path += i[2];
 144                                 tmp_path += i[3];
 145                                 i += 3;
 146                                 continue;
 147                         }
 148
 149                         convert_to_utf8(tmp_path, *i);
 150                         valid_encoding = false;
 151                 }
 152                 // the encoding was not valid utf-8
 153                 // save the original encoding and replace the
 154                 // commonly used path with the correctly
 155                 // encoded string
 156                 if (!valid_encoding) target.path = tmp_path;
 157         }
 158
 159         bool extract_single_file(lazy_entry const& dict, file_entry& target
 160                 , std::string const& root_dir)
 161         {
 162                 lazy_entry const* length = dict.dict_find("length");
 163                 if (length == 0 || length->type() != lazy_entry::int_t)
 164                         return false;
 165                 target.size = length->int_value();
 166                 target.path = root_dir;
 167                 target.file_base = 0;
 168
 169                 // prefer the name.utf-8
 170                 // because if it exists, it is more
 171                 // likely to be correctly encoded
 172
 173                 lazy_entry const* p = dict.dict_find("path.utf-8");
 174                 if (p == 0 || p->type() != lazy_entry::list_t)
 175                         p = dict.dict_find("path");
 176                 if (p == 0 || p->type() != lazy_entry::list_t)
 177                         return false;
 178
 179                 for (int i = 0, end(p->list_size()); i < end; ++i)
 180                 {
 181                         if (p->list_at(i)->type() != lazy_entry::string_t)
 182                                 return false;
 183                         std::string path_element = p->list_at(i)->string_value();
 184                         if (path_element != "..")
 185                                 target.path /= path_element;
 186                 }
 187                 verify_encoding(target);
 188                 if (target.path.is_complete())
 189                         return false;
 190                 return true;
 191         }
 192
 193         bool extract_files(lazy_entry const& list, file_storage& target
 194                 , std::string const& root_dir)
 195         {
 196                 if (list.type() != lazy_entry::list_t) return false;
 197                 for (int i = 0, end(list.list_size()); i < end; ++i)
 198                 {
 199                         file_entry e;
 200                         if (!extract_single_file(*list.list_at(i), e, root_dir))
 201                                 return false;
 202                         target.add_file(e);
 203                 }
 204                 return true;
 205         }
 206 }
 207
 208 namespace libtorrent
 209 {
 210
 211         int load_file(fs::path const& filename, std::vector<char>& v)
 212         {
 213                 file f;
 214                 error_code ec;
 215                 if (!f.open(filename, file::in, ec)) return -1;
 216                 f.seek(0, file::end, ec);
 217                 if (ec) return -1;
 218                 size_type s = f.tell(ec);
 219                 if (ec) return -1;
 220                 if (s > 5000000) return -2;
 221                 v.resize(s);
 222                 f.seek(0, file::begin, ec);
 223                 if (ec) return -1;
 224                 size_type read = f.read(&v[0], s, ec);
 225                 if (read != s) return -3;
 226                 if (ec) return -3;
 227                 return 0;
 228         }
 229
 230         // standard constructor that parses a torrent file
 231         torrent_info::torrent_info(entry const& torrent_file)
 232                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 233                 , m_multifile(false)
 234                 , m_private(false)
 235                 , m_info_section_size(0)
 236                 , m_piece_hashes(0)
 237         {
 238                 std::vector<char> tmp;
 239                 std::back_insert_iterator<std::vector<char> > out(tmp);
 240                 bencode(out, torrent_file);
 241
 242                 lazy_entry e;
 243                 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
 244                 std::string error;
 245 #ifndef BOOST_NO_EXCEPTIONS
 246                 if (!parse_torrent_file(e, error))
 247                         throw invalid_torrent_file();
 248 #else
 249                 parse_torrent_file(e, error);
 250 #endif
 251         }
 252
 253         torrent_info::torrent_info(lazy_entry const& torrent_file)
 254                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 255                 , m_multifile(false)
 256                 , m_private(false)
 257                 , m_info_section_size(0)
 258                 , m_piece_hashes(0)
 259         {
 260                 std::string error;
 261 #ifndef BOOST_NO_EXCEPTIONS
 262                 if (!parse_torrent_file(torrent_file, error))
 263                         throw invalid_torrent_file();
 264 #else
 265                 parse_torrent_file(torrent_file, error);
 266 #endif
 267         }
 268
 269         torrent_info::torrent_info(char const* buffer, int size)
 270                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 271                 , m_multifile(false)
 272                 , m_private(false)
 273                 , m_info_section_size(0)
 274                 , m_piece_hashes(0)
 275         {
 276                 std::string error;
 277                 lazy_entry e;
 278                 lazy_bdecode(buffer, buffer + size, e);
 279 #ifndef BOOST_NO_EXCEPTIONS
 280                 if (!parse_torrent_file(e, error))
 281                         throw invalid_torrent_file();
 282 #else
 283                 parse_torrent_file(e, error);
 284 #endif
 285         }
 286
 287         // constructor used for creating new torrents
 288         // will not contain any hashes, comments, creation date
 289         // just the necessary to use it with piece manager
 290         // used for torrents with no metadata
 291         torrent_info::torrent_info(sha1_hash const& info_hash)
 292                 : m_info_hash(info_hash)
 293                 , m_creation_date(pt::second_clock::universal_time())
 294                 , m_multifile(false)
 295                 , m_private(false)
 296                 , m_info_section_size(0)
 297                 , m_piece_hashes(0)
 298         {}
 299
 300         torrent_info::torrent_info(fs::path const& filename)
 301                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 302                 , m_multifile(false)
 303                 , m_private(false)
 304         {
 305                 std::vector<char> buf;
 306                 int ret = load_file(filename, buf);
 307                 if (ret < 0) return;
 308
 309                 lazy_entry e;
 310                 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
 311                 std::string error;
 312 #ifndef BOOST_NO_EXCEPTIONS
 313                 if (!parse_torrent_file(e, error))
 314                         throw invalid_torrent_file();
 315 #else
 316                 parse_torrent_file(e, error);
 317 #endif
 318         }
 319
 320         torrent_info::~torrent_info()
 321         {}
 322
 323         void torrent_info::swap(torrent_info& ti)
 324         {
 325                 using std::swap;
 326                 m_urls.swap(ti.m_urls);
 327                 m_url_seeds.swap(ti.m_url_seeds);
 328                 m_files.swap(ti.m_files);
 329                 m_nodes.swap(ti.m_nodes);
 330                 swap(m_info_hash, ti.m_info_hash);
 331                 swap(m_creation_date, ti.m_creation_date);
 332                 m_comment.swap(ti.m_comment);
 333                 m_created_by.swap(ti.m_created_by);
 334                 swap(m_multifile, ti.m_multifile);
 335                 swap(m_private, ti.m_private);
 336                 swap(m_info_section, ti.m_info_section);
 337                 swap(m_info_section_size, ti.m_info_section_size);
 338                 swap(m_piece_hashes, ti.m_piece_hashes);
 339                 swap(m_info_dict, ti.m_info_dict);
 340         }
 341
 342         bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
 343         {
 344                 if (info.type() != lazy_entry::dict_t)
 345                 {
 346                         error = "'info' entry is not a dictionary";
 347                         return false;
 348                 }
 349
 350                 // hash the info-field to calculate info-hash
 351                 hasher h;
 352                 std::pair<char const*, int> section = info.data_section();
 353                 h.update(section.first, section.second);
 354                 m_info_hash = h.final();
 355
 356                 // copy the info section
 357                 m_info_section_size = section.second;
 358                 m_info_section.reset(new char[m_info_section_size]);
 359                 memcpy(m_info_section.get(), section.first, m_info_section_size);
 360                 TORRENT_ASSERT(section.first[0] == 'd');
 361                 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
 362
 363                 // extract piece length
 364                 int piece_length = info.dict_find_int_value("piece length", -1);
 365                 if (piece_length <= 0)
 366                 {
 367                         error = "invalid or missing 'piece length' entry in torrent file";
 368                         return false;
 369                 }
 370                 m_files.set_piece_length(piece_length);
 371
 372                 // extract file name (or the directory name if it's a multifile libtorrent)
 373                 std::string name = info.dict_find_string_value("name.utf-8");
 374                 if (name.empty()) name = info.dict_find_string_value("name");
 375                 if (name.empty())
 376                 {
 377                         error = "missing name in torrent file";
 378                         return false;
 379                 }
 380
 381                 fs::path tmp = name;
 382                 if (tmp.is_complete())
 383                 {
 384                         name = tmp.leaf();
 385                 }
 386                 else if (tmp.has_branch_path())
 387                 {
 388                         fs::path p;
 389                         for (fs::path::iterator i = tmp.begin()
 390                                 , end(tmp.end()); i != end; ++i)
 391                         {
 392                                 if (*i == "." || *i == "..") continue;
 393                                 p /= *i;
 394                         }
 395                         name = p.string();
 396                 }
 397                 if (name == ".." || name == ".")
 398                 {
 399                         error = "invalid 'name' of torrent (possible exploit attempt)";
 400                         return false;
 401                 }
 402
 403                 // extract file list
 404                 lazy_entry const* i = info.dict_find_list("files");
 405                 if (i == 0)
 406                 {
 407                         // if there's no list of files, there has to be a length
 408                         // field.
 409                         file_entry e;
 410                         e.path = name;
 411                         e.offset = 0;
 412                         e.size = info.dict_find_int_value("length", -1);
 413                         if (e.size < 0)
 414                         {
 415                                 error = "invalid length of torrent";
 416                                 return false;
 417                         }
 418                         m_files.add_file(e);
 419                         m_multifile = false;
 420                 }
 421                 else
 422                 {
 423                         if (!extract_files(*i, m_files, name))
 424                         {
 425                                 error = "failed to parse files from torrent file";
 426                                 return false;
 427                         }
 428                         m_multifile = true;
 429                 }
 430                 m_files.set_name(name);
 431
 432                 // extract sha-1 hashes for all pieces
 433                 // we want this division to round upwards, that's why we have the
 434                 // extra addition
 435
 436                 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
 437                         / m_files.piece_length()));
 438
 439                 lazy_entry const* pieces = info.dict_find("pieces");
 440                 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
 441                 {
 442                         error = "invalid or missing 'pieces' entry in torrent file";
 443                         return false;
 444                 }
 445
 446                 if (pieces->string_length() != m_files.num_pieces() * 20)
 447                 {
 448                         error = "incorrect number of piece hashes in torrent file";
 449                         return false;
 450                 }
 451
 452                 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
 453                 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
 454                 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
 455
 456                 m_private = info.dict_find_int_value("private", 0);
 457                 return true;
 458         }
 459
 460         bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
 461         {
 462                 if (torrent_file.type() != lazy_entry::dict_t)
 463                 {
 464                         error = "torrent file is not a dictionary";
 465                         return false;
 466                 }
 467
 468                 // extract the url of the tracker
 469                 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
 470                 if (i)
 471                 {
 472                         m_urls.reserve(i->list_size());
 473                         for (int j = 0, end(i->list_size()); j < end; ++j)
 474                         {
 475                                 lazy_entry const* tier = i->list_at(j);
 476                                 if (tier->type() != lazy_entry::list_t) continue;
 477                                 for (int k = 0, end(tier->list_size()); k < end; ++k)
 478                                 {
 479                                         announce_entry e(tier->list_string_value_at(k));
 480                                         if (e.url.empty()) continue;
 481                                         e.tier = j;
 482                                         m_urls.push_back(e);
 483                                 }
 484                         }
 485
 486                         // shuffle each tier
 487                         std::vector<announce_entry>::iterator start = m_urls.begin();
 488                         std::vector<announce_entry>::iterator stop;
 489                         int current_tier = m_urls.front().tier;
 490                         for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
 491                         {
 492                                 if (stop->tier != current_tier)
 493                                 {
 494                                         std::random_shuffle(start, stop);
 495                                         start = stop;
 496                                         current_tier = stop->tier;
 497                                 }
 498                         }
 499                         std::random_shuffle(start, stop);
 500                 }
 501
 502
 503                 if (m_urls.empty())
 504                 {
 505                         announce_entry e(torrent_file.dict_find_string_value("announce"));
 506                         if (!e.url.empty()) m_urls.push_back(e);
 507                 }
 508
 509                 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
 510                 if (nodes)
 511                 {
 512                         for (int i = 0, end(nodes->list_size()); i < end; ++i)
 513                         {
 514                                 lazy_entry const* n = nodes->list_at(i);
 515                                 if (n->type() != lazy_entry::list_t
 516                                         || n->list_size() < 2
 517                                         || n->list_at(0)->type() != lazy_entry::string_t
 518                                         || n->list_at(1)->type() != lazy_entry::int_t)
 519                                         continue;
 520                                 m_nodes.push_back(std::make_pair(
 521                                         n->list_at(0)->string_value()
 522                                         , int(n->list_at(1)->int_value())));
 523                         }
 524                 }
 525
 526                 // extract creation date
 527                 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
 528                 if (cd >= 0)
 529                 {
 530                         m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
 531                                 + pt::seconds(long(cd));
 532                 }
 533
 534                 // if there are any url-seeds, extract them
 535                 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
 536                 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
 537                 {
 538                         m_url_seeds.push_back(url_seeds->string_value());
 539                 }
 540                 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
 541                 {
 542                         for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
 543                         {
 544                                 lazy_entry const* url = url_seeds->list_at(i);
 545                                 if (url->type() != lazy_entry::string_t) continue;
 546                                 m_url_seeds.push_back(url->string_value());
 547                         }
 548                 }
 549
 550                 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
 551                 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
 552
 553                 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
 554                 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
 555
 556                 lazy_entry const* info = torrent_file.dict_find_dict("info");
 557                 if (info == 0)
 558                 {
 559                         error = "missing or invalid 'info' section in torrent file";
 560                         return false;
 561                 }
 562                 return parse_info_section(*info, error);
 563         }
 564
 565         boost::optional<pt::ptime>
 566         torrent_info::creation_date() const
 567         {
 568                 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 569                 {
 570                         return boost::optional<pt::ptime>(m_creation_date);
 571                 }
 572                 return boost::optional<pt::ptime>();
 573         }
 574
 575         void torrent_info::add_tracker(std::string const& url, int tier)
 576         {
 577                 announce_entry e(url);
 578                 e.tier = tier;
 579                 m_urls.push_back(e);
 580
 581                 using boost::bind;
 582                 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
 583                         , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
 584         }
 585
 586 // ------- start deprecation -------
 587
 588         void torrent_info::print(std::ostream& os) const
 589         {
 590                 os << "trackers:\n";
 591                 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
 592                         i != trackers().end(); ++i)
 593                 {
 594                         os << i->tier << ": " << i->url << "\n";
 595                 }
 596                 if (!m_comment.empty())
 597                         os << "comment: " << m_comment << "\n";
 598 //              if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 599 //                      os << "creation date: " << to_simple_string(m_creation_date) << "\n";
 600                 os << "private: " << (m_private?"yes":"no") << "\n";
 601                 os << "number of pieces: " << num_pieces() << "\n";
 602                 os << "piece length: " << piece_length() << "\n";
 603                 os << "files:\n";
 604                 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
 605                         os << "  " << std::setw(11) << i->size << "  " << i->path.string() << "\n";
 606         }
 607
 608 // ------- end deprecation -------
 609
 610 }
 611