src/torrent_info.cpp

   1 /*
   2
   3 Copyright (c) 2003-2008, Arvid Norberg
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions
   8 are met:
   9
  10     * Redistributions of source code must retain the above copyright
  11       notice, this list of conditions and the following disclaimer.
  12     * Redistributions in binary form must reproduce the above copyright
  13       notice, this list of conditions and the following disclaimer in
  14       the documentation and/or other materials provided with the distribution.
  15     * Neither the name of the author nor the names of its
  16       contributors may be used to endorse or promote products derived
  17       from this software without specific prior written permission.
  18
  19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29 POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 #include "libtorrent/pch.hpp"
  34
  35 #include <ctime>
  36 #include <iostream>
  37 #include <fstream>
  38 #include <iomanip>
  39 #include <iterator>
  40 #include <algorithm>
  41 #include <set>
  42
  43 #ifdef _MSC_VER
  44 #pragma warning(push, 1)
  45 #endif
  46
  47 #include <boost/lexical_cast.hpp>
  48 #include <boost/filesystem/path.hpp>
  49 #include <boost/filesystem.hpp>
  50 #include <boost/bind.hpp>
  51
  52 #ifdef _MSC_VER
  53 #pragma warning(pop)
  54 #endif
  55
  56 #include "libtorrent/torrent_info.hpp"
  57 #include "libtorrent/bencode.hpp"
  58 #include "libtorrent/hasher.hpp"
  59 #include "libtorrent/entry.hpp"
  60 #include "libtorrent/file.hpp"
  61
  62 namespace gr = boost::gregorian;
  63
  64 using namespace libtorrent;
  65
  66 namespace
  67 {
  68
  69         namespace fs = boost::filesystem;
  70
  71         void convert_to_utf8(std::string& str, unsigned char chr)
  72         {
  73                 str += 0xc0 | ((chr & 0xff) >> 6);
  74                 str += 0x80 | (chr & 0x3f);
  75         }
  76
  77         void verify_encoding(file_entry& target)
  78         {
  79                 std::string tmp_path;
  80                 std::string file_path = target.path.string();
  81                 bool valid_encoding = true;
  82                 for (std::string::iterator i = file_path.begin()
  83                         , end(file_path.end()); i != end; ++i)
  84                 {
  85                         // valid ascii-character
  86                         if ((*i & 0x80) == 0)
  87                         {
  88                                 tmp_path += *i;
  89                                 continue;
  90                         }
  91
  92                         if (std::distance(i, end) < 2)
  93                         {
  94                                 convert_to_utf8(tmp_path, *i);
  95                                 valid_encoding = false;
  96                                 continue;
  97                         }
  98
  99                         // valid 2-byte utf-8 character
 100                         if ((i[0] & 0xe0) == 0xc0
 101                                 && (i[1] & 0xc0) == 0x80)
 102                         {
 103                                 tmp_path += i[0];
 104                                 tmp_path += i[1];
 105                                 i += 1;
 106                                 continue;
 107                         }
 108
 109                         if (std::distance(i, end) < 3)
 110                         {
 111                                 convert_to_utf8(tmp_path, *i);
 112                                 valid_encoding = false;
 113                                 continue;
 114                         }
 115
 116                         // valid 3-byte utf-8 character
 117                         if ((i[0] & 0xf0) == 0xe0
 118                                 && (i[1] & 0xc0) == 0x80
 119                                 && (i[2] & 0xc0) == 0x80)
 120                         {
 121                                 tmp_path += i[0];
 122                                 tmp_path += i[1];
 123                                 tmp_path += i[2];
 124                                 i += 2;
 125                                 continue;
 126                         }
 127
 128                         if (std::distance(i, end) < 4)
 129                         {
 130                                 convert_to_utf8(tmp_path, *i);
 131                                 valid_encoding = false;
 132                                 continue;
 133                         }
 134
 135                         // valid 4-byte utf-8 character
 136                         if ((i[0] & 0xf0) == 0xe0
 137                                 && (i[1] & 0xc0) == 0x80
 138                                 && (i[2] & 0xc0) == 0x80
 139                                 && (i[3] & 0xc0) == 0x80)
 140                         {
 141                                 tmp_path += i[0];
 142                                 tmp_path += i[1];
 143                                 tmp_path += i[2];
 144                                 tmp_path += i[3];
 145                                 i += 3;
 146                                 continue;
 147                         }
 148
 149                         convert_to_utf8(tmp_path, *i);
 150                         valid_encoding = false;
 151                 }
 152                 // the encoding was not valid utf-8
 153                 // save the original encoding and replace the
 154                 // commonly used path with the correctly
 155                 // encoded string
 156                 if (!valid_encoding) target.path = tmp_path;
 157         }
 158
 159         bool extract_single_file(lazy_entry const& dict, file_entry& target
 160                 , std::string const& root_dir)
 161         {
 162                 lazy_entry const* length = dict.dict_find("length");
 163                 if (length == 0 || length->type() != lazy_entry::int_t)
 164                         return false;
 165                 target.size = length->int_value();
 166                 target.path = root_dir;
 167                 target.file_base = 0;
 168
 169                 // prefer the name.utf-8
 170                 // because if it exists, it is more
 171                 // likely to be correctly encoded
 172
 173                 lazy_entry const* p = dict.dict_find("path.utf-8");
 174                 if (p == 0 || p->type() != lazy_entry::list_t)
 175                         p = dict.dict_find("path");
 176                 if (p == 0 || p->type() != lazy_entry::list_t)
 177                         return false;
 178
 179                 for (int i = 0, end(p->list_size()); i < end; ++i)
 180                 {
 181                         if (p->list_at(i)->type() != lazy_entry::string_t)
 182                                 return false;
 183                         std::string path_element = p->list_at(i)->string_value();
 184                         if (path_element != "..")
 185                                 target.path /= path_element;
 186                 }
 187                 verify_encoding(target);
 188                 if (target.path.is_complete())
 189                         return false;
 190                 return true;
 191         }
 192
 193         bool extract_files(lazy_entry const& list, file_storage& target
 194                 , std::string const& root_dir)
 195         {
 196                 if (list.type() != lazy_entry::list_t) return false;
 197                 for (int i = 0, end(list.list_size()); i < end; ++i)
 198                 {
 199                         file_entry e;
 200                         if (!extract_single_file(*list.list_at(i), e, root_dir))
 201                                 return false;
 202                         target.add_file(e);
 203                 }
 204                 return true;
 205         }
 206 }
 207
 208 namespace libtorrent
 209 {
 210
 211         int load_file(fs::path const& filename, std::vector<char>& v)
 212         {
 213                 file f;
 214                 error_code ec;
 215                 if (!f.open(filename, file::in, ec)) return -1;
 216                 f.seek(0, file::end, ec);
 217                 if (ec) return -1;
 218                 size_type s = f.tell(ec);
 219                 if (ec) return -1;
 220                 if (s > 5000000) return -2;
 221                 v.resize(s);
 222                 f.seek(0, file::begin, ec);
 223                 if (ec) return -1;
 224                 size_type read = f.read(&v[0], s, ec);
 225                 if (read != s) return -3;
 226                 if (ec) return -3;
 227                 return 0;
 228         }
 229
 230 #ifndef TORRENT_NO_DEPRECATE
 231         // standard constructor that parses a torrent file
 232         torrent_info::torrent_info(entry const& torrent_file)
 233                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 234                 , m_multifile(false)
 235                 , m_private(false)
 236                 , m_info_section_size(0)
 237                 , m_piece_hashes(0)
 238         {
 239                 std::vector<char> tmp;
 240                 std::back_insert_iterator<std::vector<char> > out(tmp);
 241                 bencode(out, torrent_file);
 242
 243                 lazy_entry e;
 244                 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
 245                 std::string error;
 246 #ifndef BOOST_NO_EXCEPTIONS
 247                 if (!parse_torrent_file(e, error))
 248                         throw invalid_torrent_file();
 249 #else
 250                 parse_torrent_file(e, error);
 251 #endif
 252         }
 253 #endif
 254
 255         torrent_info::torrent_info(lazy_entry const& torrent_file)
 256                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 257                 , m_multifile(false)
 258                 , m_private(false)
 259                 , m_info_section_size(0)
 260                 , m_piece_hashes(0)
 261         {
 262                 std::string error;
 263 #ifndef BOOST_NO_EXCEPTIONS
 264                 if (!parse_torrent_file(torrent_file, error))
 265                         throw invalid_torrent_file();
 266 #else
 267                 parse_torrent_file(torrent_file, error);
 268 #endif
 269         }
 270
 271         torrent_info::torrent_info(char const* buffer, int size)
 272                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 273                 , m_multifile(false)
 274                 , m_private(false)
 275                 , m_info_section_size(0)
 276                 , m_piece_hashes(0)
 277         {
 278                 std::string error;
 279                 lazy_entry e;
 280                 lazy_bdecode(buffer, buffer + size, e);
 281 #ifndef BOOST_NO_EXCEPTIONS
 282                 if (!parse_torrent_file(e, error))
 283                         throw invalid_torrent_file();
 284 #else
 285                 parse_torrent_file(e, error);
 286 #endif
 287         }
 288
 289         // constructor used for creating new torrents
 290         // will not contain any hashes, comments, creation date
 291         // just the necessary to use it with piece manager
 292         // used for torrents with no metadata
 293         torrent_info::torrent_info(sha1_hash const& info_hash)
 294                 : m_info_hash(info_hash)
 295                 , m_creation_date(pt::second_clock::universal_time())
 296                 , m_multifile(false)
 297                 , m_private(false)
 298                 , m_info_section_size(0)
 299                 , m_piece_hashes(0)
 300         {}
 301
 302         torrent_info::torrent_info(fs::path const& filename)
 303                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 304                 , m_multifile(false)
 305                 , m_private(false)
 306         {
 307                 std::vector<char> buf;
 308                 int ret = load_file(filename, buf);
 309                 if (ret < 0) return;
 310
 311                 lazy_entry e;
 312                 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
 313                 std::string error;
 314 #ifndef BOOST_NO_EXCEPTIONS
 315                 if (!parse_torrent_file(e, error))
 316                         throw invalid_torrent_file();
 317 #else
 318                 parse_torrent_file(e, error);
 319 #endif
 320         }
 321
 322         torrent_info::~torrent_info()
 323         {}
 324
 325         void torrent_info::swap(torrent_info& ti)
 326         {
 327                 using std::swap;
 328                 m_urls.swap(ti.m_urls);
 329                 m_url_seeds.swap(ti.m_url_seeds);
 330                 m_files.swap(ti.m_files);
 331                 m_nodes.swap(ti.m_nodes);
 332                 swap(m_info_hash, ti.m_info_hash);
 333                 swap(m_creation_date, ti.m_creation_date);
 334                 m_comment.swap(ti.m_comment);
 335                 m_created_by.swap(ti.m_created_by);
 336                 swap(m_multifile, ti.m_multifile);
 337                 swap(m_private, ti.m_private);
 338                 swap(m_info_section, ti.m_info_section);
 339                 swap(m_info_section_size, ti.m_info_section_size);
 340                 swap(m_piece_hashes, ti.m_piece_hashes);
 341                 swap(m_info_dict, ti.m_info_dict);
 342         }
 343
 344         bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
 345         {
 346                 if (info.type() != lazy_entry::dict_t)
 347                 {
 348                         error = "'info' entry is not a dictionary";
 349                         return false;
 350                 }
 351
 352                 // hash the info-field to calculate info-hash
 353                 hasher h;
 354                 std::pair<char const*, int> section = info.data_section();
 355                 h.update(section.first, section.second);
 356                 m_info_hash = h.final();
 357
 358                 // copy the info section
 359                 m_info_section_size = section.second;
 360                 m_info_section.reset(new char[m_info_section_size]);
 361                 memcpy(m_info_section.get(), section.first, m_info_section_size);
 362                 TORRENT_ASSERT(section.first[0] == 'd');
 363                 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
 364
 365                 // extract piece length
 366                 int piece_length = info.dict_find_int_value("piece length", -1);
 367                 if (piece_length <= 0)
 368                 {
 369                         error = "invalid or missing 'piece length' entry in torrent file";
 370                         return false;
 371                 }
 372                 m_files.set_piece_length(piece_length);
 373
 374                 // extract file name (or the directory name if it's a multifile libtorrent)
 375                 std::string name = info.dict_find_string_value("name.utf-8");
 376                 if (name.empty()) name = info.dict_find_string_value("name");
 377                 if (name.empty())
 378                 {
 379                         error = "missing name in torrent file";
 380                         return false;
 381                 }
 382
 383                 fs::path tmp = name;
 384                 if (tmp.is_complete())
 385                 {
 386                         name = tmp.leaf();
 387                 }
 388                 else if (tmp.has_branch_path())
 389                 {
 390                         fs::path p;
 391                         for (fs::path::iterator i = tmp.begin()
 392                                 , end(tmp.end()); i != end; ++i)
 393                         {
 394                                 if (*i == "." || *i == "..") continue;
 395                                 p /= *i;
 396                         }
 397                         name = p.string();
 398                 }
 399                 if (name == ".." || name == ".")
 400                 {
 401                         error = "invalid 'name' of torrent (possible exploit attempt)";
 402                         return false;
 403                 }
 404
 405                 // extract file list
 406                 lazy_entry const* i = info.dict_find_list("files");
 407                 if (i == 0)
 408                 {
 409                         // if there's no list of files, there has to be a length
 410                         // field.
 411                         file_entry e;
 412                         e.path = name;
 413                         e.offset = 0;
 414                         e.size = info.dict_find_int_value("length", -1);
 415                         if (e.size < 0)
 416                         {
 417                                 error = "invalid length of torrent";
 418                                 return false;
 419                         }
 420                         m_files.add_file(e);
 421                         m_multifile = false;
 422                 }
 423                 else
 424                 {
 425                         if (!extract_files(*i, m_files, name))
 426                         {
 427                                 error = "failed to parse files from torrent file";
 428                                 return false;
 429                         }
 430                         m_multifile = true;
 431                 }
 432                 m_files.set_name(name);
 433
 434                 // extract sha-1 hashes for all pieces
 435                 // we want this division to round upwards, that's why we have the
 436                 // extra addition
 437
 438                 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
 439                         / m_files.piece_length()));
 440
 441                 lazy_entry const* pieces = info.dict_find("pieces");
 442                 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
 443                 {
 444                         error = "invalid or missing 'pieces' entry in torrent file";
 445                         return false;
 446                 }
 447
 448                 if (pieces->string_length() != m_files.num_pieces() * 20)
 449                 {
 450                         error = "incorrect number of piece hashes in torrent file";
 451                         return false;
 452                 }
 453
 454                 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
 455                 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
 456                 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
 457
 458                 m_private = info.dict_find_int_value("private", 0);
 459                 return true;
 460         }
 461
 462         bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
 463         {
 464                 if (torrent_file.type() != lazy_entry::dict_t)
 465                 {
 466                         error = "torrent file is not a dictionary";
 467                         return false;
 468                 }
 469
 470                 // extract the url of the tracker
 471                 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
 472                 if (i)
 473                 {
 474                         m_urls.reserve(i->list_size());
 475                         for (int j = 0, end(i->list_size()); j < end; ++j)
 476                         {
 477                                 lazy_entry const* tier = i->list_at(j);
 478                                 if (tier->type() != lazy_entry::list_t) continue;
 479                                 for (int k = 0, end(tier->list_size()); k < end; ++k)
 480                                 {
 481                                         announce_entry e(tier->list_string_value_at(k));
 482                                         if (e.url.empty()) continue;
 483                                         e.tier = j;
 484                                         m_urls.push_back(e);
 485                                 }
 486                         }
 487
 488                         // shuffle each tier
 489                         std::vector<announce_entry>::iterator start = m_urls.begin();
 490                         std::vector<announce_entry>::iterator stop;
 491                         int current_tier = m_urls.front().tier;
 492                         for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
 493                         {
 494                                 if (stop->tier != current_tier)
 495                                 {
 496                                         std::random_shuffle(start, stop);
 497                                         start = stop;
 498                                         current_tier = stop->tier;
 499                                 }
 500                         }
 501                         std::random_shuffle(start, stop);
 502                 }
 503
 504
 505                 if (m_urls.empty())
 506                 {
 507                         announce_entry e(torrent_file.dict_find_string_value("announce"));
 508                         if (!e.url.empty()) m_urls.push_back(e);
 509                 }
 510
 511                 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
 512                 if (nodes)
 513                 {
 514                         for (int i = 0, end(nodes->list_size()); i < end; ++i)
 515                         {
 516                                 lazy_entry const* n = nodes->list_at(i);
 517                                 if (n->type() != lazy_entry::list_t
 518                                         || n->list_size() < 2
 519                                         || n->list_at(0)->type() != lazy_entry::string_t
 520                                         || n->list_at(1)->type() != lazy_entry::int_t)
 521                                         continue;
 522                                 m_nodes.push_back(std::make_pair(
 523                                         n->list_at(0)->string_value()
 524                                         , int(n->list_at(1)->int_value())));
 525                         }
 526                 }
 527
 528                 // extract creation date
 529                 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
 530                 if (cd >= 0)
 531                 {
 532                         m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
 533                                 + pt::seconds(long(cd));
 534                 }
 535
 536                 // if there are any url-seeds, extract them
 537                 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
 538                 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
 539                 {
 540                         m_url_seeds.push_back(url_seeds->string_value());
 541                 }
 542                 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
 543                 {
 544                         for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
 545                         {
 546                                 lazy_entry const* url = url_seeds->list_at(i);
 547                                 if (url->type() != lazy_entry::string_t) continue;
 548                                 m_url_seeds.push_back(url->string_value());
 549                         }
 550                 }
 551
 552                 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
 553                 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
 554
 555                 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
 556                 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
 557
 558                 lazy_entry const* info = torrent_file.dict_find_dict("info");
 559                 if (info == 0)
 560                 {
 561                         error = "missing or invalid 'info' section in torrent file";
 562                         return false;
 563                 }
 564                 return parse_info_section(*info, error);
 565         }
 566
 567         boost::optional<pt::ptime>
 568         torrent_info::creation_date() const
 569         {
 570                 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 571                 {
 572                         return boost::optional<pt::ptime>(m_creation_date);
 573                 }
 574                 return boost::optional<pt::ptime>();
 575         }
 576
 577         void torrent_info::add_tracker(std::string const& url, int tier)
 578         {
 579                 announce_entry e(url);
 580                 e.tier = tier;
 581                 m_urls.push_back(e);
 582
 583                 using boost::bind;
 584                 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
 585                         , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
 586         }
 587
 588 #ifndef TORRENT_NO_DEPRECATE
 589 // ------- start deprecation -------
 590
 591         void torrent_info::print(std::ostream& os) const
 592         {
 593                 os << "trackers:\n";
 594                 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
 595                         i != trackers().end(); ++i)
 596                 {
 597                         os << i->tier << ": " << i->url << "\n";
 598                 }
 599                 if (!m_comment.empty())
 600                         os << "comment: " << m_comment << "\n";
 601 //              if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 602 //                      os << "creation date: " << to_simple_string(m_creation_date) << "\n";
 603                 os << "private: " << (m_private?"yes":"no") << "\n";
 604                 os << "number of pieces: " << num_pieces() << "\n";
 605                 os << "piece length: " << piece_length() << "\n";
 606                 os << "files:\n";
 607                 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
 608                         os << "  " << std::setw(11) << i->size << "  " << i->path.string() << "\n";
 609         }
 610
 611 // ------- end deprecation -------
 612 #endif
 613
 614 }
 615