src/torrent_info.cpp

   1 /*
   2
   3 Copyright (c) 2003-2008, Arvid Norberg
   4 All rights reserved.
   5
   6 Redistribution and use in source and binary forms, with or without
   7 modification, are permitted provided that the following conditions
   8 are met:
   9
  10     * Redistributions of source code must retain the above copyright
  11       notice, this list of conditions and the following disclaimer.
  12     * Redistributions in binary form must reproduce the above copyright
  13       notice, this list of conditions and the following disclaimer in
  14       the documentation and/or other materials provided with the distribution.
  15     * Neither the name of the author nor the names of its
  16       contributors may be used to endorse or promote products derived
  17       from this software without specific prior written permission.
  18
  19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29 POSSIBILITY OF SUCH DAMAGE.
  30
  31 */
  32
  33 #include "libtorrent/pch.hpp"
  34
  35 #include <ctime>
  36 #include <iostream>
  37 #include <fstream>
  38 #include <iomanip>
  39 #include <iterator>
  40 #include <algorithm>
  41 #include <set>
  42
  43 #ifdef _MSC_VER
  44 #pragma warning(push, 1)
  45 #endif
  46
  47 #include <boost/lexical_cast.hpp>
  48 #include <boost/filesystem/path.hpp>
  49 #include <boost/filesystem.hpp>
  50 #include <boost/bind.hpp>
  51
  52 #ifdef _MSC_VER
  53 #pragma warning(pop)
  54 #endif
  55
  56 #include "libtorrent/torrent_info.hpp"
  57 #include "libtorrent/bencode.hpp"
  58 #include "libtorrent/hasher.hpp"
  59 #include "libtorrent/entry.hpp"
  60 #include "libtorrent/file.hpp"
  61
  62 namespace gr = boost::gregorian;
  63
  64 using namespace libtorrent;
  65
  66 namespace
  67 {
  68
  69         namespace fs = boost::filesystem;
  70
  71         void convert_to_utf8(std::string& str, unsigned char chr)
  72         {
  73                 str += 0xc0 | ((chr & 0xff) >> 6);
  74                 str += 0x80 | (chr & 0x3f);
  75         }
  76
  77         void verify_encoding(file_entry& target)
  78         {
  79                 std::string tmp_path;
  80                 std::string file_path = target.path.string();
  81                 bool valid_encoding = true;
  82                 for (std::string::iterator i = file_path.begin()
  83                         , end(file_path.end()); i != end; ++i)
  84                 {
  85                         // valid ascii-character
  86                         if ((*i & 0x80) == 0)
  87                         {
  88                                 tmp_path += *i;
  89                                 continue;
  90                         }
  91
  92                         if (std::distance(i, end) < 2)
  93                         {
  94                                 convert_to_utf8(tmp_path, *i);
  95                                 valid_encoding = false;
  96                                 continue;
  97                         }
  98
  99                         // valid 2-byte utf-8 character
 100                         if ((i[0] & 0xe0) == 0xc0
 101                                 && (i[1] & 0xc0) == 0x80)
 102                         {
 103                                 tmp_path += i[0];
 104                                 tmp_path += i[1];
 105                                 i += 1;
 106                                 continue;
 107                         }
 108
 109                         if (std::distance(i, end) < 3)
 110                         {
 111                                 convert_to_utf8(tmp_path, *i);
 112                                 valid_encoding = false;
 113                                 continue;
 114                         }
 115
 116                         // valid 3-byte utf-8 character
 117                         if ((i[0] & 0xf0) == 0xe0
 118                                 && (i[1] & 0xc0) == 0x80
 119                                 && (i[2] & 0xc0) == 0x80)
 120                         {
 121                                 tmp_path += i[0];
 122                                 tmp_path += i[1];
 123                                 tmp_path += i[2];
 124                                 i += 2;
 125                                 continue;
 126                         }
 127
 128                         if (std::distance(i, end) < 4)
 129                         {
 130                                 convert_to_utf8(tmp_path, *i);
 131                                 valid_encoding = false;
 132                                 continue;
 133                         }
 134
 135                         // valid 4-byte utf-8 character
 136                         if ((i[0] & 0xf0) == 0xe0
 137                                 && (i[1] & 0xc0) == 0x80
 138                                 && (i[2] & 0xc0) == 0x80
 139                                 && (i[3] & 0xc0) == 0x80)
 140                         {
 141                                 tmp_path += i[0];
 142                                 tmp_path += i[1];
 143                                 tmp_path += i[2];
 144                                 tmp_path += i[3];
 145                                 i += 3;
 146                                 continue;
 147                         }
 148
 149                         convert_to_utf8(tmp_path, *i);
 150                         valid_encoding = false;
 151                 }
 152                 // the encoding was not valid utf-8
 153                 // save the original encoding and replace the
 154                 // commonly used path with the correctly
 155                 // encoded string
 156                 if (!valid_encoding) target.path = tmp_path;
 157         }
 158
 159         bool extract_single_file(lazy_entry const& dict, file_entry& target
 160                 , std::string const& root_dir)
 161         {
 162                 lazy_entry const* length = dict.dict_find("length");
 163                 if (length == 0 || length->type() != lazy_entry::int_t)
 164                         return false;
 165                 target.size = length->int_value();
 166                 target.path = root_dir;
 167                 target.file_base = 0;
 168
 169                 // prefer the name.utf-8
 170                 // because if it exists, it is more
 171                 // likely to be correctly encoded
 172
 173                 lazy_entry const* p = dict.dict_find("path.utf-8");
 174                 if (p == 0 || p->type() != lazy_entry::list_t)
 175                         p = dict.dict_find("path");
 176                 if (p == 0 || p->type() != lazy_entry::list_t)
 177                         return false;
 178
 179                 for (int i = 0, end(p->list_size()); i < end; ++i)
 180                 {
 181                         if (p->list_at(i)->type() != lazy_entry::string_t)
 182                                 return false;
 183                         std::string path_element = p->list_at(i)->string_value();
 184                         if (path_element != "..")
 185                                 target.path /= path_element;
 186                 }
 187                 verify_encoding(target);
 188                 if (target.path.is_complete())
 189                         return false;
 190                 return true;
 191         }
 192
 193         bool extract_files(lazy_entry const& list, file_storage& target
 194                 , std::string const& root_dir)
 195         {
 196                 if (list.type() != lazy_entry::list_t) return false;
 197                 for (int i = 0, end(list.list_size()); i < end; ++i)
 198                 {
 199                         file_entry e;
 200                         if (!extract_single_file(*list.list_at(i), e, root_dir))
 201                                 return false;
 202                         target.add_file(e);
 203                 }
 204                 return true;
 205         }
 206 }
 207
 208 namespace libtorrent
 209 {
 210
 211         int load_file(fs::path const& filename, std::vector<char>& v)
 212         {
 213                 file f;
 214                 if (!f.open(filename, file::in)) return -1;
 215                 f.seek(0, file::end);
 216                 size_type s = f.tell();
 217                 if (s > 5000000) return -2;
 218                 v.resize(s);
 219                 f.seek(0);
 220                 size_type read = f.read(&v[0], s);
 221                 if (read != s) return -3;
 222                 return 0;
 223         }
 224
 225         // standard constructor that parses a torrent file
 226         torrent_info::torrent_info(entry const& torrent_file)
 227                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 228                 , m_multifile(false)
 229                 , m_private(false)
 230                 , m_info_section_size(0)
 231                 , m_piece_hashes(0)
 232         {
 233                 std::vector<char> tmp;
 234                 std::back_insert_iterator<std::vector<char> > out(tmp);
 235                 bencode(out, torrent_file);
 236
 237                 lazy_entry e;
 238                 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
 239                 std::string error;
 240 #ifndef BOOST_NO_EXCEPTIONS
 241                 if (!parse_torrent_file(e, error))
 242                         throw invalid_torrent_file();
 243 #else
 244                 parse_torrent_file(e, error);
 245 #endif
 246         }
 247
 248         torrent_info::torrent_info(lazy_entry const& torrent_file)
 249                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 250                 , m_multifile(false)
 251                 , m_private(false)
 252                 , m_info_section_size(0)
 253                 , m_piece_hashes(0)
 254         {
 255                 std::string error;
 256 #ifndef BOOST_NO_EXCEPTIONS
 257                 if (!parse_torrent_file(torrent_file, error))
 258                         throw invalid_torrent_file();
 259 #else
 260                 parse_torrent_file(torrent_file, error);
 261 #endif
 262         }
 263
 264         torrent_info::torrent_info(char const* buffer, int size)
 265                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 266                 , m_multifile(false)
 267                 , m_private(false)
 268                 , m_info_section_size(0)
 269                 , m_piece_hashes(0)
 270         {
 271                 std::string error;
 272                 lazy_entry e;
 273                 lazy_bdecode(buffer, buffer + size, e);
 274 #ifndef BOOST_NO_EXCEPTIONS
 275                 if (!parse_torrent_file(e, error))
 276                         throw invalid_torrent_file();
 277 #else
 278                 parse_torrent_file(e, error);
 279 #endif
 280         }
 281
 282         // constructor used for creating new torrents
 283         // will not contain any hashes, comments, creation date
 284         // just the necessary to use it with piece manager
 285         // used for torrents with no metadata
 286         torrent_info::torrent_info(sha1_hash const& info_hash)
 287                 : m_info_hash(info_hash)
 288                 , m_creation_date(pt::second_clock::universal_time())
 289                 , m_multifile(false)
 290                 , m_private(false)
 291                 , m_info_section_size(0)
 292                 , m_piece_hashes(0)
 293         {}
 294
 295         torrent_info::torrent_info(fs::path const& filename)
 296                 : m_creation_date(pt::ptime(pt::not_a_date_time))
 297                 , m_multifile(false)
 298                 , m_private(false)
 299         {
 300                 std::vector<char> buf;
 301                 int ret = load_file(filename, buf);
 302                 if (ret < 0) return;
 303
 304                 lazy_entry e;
 305                 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
 306                 std::string error;
 307 #ifndef BOOST_NO_EXCEPTIONS
 308                 if (!parse_torrent_file(e, error))
 309                         throw invalid_torrent_file();
 310 #else
 311                 parse_torrent_file(e, error);
 312 #endif
 313         }
 314
 315         torrent_info::~torrent_info()
 316         {}
 317
 318         void torrent_info::swap(torrent_info& ti)
 319         {
 320                 using std::swap;
 321                 m_urls.swap(ti.m_urls);
 322                 m_url_seeds.swap(ti.m_url_seeds);
 323                 m_files.swap(ti.m_files);
 324                 m_nodes.swap(ti.m_nodes);
 325                 swap(m_info_hash, ti.m_info_hash);
 326                 swap(m_creation_date, ti.m_creation_date);
 327                 m_comment.swap(ti.m_comment);
 328                 m_created_by.swap(ti.m_created_by);
 329                 swap(m_multifile, ti.m_multifile);
 330                 swap(m_private, ti.m_private);
 331                 swap(m_info_section, ti.m_info_section);
 332                 swap(m_info_section_size, ti.m_info_section_size);
 333                 swap(m_piece_hashes, ti.m_piece_hashes);
 334                 swap(m_info_dict, ti.m_info_dict);
 335         }
 336
 337         bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
 338         {
 339                 if (info.type() != lazy_entry::dict_t)
 340                 {
 341                         error = "'info' entry is not a dictionary";
 342                         return false;
 343                 }
 344
 345                 // hash the info-field to calculate info-hash
 346                 hasher h;
 347                 std::pair<char const*, int> section = info.data_section();
 348                 h.update(section.first, section.second);
 349                 m_info_hash = h.final();
 350
 351                 // copy the info section
 352                 m_info_section_size = section.second;
 353                 m_info_section.reset(new char[m_info_section_size]);
 354                 memcpy(m_info_section.get(), section.first, m_info_section_size);
 355                 TORRENT_ASSERT(section.first[0] == 'd');
 356                 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
 357
 358                 // extract piece length
 359                 int piece_length = info.dict_find_int_value("piece length", -1);
 360                 if (piece_length <= 0)
 361                 {
 362                         error = "invalid or missing 'piece length' entry in torrent file";
 363                         return false;
 364                 }
 365                 m_files.set_piece_length(piece_length);
 366
 367                 // extract file name (or the directory name if it's a multifile libtorrent)
 368                 std::string name = info.dict_find_string_value("name.utf-8");
 369                 if (name.empty()) name = info.dict_find_string_value("name");
 370                 if (name.empty())
 371                 {
 372                         error = "missing name in torrent file";
 373                         return false;
 374                 }
 375
 376                 fs::path tmp = name;
 377                 if (tmp.is_complete())
 378                 {
 379                         name = tmp.leaf();
 380                 }
 381                 else if (tmp.has_branch_path())
 382                 {
 383                         fs::path p;
 384                         for (fs::path::iterator i = tmp.begin()
 385                                 , end(tmp.end()); i != end; ++i)
 386                         {
 387                                 if (*i == "." || *i == "..") continue;
 388                                 p /= *i;
 389                         }
 390                         name = p.string();
 391                 }
 392                 if (name == ".." || name == ".")
 393                 {
 394                         error = "invalid 'name' of torrent (possible exploit attempt)";
 395                         return false;
 396                 }
 397
 398                 // extract file list
 399                 lazy_entry const* i = info.dict_find_list("files");
 400                 if (i == 0)
 401                 {
 402                         // if there's no list of files, there has to be a length
 403                         // field.
 404                         file_entry e;
 405                         e.path = name;
 406                         e.offset = 0;
 407                         e.size = info.dict_find_int_value("length", -1);
 408                         if (e.size < 0)
 409                         {
 410                                 error = "invalid length of torrent";
 411                                 return false;
 412                         }
 413                         m_files.add_file(e);
 414                         m_multifile = false;
 415                 }
 416                 else
 417                 {
 418                         if (!extract_files(*i, m_files, name))
 419                         {
 420                                 error = "failed to parse files from torrent file";
 421                                 return false;
 422                         }
 423                         m_multifile = true;
 424                 }
 425                 m_files.set_name(name);
 426
 427                 // extract sha-1 hashes for all pieces
 428                 // we want this division to round upwards, that's why we have the
 429                 // extra addition
 430
 431                 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
 432                         / m_files.piece_length()));
 433
 434                 lazy_entry const* pieces = info.dict_find("pieces");
 435                 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
 436                 {
 437                         error = "invalid or missing 'pieces' entry in torrent file";
 438                         return false;
 439                 }
 440
 441                 if (pieces->string_length() != m_files.num_pieces() * 20)
 442                 {
 443                         error = "incorrect number of piece hashes in torrent file";
 444                         return false;
 445                 }
 446
 447                 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
 448                 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
 449                 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
 450
 451                 m_private = info.dict_find_int_value("private", 0);
 452                 return true;
 453         }
 454
 455         bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
 456         {
 457                 if (torrent_file.type() != lazy_entry::dict_t)
 458                 {
 459                         error = "torrent file is not a dictionary";
 460                         return false;
 461                 }
 462
 463                 // extract the url of the tracker
 464                 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
 465                 if (i)
 466                 {
 467                         m_urls.reserve(i->list_size());
 468                         for (int j = 0, end(i->list_size()); j < end; ++j)
 469                         {
 470                                 lazy_entry const* tier = i->list_at(j);
 471                                 if (tier->type() != lazy_entry::list_t) continue;
 472                                 for (int k = 0, end(tier->list_size()); k < end; ++k)
 473                                 {
 474                                         announce_entry e(tier->list_string_value_at(k));
 475                                         if (e.url.empty()) continue;
 476                                         e.tier = j;
 477                                         m_urls.push_back(e);
 478                                 }
 479                         }
 480
 481                         // shuffle each tier
 482                         std::vector<announce_entry>::iterator start = m_urls.begin();
 483                         std::vector<announce_entry>::iterator stop;
 484                         int current_tier = m_urls.front().tier;
 485                         for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
 486                         {
 487                                 if (stop->tier != current_tier)
 488                                 {
 489                                         std::random_shuffle(start, stop);
 490                                         start = stop;
 491                                         current_tier = stop->tier;
 492                                 }
 493                         }
 494                         std::random_shuffle(start, stop);
 495                 }
 496
 497
 498                 if (m_urls.empty())
 499                 {
 500                         announce_entry e(torrent_file.dict_find_string_value("announce"));
 501                         if (!e.url.empty()) m_urls.push_back(e);
 502                 }
 503
 504                 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
 505                 if (nodes)
 506                 {
 507                         for (int i = 0, end(nodes->list_size()); i < end; ++i)
 508                         {
 509                                 lazy_entry const* n = nodes->list_at(i);
 510                                 if (n->type() != lazy_entry::list_t
 511                                         || n->list_size() < 2
 512                                         || n->list_at(0)->type() != lazy_entry::string_t
 513                                         || n->list_at(1)->type() != lazy_entry::int_t)
 514                                         continue;
 515                                 m_nodes.push_back(std::make_pair(
 516                                         n->list_at(0)->string_value()
 517                                         , int(n->list_at(1)->int_value())));
 518                         }
 519                 }
 520
 521                 // extract creation date
 522                 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
 523                 if (cd >= 0)
 524                 {
 525                         m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
 526                                 + pt::seconds(long(cd));
 527                 }
 528
 529                 // if there are any url-seeds, extract them
 530                 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
 531                 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
 532                 {
 533                         m_url_seeds.push_back(url_seeds->string_value());
 534                 }
 535                 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
 536                 {
 537                         for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
 538                         {
 539                                 lazy_entry const* url = url_seeds->list_at(i);
 540                                 if (url->type() != lazy_entry::string_t) continue;
 541                                 m_url_seeds.push_back(url->string_value());
 542                         }
 543                 }
 544
 545                 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
 546                 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
 547
 548                 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
 549                 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
 550
 551                 lazy_entry const* info = torrent_file.dict_find_dict("info");
 552                 if (info == 0)
 553                 {
 554                         error = "missing or invalid 'info' section in torrent file";
 555                         return false;
 556                 }
 557                 return parse_info_section(*info, error);
 558         }
 559
 560         boost::optional<pt::ptime>
 561         torrent_info::creation_date() const
 562         {
 563                 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 564                 {
 565                         return boost::optional<pt::ptime>(m_creation_date);
 566                 }
 567                 return boost::optional<pt::ptime>();
 568         }
 569
 570         void torrent_info::add_tracker(std::string const& url, int tier)
 571         {
 572                 announce_entry e(url);
 573                 e.tier = tier;
 574                 m_urls.push_back(e);
 575
 576                 using boost::bind;
 577                 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
 578                         , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
 579         }
 580
 581 // ------- start deprecation -------
 582
 583         void torrent_info::print(std::ostream& os) const
 584         {
 585                 os << "trackers:\n";
 586                 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
 587                         i != trackers().end(); ++i)
 588                 {
 589                         os << i->tier << ": " << i->url << "\n";
 590                 }
 591                 if (!m_comment.empty())
 592                         os << "comment: " << m_comment << "\n";
 593 //              if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
 594 //                      os << "creation date: " << to_simple_string(m_creation_date) << "\n";
 595                 os << "private: " << (m_private?"yes":"no") << "\n";
 596                 os << "number of pieces: " << num_pieces() << "\n";
 597                 os << "piece length: " << piece_length() << "\n";
 598                 os << "files:\n";
 599                 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
 600                         os << "  " << std::setw(11) << i->size << "  " << i->path.string() << "\n";
 601         }
 602
 603 // ------- end deprecation -------
 604
 605 }
 606