file_progress fix
[libtorrent.git] / src / torrent_info.cpp
blobc6c222fa057bf545e9fcdbe3c0805f586a36d8c2
1 /*
3 Copyright (c) 2003-2008, Arvid Norberg
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
15 * Neither the name of the author nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
33 #include "libtorrent/pch.hpp"
35 #include <ctime>
36 #include <iostream>
37 #include <fstream>
38 #include <iomanip>
39 #include <iterator>
40 #include <algorithm>
41 #include <set>
43 #ifdef _MSC_VER
44 #pragma warning(push, 1)
45 #endif
47 #include <boost/lexical_cast.hpp>
48 #include <boost/filesystem/path.hpp>
49 #include <boost/filesystem.hpp>
50 #include <boost/bind.hpp>
52 #ifdef _MSC_VER
53 #pragma warning(pop)
54 #endif
56 #include "libtorrent/torrent_info.hpp"
57 #include "libtorrent/bencode.hpp"
58 #include "libtorrent/hasher.hpp"
59 #include "libtorrent/entry.hpp"
60 #include "libtorrent/file.hpp"
62 namespace gr = boost::gregorian;
64 using namespace libtorrent;
66 namespace
69 namespace fs = boost::filesystem;
// appends the two-byte UTF-8 encoding of ``chr`` to ``str``. The byte is
// interpreted as a code point in the range 0-255. Every caller in this file
// only passes bytes that have the high bit set.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// lead byte 110000xx carries the two most significant bits
	unsigned char const lead = 0xc0 | (chr >> 6);
	// continuation byte 10xxxxxx carries the remaining six bits
	unsigned char const trail = 0x80 | (chr & 0x3f);

	str.push_back(static_cast<char>(lead));
	str.push_back(static_cast<char>(trail));
}
77 void verify_encoding(file_entry& target)
79 std::string tmp_path;
80 std::string file_path = target.path.string();
81 bool valid_encoding = true;
82 for (std::string::iterator i = file_path.begin()
83 , end(file_path.end()); i != end; ++i)
85 // valid ascii-character
86 if ((*i & 0x80) == 0)
88 tmp_path += *i;
89 continue;
92 if (std::distance(i, end) < 2)
94 convert_to_utf8(tmp_path, *i);
95 valid_encoding = false;
96 continue;
99 // valid 2-byte utf-8 character
100 if ((i[0] & 0xe0) == 0xc0
101 && (i[1] & 0xc0) == 0x80)
103 tmp_path += i[0];
104 tmp_path += i[1];
105 i += 1;
106 continue;
109 if (std::distance(i, end) < 3)
111 convert_to_utf8(tmp_path, *i);
112 valid_encoding = false;
113 continue;
116 // valid 3-byte utf-8 character
117 if ((i[0] & 0xf0) == 0xe0
118 && (i[1] & 0xc0) == 0x80
119 && (i[2] & 0xc0) == 0x80)
121 tmp_path += i[0];
122 tmp_path += i[1];
123 tmp_path += i[2];
124 i += 2;
125 continue;
128 if (std::distance(i, end) < 4)
130 convert_to_utf8(tmp_path, *i);
131 valid_encoding = false;
132 continue;
135 // valid 4-byte utf-8 character
136 if ((i[0] & 0xf0) == 0xe0
137 && (i[1] & 0xc0) == 0x80
138 && (i[2] & 0xc0) == 0x80
139 && (i[3] & 0xc0) == 0x80)
141 tmp_path += i[0];
142 tmp_path += i[1];
143 tmp_path += i[2];
144 tmp_path += i[3];
145 i += 3;
146 continue;
149 convert_to_utf8(tmp_path, *i);
150 valid_encoding = false;
152 // the encoding was not valid utf-8
153 // save the original encoding and replace the
154 // commonly used path with the correctly
155 // encoded string
156 if (!valid_encoding) target.path = tmp_path;
159 bool extract_single_file(lazy_entry const& dict, file_entry& target
160 , std::string const& root_dir)
162 lazy_entry const* length = dict.dict_find("length");
163 if (length == 0 || length->type() != lazy_entry::int_t)
164 return false;
165 target.size = length->int_value();
166 target.path = root_dir;
167 target.file_base = 0;
169 // prefer the name.utf-8
170 // because if it exists, it is more
171 // likely to be correctly encoded
173 lazy_entry const* p = dict.dict_find("path.utf-8");
174 if (p == 0 || p->type() != lazy_entry::list_t)
175 p = dict.dict_find("path");
176 if (p == 0 || p->type() != lazy_entry::list_t)
177 return false;
179 for (int i = 0, end(p->list_size()); i < end; ++i)
181 if (p->list_at(i)->type() != lazy_entry::string_t)
182 return false;
183 std::string path_element = p->list_at(i)->string_value();
184 if (path_element != "..")
185 target.path /= path_element;
187 verify_encoding(target);
188 if (target.path.is_complete())
189 return false;
190 return true;
193 bool extract_files(lazy_entry const& list, file_storage& target
194 , std::string const& root_dir)
196 if (list.type() != lazy_entry::list_t) return false;
197 for (int i = 0, end(list.list_size()); i < end; ++i)
199 file_entry e;
200 if (!extract_single_file(*list.list_at(i), e, root_dir))
201 return false;
202 target.add_file(e);
204 return true;
208 namespace libtorrent
211 int load_file(fs::path const& filename, std::vector<char>& v)
213 file f;
214 if (!f.open(filename, file::in)) return -1;
215 f.seek(0, file::end);
216 size_type s = f.tell();
217 if (s > 5000000) return -2;
218 v.resize(s);
219 f.seek(0);
220 size_type read = f.read(&v[0], s);
221 if (read != s) return -3;
222 return 0;
225 // standard constructor that parses a torrent file
226 torrent_info::torrent_info(entry const& torrent_file)
227 : m_creation_date(pt::ptime(pt::not_a_date_time))
228 , m_multifile(false)
229 , m_private(false)
230 , m_info_section_size(0)
231 , m_piece_hashes(0)
233 std::vector<char> tmp;
234 std::back_insert_iterator<std::vector<char> > out(tmp);
235 bencode(out, torrent_file);
237 lazy_entry e;
238 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
239 std::string error;
240 #ifndef BOOST_NO_EXCEPTIONS
241 if (!parse_torrent_file(e, error))
242 throw invalid_torrent_file();
243 #else
244 parse_torrent_file(e, error);
245 #endif
248 torrent_info::torrent_info(lazy_entry const& torrent_file)
249 : m_creation_date(pt::ptime(pt::not_a_date_time))
250 , m_multifile(false)
251 , m_private(false)
252 , m_info_section_size(0)
253 , m_piece_hashes(0)
255 std::string error;
256 #ifndef BOOST_NO_EXCEPTIONS
257 if (!parse_torrent_file(torrent_file, error))
258 throw invalid_torrent_file();
259 #else
260 parse_torrent_file(torrent_file, error);
261 #endif
264 torrent_info::torrent_info(char const* buffer, int size)
265 : m_creation_date(pt::ptime(pt::not_a_date_time))
266 , m_multifile(false)
267 , m_private(false)
268 , m_info_section_size(0)
269 , m_piece_hashes(0)
271 std::string error;
272 lazy_entry e;
273 lazy_bdecode(buffer, buffer + size, e);
274 #ifndef BOOST_NO_EXCEPTIONS
275 if (!parse_torrent_file(e, error))
276 throw invalid_torrent_file();
277 #else
278 parse_torrent_file(e, error);
279 #endif
282 // constructor used for creating new torrents
283 // will not contain any hashes, comments, creation date
284 // just the necessary to use it with piece manager
285 // used for torrents with no metadata
286 torrent_info::torrent_info(sha1_hash const& info_hash)
287 : m_info_hash(info_hash)
288 , m_creation_date(pt::second_clock::universal_time())
289 , m_multifile(false)
290 , m_private(false)
291 , m_info_section_size(0)
292 , m_piece_hashes(0)
295 torrent_info::torrent_info(fs::path const& filename)
296 : m_creation_date(pt::ptime(pt::not_a_date_time))
297 , m_multifile(false)
298 , m_private(false)
300 std::vector<char> buf;
301 int ret = load_file(filename, buf);
302 if (ret < 0) return;
304 lazy_entry e;
305 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
306 std::string error;
307 #ifndef BOOST_NO_EXCEPTIONS
308 if (!parse_torrent_file(e, error))
309 throw invalid_torrent_file();
310 #else
311 parse_torrent_file(e, error);
312 #endif
315 torrent_info::~torrent_info()
318 void torrent_info::swap(torrent_info& ti)
320 using std::swap;
321 m_urls.swap(ti.m_urls);
322 m_url_seeds.swap(ti.m_url_seeds);
323 m_files.swap(ti.m_files);
324 m_nodes.swap(ti.m_nodes);
325 swap(m_info_hash, ti.m_info_hash);
326 swap(m_creation_date, ti.m_creation_date);
327 m_comment.swap(ti.m_comment);
328 m_created_by.swap(ti.m_created_by);
329 swap(m_multifile, ti.m_multifile);
330 swap(m_private, ti.m_private);
331 swap(m_info_section, ti.m_info_section);
332 swap(m_info_section_size, ti.m_info_section_size);
333 swap(m_piece_hashes, ti.m_piece_hashes);
334 swap(m_info_dict, ti.m_info_dict);
337 bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
339 if (info.type() != lazy_entry::dict_t)
341 error = "'info' entry is not a dictionary";
342 return false;
345 // hash the info-field to calculate info-hash
346 hasher h;
347 std::pair<char const*, int> section = info.data_section();
348 h.update(section.first, section.second);
349 m_info_hash = h.final();
351 // copy the info section
352 m_info_section_size = section.second;
353 m_info_section.reset(new char[m_info_section_size]);
354 memcpy(m_info_section.get(), section.first, m_info_section_size);
355 TORRENT_ASSERT(section.first[0] == 'd');
356 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
358 // extract piece length
359 int piece_length = info.dict_find_int_value("piece length", -1);
360 if (piece_length <= 0)
362 error = "invalid or missing 'piece length' entry in torrent file";
363 return false;
365 m_files.set_piece_length(piece_length);
367 // extract file name (or the directory name if it's a multifile libtorrent)
368 std::string name = info.dict_find_string_value("name.utf-8");
369 if (name.empty()) name = info.dict_find_string_value("name");
370 if (name.empty())
372 error = "missing name in torrent file";
373 return false;
376 fs::path tmp = name;
377 if (tmp.is_complete())
379 name = tmp.leaf();
381 else if (tmp.has_branch_path())
383 fs::path p;
384 for (fs::path::iterator i = tmp.begin()
385 , end(tmp.end()); i != end; ++i)
387 if (*i == "." || *i == "..") continue;
388 p /= *i;
390 name = p.string();
392 if (name == ".." || name == ".")
394 error = "invalid 'name' of torrent (possible exploit attempt)";
395 return false;
398 // extract file list
399 lazy_entry const* i = info.dict_find_list("files");
400 if (i == 0)
402 // if there's no list of files, there has to be a length
403 // field.
404 file_entry e;
405 e.path = name;
406 e.offset = 0;
407 e.size = info.dict_find_int_value("length", -1);
408 if (e.size < 0)
410 error = "invalid length of torrent";
411 return false;
413 m_files.add_file(e);
414 m_multifile = false;
416 else
418 if (!extract_files(*i, m_files, name))
420 error = "failed to parse files from torrent file";
421 return false;
423 m_multifile = true;
425 m_files.set_name(name);
427 // extract sha-1 hashes for all pieces
428 // we want this division to round upwards, that's why we have the
429 // extra addition
431 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
432 / m_files.piece_length()));
434 lazy_entry const* pieces = info.dict_find("pieces");
435 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
437 error = "invalid or missing 'pieces' entry in torrent file";
438 return false;
441 if (pieces->string_length() != m_files.num_pieces() * 20)
443 error = "incorrect number of piece hashes in torrent file";
444 return false;
447 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
448 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
449 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
451 m_private = info.dict_find_int_value("private", 0);
452 return true;
455 bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
457 if (torrent_file.type() != lazy_entry::dict_t)
459 error = "torrent file is not a dictionary";
460 return false;
463 // extract the url of the tracker
464 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
465 if (i)
467 m_urls.reserve(i->list_size());
468 for (int j = 0, end(i->list_size()); j < end; ++j)
470 lazy_entry const* tier = i->list_at(j);
471 if (tier->type() != lazy_entry::list_t) continue;
472 for (int k = 0, end(tier->list_size()); k < end; ++k)
474 announce_entry e(tier->list_string_value_at(k));
475 if (e.url.empty()) continue;
476 e.tier = j;
477 m_urls.push_back(e);
481 // shuffle each tier
482 std::vector<announce_entry>::iterator start = m_urls.begin();
483 std::vector<announce_entry>::iterator stop;
484 int current_tier = m_urls.front().tier;
485 for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
487 if (stop->tier != current_tier)
489 std::random_shuffle(start, stop);
490 start = stop;
491 current_tier = stop->tier;
494 std::random_shuffle(start, stop);
498 if (m_urls.empty())
500 announce_entry e(torrent_file.dict_find_string_value("announce"));
501 if (!e.url.empty()) m_urls.push_back(e);
504 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
505 if (nodes)
507 for (int i = 0, end(nodes->list_size()); i < end; ++i)
509 lazy_entry const* n = nodes->list_at(i);
510 if (n->type() != lazy_entry::list_t
511 || n->list_size() < 2
512 || n->list_at(0)->type() != lazy_entry::string_t
513 || n->list_at(1)->type() != lazy_entry::int_t)
514 continue;
515 m_nodes.push_back(std::make_pair(
516 n->list_at(0)->string_value()
517 , int(n->list_at(1)->int_value())));
521 // extract creation date
522 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
523 if (cd >= 0)
525 m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
526 + pt::seconds(long(cd));
529 // if there are any url-seeds, extract them
530 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
531 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
533 m_url_seeds.push_back(url_seeds->string_value());
535 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
537 for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
539 lazy_entry const* url = url_seeds->list_at(i);
540 if (url->type() != lazy_entry::string_t) continue;
541 m_url_seeds.push_back(url->string_value());
545 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
546 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
548 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
549 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
551 lazy_entry const* info = torrent_file.dict_find_dict("info");
552 if (info == 0)
554 error = "missing or invalid 'info' section in torrent file";
555 return false;
557 return parse_info_section(*info, error);
560 boost::optional<pt::ptime>
561 torrent_info::creation_date() const
563 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
565 return boost::optional<pt::ptime>(m_creation_date);
567 return boost::optional<pt::ptime>();
570 void torrent_info::add_tracker(std::string const& url, int tier)
572 announce_entry e(url);
573 e.tier = tier;
574 m_urls.push_back(e);
576 using boost::bind;
577 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
578 , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
581 // ------- start deprecation -------
583 void torrent_info::print(std::ostream& os) const
585 os << "trackers:\n";
586 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
587 i != trackers().end(); ++i)
589 os << i->tier << ": " << i->url << "\n";
591 if (!m_comment.empty())
592 os << "comment: " << m_comment << "\n";
593 // if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
594 // os << "creation date: " << to_simple_string(m_creation_date) << "\n";
595 os << "private: " << (m_private?"yes":"no") << "\n";
596 os << "number of pieces: " << num_pieces() << "\n";
597 os << "piece length: " << piece_length() << "\n";
598 os << "files:\n";
599 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
600 os << " " << std::setw(11) << i->size << " " << i->path.string() << "\n";
603 // ------- end deprecation -------