improved error reporting of file errors (uses boost.system's error_code). Now permiss...
[libtorrent.git] / src / torrent_info.cpp
blob03dc235d6f079c1598602e9ab0731c81fd49ef82
1 /*
3 Copyright (c) 2003-2008, Arvid Norberg
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
15 * Neither the name of the author nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
33 #include "libtorrent/pch.hpp"
35 #include <ctime>
36 #include <iostream>
37 #include <fstream>
38 #include <iomanip>
39 #include <iterator>
40 #include <algorithm>
41 #include <set>
43 #ifdef _MSC_VER
44 #pragma warning(push, 1)
45 #endif
47 #include <boost/lexical_cast.hpp>
48 #include <boost/filesystem/path.hpp>
49 #include <boost/filesystem.hpp>
50 #include <boost/bind.hpp>
52 #ifdef _MSC_VER
53 #pragma warning(pop)
54 #endif
56 #include "libtorrent/torrent_info.hpp"
57 #include "libtorrent/bencode.hpp"
58 #include "libtorrent/hasher.hpp"
59 #include "libtorrent/entry.hpp"
60 #include "libtorrent/file.hpp"
62 namespace gr = boost::gregorian;
64 using namespace libtorrent;
66 namespace
69 namespace fs = boost::filesystem;
// Appends the two-byte UTF-8 sequence for the single byte `chr` to `str`.
// `chr` is treated as a code point in the range 0..255; the callers in
// this file only pass bytes that have the high bit set.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	int const code_point = chr & 0xff;
	// lead byte: 110000xx, carrying the two most significant bits
	int const lead = 0xc0 | (code_point >> 6);
	// continuation byte: 10xxxxxx, carrying the six least significant bits
	int const continuation = 0x80 | (code_point & 0x3f);
	str += lead;
	str += continuation;
}
77 void verify_encoding(file_entry& target)
79 std::string tmp_path;
80 std::string file_path = target.path.string();
81 bool valid_encoding = true;
82 for (std::string::iterator i = file_path.begin()
83 , end(file_path.end()); i != end; ++i)
85 // valid ascii-character
86 if ((*i & 0x80) == 0)
88 tmp_path += *i;
89 continue;
92 if (std::distance(i, end) < 2)
94 convert_to_utf8(tmp_path, *i);
95 valid_encoding = false;
96 continue;
99 // valid 2-byte utf-8 character
100 if ((i[0] & 0xe0) == 0xc0
101 && (i[1] & 0xc0) == 0x80)
103 tmp_path += i[0];
104 tmp_path += i[1];
105 i += 1;
106 continue;
109 if (std::distance(i, end) < 3)
111 convert_to_utf8(tmp_path, *i);
112 valid_encoding = false;
113 continue;
116 // valid 3-byte utf-8 character
117 if ((i[0] & 0xf0) == 0xe0
118 && (i[1] & 0xc0) == 0x80
119 && (i[2] & 0xc0) == 0x80)
121 tmp_path += i[0];
122 tmp_path += i[1];
123 tmp_path += i[2];
124 i += 2;
125 continue;
128 if (std::distance(i, end) < 4)
130 convert_to_utf8(tmp_path, *i);
131 valid_encoding = false;
132 continue;
135 // valid 4-byte utf-8 character
136 if ((i[0] & 0xf0) == 0xe0
137 && (i[1] & 0xc0) == 0x80
138 && (i[2] & 0xc0) == 0x80
139 && (i[3] & 0xc0) == 0x80)
141 tmp_path += i[0];
142 tmp_path += i[1];
143 tmp_path += i[2];
144 tmp_path += i[3];
145 i += 3;
146 continue;
149 convert_to_utf8(tmp_path, *i);
150 valid_encoding = false;
152 // the encoding was not valid utf-8
153 // save the original encoding and replace the
154 // commonly used path with the correctly
155 // encoded string
156 if (!valid_encoding) target.path = tmp_path;
159 bool extract_single_file(lazy_entry const& dict, file_entry& target
160 , std::string const& root_dir)
162 lazy_entry const* length = dict.dict_find("length");
163 if (length == 0 || length->type() != lazy_entry::int_t)
164 return false;
165 target.size = length->int_value();
166 target.path = root_dir;
167 target.file_base = 0;
169 // prefer the name.utf-8
170 // because if it exists, it is more
171 // likely to be correctly encoded
173 lazy_entry const* p = dict.dict_find("path.utf-8");
174 if (p == 0 || p->type() != lazy_entry::list_t)
175 p = dict.dict_find("path");
176 if (p == 0 || p->type() != lazy_entry::list_t)
177 return false;
179 for (int i = 0, end(p->list_size()); i < end; ++i)
181 if (p->list_at(i)->type() != lazy_entry::string_t)
182 return false;
183 std::string path_element = p->list_at(i)->string_value();
184 if (path_element != "..")
185 target.path /= path_element;
187 verify_encoding(target);
188 if (target.path.is_complete())
189 return false;
190 return true;
193 bool extract_files(lazy_entry const& list, file_storage& target
194 , std::string const& root_dir)
196 if (list.type() != lazy_entry::list_t) return false;
197 for (int i = 0, end(list.list_size()); i < end; ++i)
199 file_entry e;
200 if (!extract_single_file(*list.list_at(i), e, root_dir))
201 return false;
202 target.add_file(e);
204 return true;
208 namespace libtorrent
211 int load_file(fs::path const& filename, std::vector<char>& v)
213 file f;
214 error_code ec;
215 if (!f.open(filename, file::in, ec)) return -1;
216 f.seek(0, file::end, ec);
217 if (ec) return -1;
218 size_type s = f.tell(ec);
219 if (ec) return -1;
220 if (s > 5000000) return -2;
221 v.resize(s);
222 f.seek(0, file::begin, ec);
223 if (ec) return -1;
224 size_type read = f.read(&v[0], s, ec);
225 if (read != s) return -3;
226 if (ec) return -3;
227 return 0;
230 // standard constructor that parses a torrent file
231 torrent_info::torrent_info(entry const& torrent_file)
232 : m_creation_date(pt::ptime(pt::not_a_date_time))
233 , m_multifile(false)
234 , m_private(false)
235 , m_info_section_size(0)
236 , m_piece_hashes(0)
238 std::vector<char> tmp;
239 std::back_insert_iterator<std::vector<char> > out(tmp);
240 bencode(out, torrent_file);
242 lazy_entry e;
243 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
244 std::string error;
245 #ifndef BOOST_NO_EXCEPTIONS
246 if (!parse_torrent_file(e, error))
247 throw invalid_torrent_file();
248 #else
249 parse_torrent_file(e, error);
250 #endif
253 torrent_info::torrent_info(lazy_entry const& torrent_file)
254 : m_creation_date(pt::ptime(pt::not_a_date_time))
255 , m_multifile(false)
256 , m_private(false)
257 , m_info_section_size(0)
258 , m_piece_hashes(0)
260 std::string error;
261 #ifndef BOOST_NO_EXCEPTIONS
262 if (!parse_torrent_file(torrent_file, error))
263 throw invalid_torrent_file();
264 #else
265 parse_torrent_file(torrent_file, error);
266 #endif
269 torrent_info::torrent_info(char const* buffer, int size)
270 : m_creation_date(pt::ptime(pt::not_a_date_time))
271 , m_multifile(false)
272 , m_private(false)
273 , m_info_section_size(0)
274 , m_piece_hashes(0)
276 std::string error;
277 lazy_entry e;
278 lazy_bdecode(buffer, buffer + size, e);
279 #ifndef BOOST_NO_EXCEPTIONS
280 if (!parse_torrent_file(e, error))
281 throw invalid_torrent_file();
282 #else
283 parse_torrent_file(e, error);
284 #endif
287 // constructor used for creating new torrents
288 // will not contain any hashes, comments, creation date
289 // just the necessary to use it with piece manager
290 // used for torrents with no metadata
291 torrent_info::torrent_info(sha1_hash const& info_hash)
292 : m_info_hash(info_hash)
293 , m_creation_date(pt::second_clock::universal_time())
294 , m_multifile(false)
295 , m_private(false)
296 , m_info_section_size(0)
297 , m_piece_hashes(0)
300 torrent_info::torrent_info(fs::path const& filename)
301 : m_creation_date(pt::ptime(pt::not_a_date_time))
302 , m_multifile(false)
303 , m_private(false)
305 std::vector<char> buf;
306 int ret = load_file(filename, buf);
307 if (ret < 0) return;
309 lazy_entry e;
310 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
311 std::string error;
312 #ifndef BOOST_NO_EXCEPTIONS
313 if (!parse_torrent_file(e, error))
314 throw invalid_torrent_file();
315 #else
316 parse_torrent_file(e, error);
317 #endif
320 torrent_info::~torrent_info()
323 void torrent_info::swap(torrent_info& ti)
325 using std::swap;
326 m_urls.swap(ti.m_urls);
327 m_url_seeds.swap(ti.m_url_seeds);
328 m_files.swap(ti.m_files);
329 m_nodes.swap(ti.m_nodes);
330 swap(m_info_hash, ti.m_info_hash);
331 swap(m_creation_date, ti.m_creation_date);
332 m_comment.swap(ti.m_comment);
333 m_created_by.swap(ti.m_created_by);
334 swap(m_multifile, ti.m_multifile);
335 swap(m_private, ti.m_private);
336 swap(m_info_section, ti.m_info_section);
337 swap(m_info_section_size, ti.m_info_section_size);
338 swap(m_piece_hashes, ti.m_piece_hashes);
339 swap(m_info_dict, ti.m_info_dict);
342 bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
344 if (info.type() != lazy_entry::dict_t)
346 error = "'info' entry is not a dictionary";
347 return false;
350 // hash the info-field to calculate info-hash
351 hasher h;
352 std::pair<char const*, int> section = info.data_section();
353 h.update(section.first, section.second);
354 m_info_hash = h.final();
356 // copy the info section
357 m_info_section_size = section.second;
358 m_info_section.reset(new char[m_info_section_size]);
359 memcpy(m_info_section.get(), section.first, m_info_section_size);
360 TORRENT_ASSERT(section.first[0] == 'd');
361 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
363 // extract piece length
364 int piece_length = info.dict_find_int_value("piece length", -1);
365 if (piece_length <= 0)
367 error = "invalid or missing 'piece length' entry in torrent file";
368 return false;
370 m_files.set_piece_length(piece_length);
372 // extract file name (or the directory name if it's a multifile libtorrent)
373 std::string name = info.dict_find_string_value("name.utf-8");
374 if (name.empty()) name = info.dict_find_string_value("name");
375 if (name.empty())
377 error = "missing name in torrent file";
378 return false;
381 fs::path tmp = name;
382 if (tmp.is_complete())
384 name = tmp.leaf();
386 else if (tmp.has_branch_path())
388 fs::path p;
389 for (fs::path::iterator i = tmp.begin()
390 , end(tmp.end()); i != end; ++i)
392 if (*i == "." || *i == "..") continue;
393 p /= *i;
395 name = p.string();
397 if (name == ".." || name == ".")
399 error = "invalid 'name' of torrent (possible exploit attempt)";
400 return false;
403 // extract file list
404 lazy_entry const* i = info.dict_find_list("files");
405 if (i == 0)
407 // if there's no list of files, there has to be a length
408 // field.
409 file_entry e;
410 e.path = name;
411 e.offset = 0;
412 e.size = info.dict_find_int_value("length", -1);
413 if (e.size < 0)
415 error = "invalid length of torrent";
416 return false;
418 m_files.add_file(e);
419 m_multifile = false;
421 else
423 if (!extract_files(*i, m_files, name))
425 error = "failed to parse files from torrent file";
426 return false;
428 m_multifile = true;
430 m_files.set_name(name);
432 // extract sha-1 hashes for all pieces
433 // we want this division to round upwards, that's why we have the
434 // extra addition
436 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
437 / m_files.piece_length()));
439 lazy_entry const* pieces = info.dict_find("pieces");
440 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
442 error = "invalid or missing 'pieces' entry in torrent file";
443 return false;
446 if (pieces->string_length() != m_files.num_pieces() * 20)
448 error = "incorrect number of piece hashes in torrent file";
449 return false;
452 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
453 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
454 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
456 m_private = info.dict_find_int_value("private", 0);
457 return true;
460 bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
462 if (torrent_file.type() != lazy_entry::dict_t)
464 error = "torrent file is not a dictionary";
465 return false;
468 // extract the url of the tracker
469 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
470 if (i)
472 m_urls.reserve(i->list_size());
473 for (int j = 0, end(i->list_size()); j < end; ++j)
475 lazy_entry const* tier = i->list_at(j);
476 if (tier->type() != lazy_entry::list_t) continue;
477 for (int k = 0, end(tier->list_size()); k < end; ++k)
479 announce_entry e(tier->list_string_value_at(k));
480 if (e.url.empty()) continue;
481 e.tier = j;
482 m_urls.push_back(e);
486 // shuffle each tier
487 std::vector<announce_entry>::iterator start = m_urls.begin();
488 std::vector<announce_entry>::iterator stop;
489 int current_tier = m_urls.front().tier;
490 for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
492 if (stop->tier != current_tier)
494 std::random_shuffle(start, stop);
495 start = stop;
496 current_tier = stop->tier;
499 std::random_shuffle(start, stop);
503 if (m_urls.empty())
505 announce_entry e(torrent_file.dict_find_string_value("announce"));
506 if (!e.url.empty()) m_urls.push_back(e);
509 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
510 if (nodes)
512 for (int i = 0, end(nodes->list_size()); i < end; ++i)
514 lazy_entry const* n = nodes->list_at(i);
515 if (n->type() != lazy_entry::list_t
516 || n->list_size() < 2
517 || n->list_at(0)->type() != lazy_entry::string_t
518 || n->list_at(1)->type() != lazy_entry::int_t)
519 continue;
520 m_nodes.push_back(std::make_pair(
521 n->list_at(0)->string_value()
522 , int(n->list_at(1)->int_value())));
526 // extract creation date
527 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
528 if (cd >= 0)
530 m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
531 + pt::seconds(long(cd));
534 // if there are any url-seeds, extract them
535 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
536 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
538 m_url_seeds.push_back(url_seeds->string_value());
540 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
542 for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
544 lazy_entry const* url = url_seeds->list_at(i);
545 if (url->type() != lazy_entry::string_t) continue;
546 m_url_seeds.push_back(url->string_value());
550 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
551 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
553 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
554 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
556 lazy_entry const* info = torrent_file.dict_find_dict("info");
557 if (info == 0)
559 error = "missing or invalid 'info' section in torrent file";
560 return false;
562 return parse_info_section(*info, error);
565 boost::optional<pt::ptime>
566 torrent_info::creation_date() const
568 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
570 return boost::optional<pt::ptime>(m_creation_date);
572 return boost::optional<pt::ptime>();
575 void torrent_info::add_tracker(std::string const& url, int tier)
577 announce_entry e(url);
578 e.tier = tier;
579 m_urls.push_back(e);
581 using boost::bind;
582 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
583 , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
586 // ------- start deprecation -------
588 void torrent_info::print(std::ostream& os) const
590 os << "trackers:\n";
591 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
592 i != trackers().end(); ++i)
594 os << i->tier << ": " << i->url << "\n";
596 if (!m_comment.empty())
597 os << "comment: " << m_comment << "\n";
598 // if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
599 // os << "creation date: " << to_simple_string(m_creation_date) << "\n";
600 os << "private: " << (m_private?"yes":"no") << "\n";
601 os << "number of pieces: " << num_pieces() << "\n";
602 os << "piece length: " << piece_length() << "\n";
603 os << "files:\n";
604 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
605 os << " " << std::setw(11) << i->size << " " << i->path.string() << "\n";
608 // ------- end deprecation -------