added some precautionary checks in bdecoder
[libtorrent.git] / src / torrent_info.cpp
blob0c3e0fe5dd462595f9c7cc1c8239eb362f7bca68
1 /*
3 Copyright (c) 2003-2008, Arvid Norberg
4 All rights reserved.
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in
14 the documentation and/or other materials provided with the distribution.
15 * Neither the name of the author nor the names of its
16 contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 POSSIBILITY OF SUCH DAMAGE.
33 #include "libtorrent/pch.hpp"
35 #include <ctime>
36 #include <iostream>
37 #include <fstream>
38 #include <iomanip>
39 #include <iterator>
40 #include <algorithm>
41 #include <set>
43 #ifdef _MSC_VER
44 #pragma warning(push, 1)
45 #endif
47 #include <boost/lexical_cast.hpp>
48 #include <boost/filesystem/path.hpp>
49 #include <boost/filesystem.hpp>
50 #include <boost/bind.hpp>
52 #ifdef _MSC_VER
53 #pragma warning(pop)
54 #endif
56 #include "libtorrent/torrent_info.hpp"
57 #include "libtorrent/bencode.hpp"
58 #include "libtorrent/hasher.hpp"
59 #include "libtorrent/entry.hpp"
60 #include "libtorrent/file.hpp"
62 namespace gr = boost::gregorian;
64 using namespace libtorrent;
66 namespace
69 namespace fs = boost::filesystem;
// Appends the two-byte UTF-8 encoding of the single byte 'chr' to 'str'.
// The byte is treated as a code point in the range 0-255: the top two
// bits end up in the lead byte, the low six bits in the continuation byte.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// lead byte, 110000xx
	char const lead = static_cast<char>(0xc0 | (chr >> 6));
	// continuation byte, 10xxxxxx
	char const trail = static_cast<char>(0x80 | (chr & 0x3f));
	str.push_back(lead);
	str.push_back(trail);
}
77 void verify_encoding(file_entry& target)
79 std::string tmp_path;
80 std::string file_path = target.path.string();
81 bool valid_encoding = true;
82 for (std::string::iterator i = file_path.begin()
83 , end(file_path.end()); i != end; ++i)
85 // valid ascii-character
86 if ((*i & 0x80) == 0)
88 tmp_path += *i;
89 continue;
92 if (std::distance(i, end) < 2)
94 convert_to_utf8(tmp_path, *i);
95 valid_encoding = false;
96 continue;
99 // valid 2-byte utf-8 character
100 if ((i[0] & 0xe0) == 0xc0
101 && (i[1] & 0xc0) == 0x80)
103 tmp_path += i[0];
104 tmp_path += i[1];
105 i += 1;
106 continue;
109 if (std::distance(i, end) < 3)
111 convert_to_utf8(tmp_path, *i);
112 valid_encoding = false;
113 continue;
116 // valid 3-byte utf-8 character
117 if ((i[0] & 0xf0) == 0xe0
118 && (i[1] & 0xc0) == 0x80
119 && (i[2] & 0xc0) == 0x80)
121 tmp_path += i[0];
122 tmp_path += i[1];
123 tmp_path += i[2];
124 i += 2;
125 continue;
128 if (std::distance(i, end) < 4)
130 convert_to_utf8(tmp_path, *i);
131 valid_encoding = false;
132 continue;
135 // valid 4-byte utf-8 character
136 if ((i[0] & 0xf0) == 0xe0
137 && (i[1] & 0xc0) == 0x80
138 && (i[2] & 0xc0) == 0x80
139 && (i[3] & 0xc0) == 0x80)
141 tmp_path += i[0];
142 tmp_path += i[1];
143 tmp_path += i[2];
144 tmp_path += i[3];
145 i += 3;
146 continue;
149 convert_to_utf8(tmp_path, *i);
150 valid_encoding = false;
152 // the encoding was not valid utf-8
153 // save the original encoding and replace the
154 // commonly used path with the correctly
155 // encoded string
156 if (!valid_encoding) target.path = tmp_path;
159 bool extract_single_file(lazy_entry const& dict, file_entry& target
160 , std::string const& root_dir)
162 lazy_entry const* length = dict.dict_find("length");
163 if (length == 0 || length->type() != lazy_entry::int_t)
164 return false;
165 target.size = length->int_value();
166 target.path = root_dir;
167 target.file_base = 0;
169 // prefer the name.utf-8
170 // because if it exists, it is more
171 // likely to be correctly encoded
173 lazy_entry const* p = dict.dict_find("path.utf-8");
174 if (p == 0 || p->type() != lazy_entry::list_t)
175 p = dict.dict_find("path");
176 if (p == 0 || p->type() != lazy_entry::list_t)
177 return false;
179 for (int i = 0, end(p->list_size()); i < end; ++i)
181 if (p->list_at(i)->type() != lazy_entry::string_t)
182 return false;
183 std::string path_element = p->list_at(i)->string_value();
184 if (path_element != "..")
185 target.path /= path_element;
187 verify_encoding(target);
188 if (target.path.is_complete())
189 return false;
190 return true;
193 bool extract_files(lazy_entry const& list, file_storage& target
194 , std::string const& root_dir)
196 if (list.type() != lazy_entry::list_t) return false;
197 for (int i = 0, end(list.list_size()); i < end; ++i)
199 file_entry e;
200 if (!extract_single_file(*list.list_at(i), e, root_dir))
201 return false;
202 target.add_file(e);
204 return true;
208 namespace libtorrent
211 int load_file(fs::path const& filename, std::vector<char>& v)
213 file f;
214 error_code ec;
215 if (!f.open(filename, file::in, ec)) return -1;
216 f.seek(0, file::end, ec);
217 if (ec) return -1;
218 size_type s = f.tell(ec);
219 if (ec) return -1;
220 if (s > 5000000) return -2;
221 v.resize(s);
222 f.seek(0, file::begin, ec);
223 if (ec) return -1;
224 size_type read = f.read(&v[0], s, ec);
225 if (read != s) return -3;
226 if (ec) return -3;
227 return 0;
230 #ifndef TORRENT_NO_DEPRECATE
231 // standard constructor that parses a torrent file
232 torrent_info::torrent_info(entry const& torrent_file)
233 : m_creation_date(pt::ptime(pt::not_a_date_time))
234 , m_multifile(false)
235 , m_private(false)
236 , m_info_section_size(0)
237 , m_piece_hashes(0)
239 std::vector<char> tmp;
240 std::back_insert_iterator<std::vector<char> > out(tmp);
241 bencode(out, torrent_file);
243 lazy_entry e;
244 lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
245 std::string error;
246 #ifndef BOOST_NO_EXCEPTIONS
247 if (!parse_torrent_file(e, error))
248 throw invalid_torrent_file();
249 #else
250 parse_torrent_file(e, error);
251 #endif
253 #endif
255 torrent_info::torrent_info(lazy_entry const& torrent_file)
256 : m_creation_date(pt::ptime(pt::not_a_date_time))
257 , m_multifile(false)
258 , m_private(false)
259 , m_info_section_size(0)
260 , m_piece_hashes(0)
262 std::string error;
263 #ifndef BOOST_NO_EXCEPTIONS
264 if (!parse_torrent_file(torrent_file, error))
265 throw invalid_torrent_file();
266 #else
267 parse_torrent_file(torrent_file, error);
268 #endif
271 torrent_info::torrent_info(char const* buffer, int size)
272 : m_creation_date(pt::ptime(pt::not_a_date_time))
273 , m_multifile(false)
274 , m_private(false)
275 , m_info_section_size(0)
276 , m_piece_hashes(0)
278 std::string error;
279 lazy_entry e;
280 lazy_bdecode(buffer, buffer + size, e);
281 #ifndef BOOST_NO_EXCEPTIONS
282 if (!parse_torrent_file(e, error))
283 throw invalid_torrent_file();
284 #else
285 parse_torrent_file(e, error);
286 #endif
289 // constructor used for creating new torrents
290 // will not contain any hashes, comments, creation date
291 // just the necessary to use it with piece manager
292 // used for torrents with no metadata
293 torrent_info::torrent_info(sha1_hash const& info_hash)
294 : m_info_hash(info_hash)
295 , m_creation_date(pt::second_clock::universal_time())
296 , m_multifile(false)
297 , m_private(false)
298 , m_info_section_size(0)
299 , m_piece_hashes(0)
302 torrent_info::torrent_info(fs::path const& filename)
303 : m_creation_date(pt::ptime(pt::not_a_date_time))
304 , m_multifile(false)
305 , m_private(false)
307 std::vector<char> buf;
308 int ret = load_file(filename, buf);
309 if (ret < 0) return;
311 lazy_entry e;
312 lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);
313 std::string error;
314 #ifndef BOOST_NO_EXCEPTIONS
315 if (!parse_torrent_file(e, error))
316 throw invalid_torrent_file();
317 #else
318 parse_torrent_file(e, error);
319 #endif
322 torrent_info::~torrent_info()
325 void torrent_info::swap(torrent_info& ti)
327 using std::swap;
328 m_urls.swap(ti.m_urls);
329 m_url_seeds.swap(ti.m_url_seeds);
330 m_files.swap(ti.m_files);
331 m_nodes.swap(ti.m_nodes);
332 swap(m_info_hash, ti.m_info_hash);
333 swap(m_creation_date, ti.m_creation_date);
334 m_comment.swap(ti.m_comment);
335 m_created_by.swap(ti.m_created_by);
336 swap(m_multifile, ti.m_multifile);
337 swap(m_private, ti.m_private);
338 swap(m_info_section, ti.m_info_section);
339 swap(m_info_section_size, ti.m_info_section_size);
340 swap(m_piece_hashes, ti.m_piece_hashes);
341 swap(m_info_dict, ti.m_info_dict);
344 bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
346 if (info.type() != lazy_entry::dict_t)
348 error = "'info' entry is not a dictionary";
349 return false;
352 // hash the info-field to calculate info-hash
353 hasher h;
354 std::pair<char const*, int> section = info.data_section();
355 h.update(section.first, section.second);
356 m_info_hash = h.final();
358 // copy the info section
359 m_info_section_size = section.second;
360 m_info_section.reset(new char[m_info_section_size]);
361 memcpy(m_info_section.get(), section.first, m_info_section_size);
362 TORRENT_ASSERT(section.first[0] == 'd');
363 TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');
365 // extract piece length
366 int piece_length = info.dict_find_int_value("piece length", -1);
367 if (piece_length <= 0)
369 error = "invalid or missing 'piece length' entry in torrent file";
370 return false;
372 m_files.set_piece_length(piece_length);
374 // extract file name (or the directory name if it's a multifile libtorrent)
375 std::string name = info.dict_find_string_value("name.utf-8");
376 if (name.empty()) name = info.dict_find_string_value("name");
377 if (name.empty())
379 error = "missing name in torrent file";
380 return false;
383 fs::path tmp = name;
384 if (tmp.is_complete())
386 name = tmp.leaf();
388 else if (tmp.has_branch_path())
390 fs::path p;
391 for (fs::path::iterator i = tmp.begin()
392 , end(tmp.end()); i != end; ++i)
394 if (*i == "." || *i == "..") continue;
395 p /= *i;
397 name = p.string();
399 if (name == ".." || name == ".")
401 error = "invalid 'name' of torrent (possible exploit attempt)";
402 return false;
405 // extract file list
406 lazy_entry const* i = info.dict_find_list("files");
407 if (i == 0)
409 // if there's no list of files, there has to be a length
410 // field.
411 file_entry e;
412 e.path = name;
413 e.offset = 0;
414 e.size = info.dict_find_int_value("length", -1);
415 if (e.size < 0)
417 error = "invalid length of torrent";
418 return false;
420 m_files.add_file(e);
421 m_multifile = false;
423 else
425 if (!extract_files(*i, m_files, name))
427 error = "failed to parse files from torrent file";
428 return false;
430 m_multifile = true;
432 m_files.set_name(name);
434 // extract sha-1 hashes for all pieces
435 // we want this division to round upwards, that's why we have the
436 // extra addition
438 m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
439 / m_files.piece_length()));
441 lazy_entry const* pieces = info.dict_find("pieces");
442 if (pieces == 0 || pieces->type() != lazy_entry::string_t)
444 error = "invalid or missing 'pieces' entry in torrent file";
445 return false;
448 if (pieces->string_length() != m_files.num_pieces() * 20)
450 error = "incorrect number of piece hashes in torrent file";
451 return false;
454 m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
455 TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
456 TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);
458 m_private = info.dict_find_int_value("private", 0);
459 return true;
462 bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
464 if (torrent_file.type() != lazy_entry::dict_t)
466 error = "torrent file is not a dictionary";
467 return false;
470 // extract the url of the tracker
471 lazy_entry const* i = torrent_file.dict_find_list("announce-list");
472 if (i)
474 m_urls.reserve(i->list_size());
475 for (int j = 0, end(i->list_size()); j < end; ++j)
477 lazy_entry const* tier = i->list_at(j);
478 if (tier->type() != lazy_entry::list_t) continue;
479 for (int k = 0, end(tier->list_size()); k < end; ++k)
481 announce_entry e(tier->list_string_value_at(k));
482 if (e.url.empty()) continue;
483 e.tier = j;
484 m_urls.push_back(e);
488 // shuffle each tier
489 std::vector<announce_entry>::iterator start = m_urls.begin();
490 std::vector<announce_entry>::iterator stop;
491 int current_tier = m_urls.front().tier;
492 for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
494 if (stop->tier != current_tier)
496 std::random_shuffle(start, stop);
497 start = stop;
498 current_tier = stop->tier;
501 std::random_shuffle(start, stop);
505 if (m_urls.empty())
507 announce_entry e(torrent_file.dict_find_string_value("announce"));
508 if (!e.url.empty()) m_urls.push_back(e);
511 lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
512 if (nodes)
514 for (int i = 0, end(nodes->list_size()); i < end; ++i)
516 lazy_entry const* n = nodes->list_at(i);
517 if (n->type() != lazy_entry::list_t
518 || n->list_size() < 2
519 || n->list_at(0)->type() != lazy_entry::string_t
520 || n->list_at(1)->type() != lazy_entry::int_t)
521 continue;
522 m_nodes.push_back(std::make_pair(
523 n->list_at(0)->string_value()
524 , int(n->list_at(1)->int_value())));
528 // extract creation date
529 size_type cd = torrent_file.dict_find_int_value("creation date", -1);
530 if (cd >= 0)
532 m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
533 + pt::seconds(long(cd));
536 // if there are any url-seeds, extract them
537 lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
538 if (url_seeds && url_seeds->type() == lazy_entry::string_t)
540 m_url_seeds.push_back(url_seeds->string_value());
542 else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
544 for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
546 lazy_entry const* url = url_seeds->list_at(i);
547 if (url->type() != lazy_entry::string_t) continue;
548 m_url_seeds.push_back(url->string_value());
552 m_comment = torrent_file.dict_find_string_value("comment.utf-8");
553 if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");
555 m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
556 if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");
558 lazy_entry const* info = torrent_file.dict_find_dict("info");
559 if (info == 0)
561 error = "missing or invalid 'info' section in torrent file";
562 return false;
564 return parse_info_section(*info, error);
567 boost::optional<pt::ptime>
568 torrent_info::creation_date() const
570 if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
572 return boost::optional<pt::ptime>(m_creation_date);
574 return boost::optional<pt::ptime>();
577 void torrent_info::add_tracker(std::string const& url, int tier)
579 announce_entry e(url);
580 e.tier = tier;
581 m_urls.push_back(e);
583 using boost::bind;
584 std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
585 , bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
588 #ifndef TORRENT_NO_DEPRECATE
589 // ------- start deprecation -------
591 void torrent_info::print(std::ostream& os) const
593 os << "trackers:\n";
594 for (std::vector<announce_entry>::const_iterator i = trackers().begin();
595 i != trackers().end(); ++i)
597 os << i->tier << ": " << i->url << "\n";
599 if (!m_comment.empty())
600 os << "comment: " << m_comment << "\n";
601 // if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
602 // os << "creation date: " << to_simple_string(m_creation_date) << "\n";
603 os << "private: " << (m_private?"yes":"no") << "\n";
604 os << "number of pieces: " << num_pieces() << "\n";
605 os << "piece length: " << piece_length() << "\n";
606 os << "files:\n";
607 for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
608 os << " " << std::setw(11) << i->size << " " << i->path.string() << "\n";
611 // ------- end deprecation -------
612 #endif