Merge pull request #2216 from jwillemsen/jwi-cxxversionchecks
[ACE_TAO.git] / ACE / examples / Web_Crawler / Iterators.cpp
blobd5edd7a5f12bb5ddf3589b9ea3f442c4d80e0d60
1 #include "ace/Truncate.h"
2 #include "Options.h"
3 #include "Iterators.h"
5 URL_Iterator::~URL_Iterator ()
9 int
10 URL_Iterator::destroy ()
12 // Commit suicide.
13 delete this;
14 return 0;
17 HTML_Body_Iterator::HTML_Body_Iterator (URL &url)
18 : url_ (url)
22 int
23 HTML_Body_Iterator::next (ACE_CString &url)
25 size_t len = BUFSIZ;
26 const char *buf;
27 ACE_CString buffer;
28 int href_index = 0;
30 for (buf = this->url_.stream ().recv (len);
31 buf != 0;
32 buf = this->url_.stream ().recv (len))
34 buffer.set (buf, BUFSIZ, 1);
36 href_index = ACE_Utils::truncate_cast<int> (buffer.find ("HREF"));
38 if (href_index < 0)
39 href_index = ACE_Utils::truncate_cast<int> (buffer.find ("href"));
41 // Grep fpr " and grab the string until end-"
42 if ( href_index > 0)
44 // Get back to buffer start location.
45 this->url_.stream ().seek (-1 * static_cast<ACE_OFF_T> (len),
46 SEEK_CUR);
48 int start_index =
49 ACE_Utils::truncate_cast<int> (
50 buffer.find ('\"', href_index));
52 if (start_index <= 0)
53 break;
55 start_index += href_index;
57 int end_index =
58 ACE_Utils::truncate_cast<int> (
59 buffer.find ('\"', start_index + 1));
61 if (end_index <= 0)
62 break;
64 end_index += start_index + 1;
66 ssize_t url_len = end_index - (start_index + 1);
68 ACE_CString temp = buffer.substring (start_index + 1,
69 url_len);
70 url.set (temp.c_str (), len, 1);
72 this->url_.stream ().seek (end_index + 1);
74 return url_len;
77 return 0;
80 HTTP_Header_Iterator::HTTP_Header_Iterator (URL &url)
81 : url_ (url),
82 end_of_header_ (0)
86 int
87 HTTP_Header_Iterator::next (ACE_CString &line)
89 if (this->end_of_header_)
90 return 0;
91 else
93 for (char c;
94 (c = this->url_.stream ().get_char ()) != (char)EOF;
97 // Check to see if we're at the end of the header line.
98 if (c == '\r' && this->url_.stream ().peek_char (0) == '\n')
100 line.set (this->url_.stream ().recv (),
101 this->url_.stream ().recv_len () - 1,
104 // Check to see if we're at the end of the header.
105 if (this->url_.stream ().peek_char (1) == '\r'
106 && this->url_.stream ().peek_char (2) == '\n')
108 this->end_of_header_ = 1;
109 // We're at the end of the header section.
110 this->url_.stream ().seek (3);
112 else
113 // We're at the end of the line.
114 this->url_.stream ().seek (1);
116 return 1;
118 // Handle broken Web servers that use '\n' instead of
119 // '\r\n'.
120 else if (c == '\n')
122 line.set (this->url_.stream ().recv (),
123 (this->url_.stream ().recv_len ()),
126 // Check to see if we're at the end of the header.
127 if (this->url_.stream ().peek_char (0) == '\n')
129 // We're at the end of the header section.
130 this->url_.stream ().seek (1);
131 this->end_of_header_ = 1;
134 return 1;
139 return 0;
142 URL_Download_Iterator::URL_Download_Iterator (URL &url)
143 : url_ (url)
148 URL_Download_Iterator::next (ACE_CString &buffer)
150 size_t len = BUFSIZ;
152 const char *buf = this->url_.stream ().recv (len);
155 if (buf == 0)
156 return 0;
157 else
159 buffer.set (buf, len, 1);
160 return 1;