1 #include "ace/OS_NS_string.h"
2 #include "ace/Truncate.h"
3 #include "URL_Visitor.h"
4 #include "Command_Processor.h"
7 URL_Processing_Strategy::URL_Processing_Strategy (URL
&url
,
8 URL_Iterator
&iterator
)
14 URL_Processing_Strategy::~URL_Processing_Strategy ()
19 URL_Processing_Strategy::destroy ()
26 URL_Download_Strategy::URL_Download_Strategy (URL
&url
,
27 URL_Iterator
&iterator
)
28 : URL_Processing_Strategy (url
, iterator
)
33 URL_Download_Strategy::execute ()
37 // Extract all the contents of the Stream and print them to the
39 while (this->iterator_
.next (buffer
) != 0)
47 HTTP_Header_Processing_Strategy::HTTP_Header_Processing_Strategy (URL
&url
,
48 URL_Iterator
&iterator
)
49 : URL_Processing_Strategy (url
, iterator
)
54 HTTP_Header_Processing_Strategy::execute ()
56 // Set the get() position. Necessary since a peek is done later.
57 if (this->url_
.stream ().get_char () == 0)
58 ACE_ERROR_RETURN ((LM_ERROR
,
59 "%p\n","Header Not Found"),
61 char line_buf
[BUFSIZ
+ 1];
62 ACE_CString
line (line_buf
);
63 // Get the lines in the header iteratively and check for status info.
64 int result
= 1, i
= 0;
65 for (i
= 0, result
= this->iterator_
.next (line
);
67 ++i
, result
= this->iterator_
.next (line
))
71 // Assuming that the status-no is a space away.
73 ACE_Utils::truncate_cast
<int> (line
.find ("HTTP", 0));
74 ACE_CString status
= line
.substring (status_index
+ 9, //HTTP/1.1 200
77 URL_Status
*url_status
= 0;
78 ACE_NEW_RETURN (url_status
,
81 Auto_Destroyer
<URL_Status
> url_status_ptr (url_status
);
82 url_status_ptr
->status (ACE_OS::atoi (status
.c_str ()));
83 this->url_
.reply_status (**url_status_ptr
);
85 if (url_status_ptr
->status () != 200)
90 if (line
.find ("text/html") != ACE_CString::npos
)
92 ACE_CString
url_content_type("text/html");
93 this->url_
.content_type (url_content_type
);
100 HTML_Body_Validation_Strategy::HTML_Body_Validation_Strategy (URL
&url
,
101 URL_Iterator
&iterator
,
102 URL_Validation_Visitor
&context
)
103 : URL_Processing_Strategy (url
, iterator
),
104 visitor_context_ (context
)
109 HTML_Body_Validation_Strategy::execute ()
111 char host_name_buf
[BUFSIZ
+ 1];
112 ACE_CString
host_name (host_name_buf
);
113 host_name
.set (url_
.url_addr ().get_host_name (),1);
115 // All to facilitate relative paths
116 char temp
[BUFSIZ
+ 1];
117 ACE_CString
prev_location (temp
);
119 prev_location
.set (ACE_TEXT_ALWAYS_CHAR (this->url_
.url_addr ().get_path_name ()),
120 ACE_OS::strlen (this->url_
.url_addr ().get_path_name ()),
123 ACE_Utils::truncate_cast
<int> (
124 prev_location
.rfind ('/', prev_location
.length ()));
125 ACE_CString str
= prev_location
.substring (0, index
+ 1);
126 prev_location
.set (str
.c_str (), 1);
128 // Note: prev_location always ends with '/'
129 if (prev_location
[0] != '/')
130 prev_location
= "/" + prev_location
;
132 // Build the url portion which can be attached to the relative paths.
133 prev_location
= host_name
+ prev_location
;
135 char url_string
[BUFSIZ
+ 1];
136 ACE_CString
url (url_string
);
138 while (this->iterator_
.next (url
) > 0)
140 // Check for relative urls. Strip out "http://" if it's there.
141 if (url
.find ("http") == url
.npos
)
143 if (url
[0] == '.' && url
[1] == '.')
145 url
.set (&url
[3], 1);
147 ACE_Utils::truncate_cast
<int> (
148 prev_location
.rfind ('/', prev_location
.length () - 1));
149 prev_location
= prev_location
.substring (0, i
+1);
151 if (url
[0] == '.' && url
[1] == '/')
152 url
.set (&url
[2], 1);
154 url
= prev_location
+ url
;
157 url
.set (&url
[7], 1);
158 // Double slash at the end works! e.g. www.cs.wustl.edu/~kirthika//
159 if (url
.find (".html") == url
.npos
)
162 // Create the new URL address.
163 ACE_URL_Addr
*url_addr
;
164 ACE_NEW_RETURN (url_addr
,
167 Auto_Destroyer
<ACE_URL_Addr
> url_addr_ptr (url_addr
);
168 if (url_addr_ptr
->string_to_addr (ACE_TEXT_CHAR_TO_TCHAR (url
.c_str ())) == 0)
171 ACE_NEW_RETURN (http_url
,
172 HTTP_URL (**url_addr_ptr
,
173 dynamic_cast<HTTP_URL
*> (&this->url_
)),
175 URL_Command
*url_command
;
176 ACE_NEW_RETURN (url_command
,
177 URL_Command (http_url
),
180 OPTIONS::instance ()->command_processor ()->insert (url_command
);
187 URL_Validation_Visitation_Strategy_Factory::make_header_iterator ()
191 HTTP_Header_Iterator (*this->url_
),
197 URL_Validation_Visitation_Strategy_Factory::make_body_iterator ()
201 HTML_Body_Iterator (*this->url_
),
206 URL_Processing_Strategy
*
207 URL_Validation_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator
&iterator
)
209 URL_Processing_Strategy
*ps
;
211 HTTP_Header_Processing_Strategy (*this->url_
,
217 URL_Processing_Strategy
*
218 URL_Validation_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator
&iterator
)
220 URL_Processing_Strategy
*ps
;
222 HTML_Body_Validation_Strategy (*this->url_
,
224 this->visitor_context_
),
230 URL_Validation_Visitation_Strategy_Factory::destroy ()
237 URL_Visitor::~URL_Visitor ()
241 URL_Validation_Visitor::URL_Validation_Visitor ()
243 ACE_NEW (this->caching_connect_strategy_
,
244 CACHED_CONNECT_STRATEGY (this->caching_strategy_
));
245 ACE_NEW (this->strat_connector_
,
246 STRATEGY_CONNECTOR(0,
248 caching_connect_strategy_
,
249 &activation_strategy_
));
250 if (strat_connector_
== 0)
251 ACE_ERROR ((LM_ERROR
,
253 "strategy connector creation failed"));
257 URL_Validation_Visitor::~URL_Validation_Visitor ()
259 this->strat_connector_
= 0;
260 if (this->caching_connect_strategy_
!= 0)
261 delete this->caching_connect_strategy_
;
264 URL_Validation_Visitor::URL_CACHE
&
265 URL_Validation_Visitor::url_cache ()
267 return this->url_cache_
;
271 URL_Validation_Visitor::in_cache (const ACE_URL_Addr
&url_addr
)
273 URL_Status
reply_status (URL_Status::STATUS_CODE (1));
275 if (this->url_cache_
.find (url_addr
, reply_status
) == 0)
277 ACE_DEBUG ((LM_DEBUG
,
278 "status %d for URL %s (cached)\n",
279 reply_status
.status (),
280 url_addr
.addr_to_string (0)));
283 if (reply_status
.status () != 200)
292 URL_Visitation_Strategy_Factory
*
293 URL_Validation_Visitor::make_visitation_strategy_factory (URL
&url
)
295 // Since this is HTTP 1.1 we'll need to establish a connection
296 // only once. Trying for relative paths.
298 if (url
.stream ().open (this->strat_connector_
,
299 url
.url_addr ()) == -1)
302 // See if we can get connected and send the GET request via the
304 int result
= url
.send_request ();
307 ACE_ERROR ((LM_ERROR
,
310 if (this->url_cache_
.bind (url
.url_addr (),
311 URL_Status (URL_Status::STATUS_SERVICE_UNAVAILABLE
)) == -1)
312 ACE_ERROR ((LM_ERROR
,
317 // @@ Here's where we could check to see if the <url> was HTTP or
318 // FTP, etc. But for now we'll just assume that everything is an
322 URL_Visitation_Strategy_Factory
*vs
;
324 URL_Validation_Visitation_Strategy_Factory (&url
,
332 URL_Validation_Visitor::destroy ()
334 delete this->strat_connector_
;
341 URL_Validation_Visitor::visit (HTTP_URL
&http_url
)
343 int result
= this->in_cache (http_url
.url_addr ());
346 Auto_Destroyer
<URL_Visitation_Strategy_Factory
> vs (this->make_visitation_strategy_factory (http_url
));
349 ACE_ERROR_RETURN ((LM_ERROR
,
351 "make_visitation_strategy_factory"),
354 Auto_Destroyer
<URL_Iterator
> ihs (vs
->make_header_iterator ());
356 ACE_ERROR_RETURN ((LM_ERROR
,
358 "make_header_iterator"),
360 Auto_Destroyer
<URL_Processing_Strategy
> phs (vs
->make_header_strategy (**ihs
));
362 ACE_ERROR_RETURN ((LM_ERROR
,
364 "make_header_strategy"),
366 int phs_result
= phs
->execute ();
367 if (phs_result
== -1)
368 ACE_DEBUG ((LM_DEBUG
,
371 ACE_DEBUG ((LM_DEBUG
,
372 "URL with status %d %s\n",
373 http_url
.reply_status ().status (),
374 http_url
.url_addr().addr_to_string (0)));
376 // Store the http url in the cache.
377 if (this->url_cache ().bind (http_url
.url_addr (),
378 http_url
.reply_status ()) != 0)
379 ACE_ERROR_RETURN ((LM_ERROR
,
380 "%p\n","url_cache.bind"),
383 // Since it is invalid don't go further.
384 if (phs_result
== -1)
387 // Get back if the recurse option isn't set.
388 if (OPTIONS::instance ()->recurse () != 1)
391 Auto_Destroyer
<URL_Iterator
> is (vs
->make_body_iterator ());
393 ACE_ERROR_RETURN ((LM_ERROR
,
395 "make_body_iterator"),
398 Auto_Destroyer
<URL_Processing_Strategy
> ps (vs
->make_body_strategy (**is
));
400 ACE_ERROR_RETURN ((LM_ERROR
,
402 "make_body_strategy"),
405 if (ps
->execute () == -1)
406 ACE_ERROR_RETURN ((LM_ERROR
,
415 URL_Download_Visitation_Strategy_Factory::destroy ()
423 URL_Download_Visitation_Strategy_Factory::make_header_iterator ()
429 URL_Download_Visitation_Strategy_Factory::make_body_iterator ()
433 URL_Download_Iterator (*this->url_
),
438 URL_Processing_Strategy
*
439 URL_Download_Visitation_Strategy_Factory::make_header_strategy (URL_Iterator
&iterator
)
442 ACE_UNUSED_ARG (iterator
);
447 URL_Processing_Strategy
*
448 URL_Download_Visitation_Strategy_Factory::make_body_strategy (URL_Iterator
&iterator
)
450 URL_Processing_Strategy
*ps
;
452 URL_Download_Strategy (*this->url_
,
458 URL_Visitation_Strategy_Factory::URL_Visitation_Strategy_Factory (URL
*url
)
463 URL_Visitation_Strategy_Factory::~URL_Visitation_Strategy_Factory ()
467 URL_Download_Visitation_Strategy_Factory::URL_Download_Visitation_Strategy_Factory (URL
*url
)
468 : URL_Visitation_Strategy_Factory (url
)
472 URL_Validation_Visitation_Strategy_Factory::URL_Validation_Visitation_Strategy_Factory (URL
*url
,
473 URL_Validation_Visitor
&visitor_context
)
474 : URL_Visitation_Strategy_Factory (url
),
475 visitor_context_ (visitor_context
)
479 URL_Visitation_Strategy_Factory
*
480 URL_Download_Visitor::make_visitation_strategy_factory (URL
&url
)
482 // See if we can get connected and send the GET request via the
486 int retval
= url
.send_request ();
490 // @@ Here's where we could check to see if the <url> was HTTP or
491 // FTP, etc. But for now we'll just assume that everything is an
493 URL_Visitation_Strategy_Factory
*vs
;
495 URL_Download_Visitation_Strategy_Factory (&url
),
501 URL_Download_Visitor::destroy ()
509 URL_Download_Visitor::visit (HTTP_URL
&http_url
)
511 Auto_Destroyer
<URL_Visitation_Strategy_Factory
> vs (this->make_visitation_strategy_factory (http_url
));
514 ACE_ERROR_RETURN ((LM_ERROR
,
516 "make_visitation_strategy_factory"),
519 Auto_Destroyer
<URL_Iterator
> is (vs
->make_body_iterator ());
521 ACE_ERROR_RETURN ((LM_ERROR
,
523 "make_body_iterator"),
526 Auto_Destroyer
<URL_Processing_Strategy
> ps (vs
->make_body_strategy (**is
));
528 ACE_ERROR_RETURN ((LM_ERROR
,
530 "make_body_strategy"),
533 if (ps
->execute () == -1)
534 ACE_ERROR_RETURN ((LM_ERROR
,