Merge pull request #2216 from jwillemsen/jwi-cxxversionchecks
[ACE_TAO.git] / ACE / examples / Web_Crawler / URL_Visitor.h
bloba7b9590f1fb9b8b2be36bf07ef6a13bea35fa88b
1 /* -*- C++ -*- */
3 //=============================================================================
4 /**
5 * @file URL_Visitor.h
7 * @author Douglas C.Schmidt <d.schmidt@vanderbilt.edu> Kirthika Parameswaran <kirthika@cs.wustl.edu>
8 */
9 //=============================================================================
12 #ifndef _URL_VISITOR_H
13 #define _URL_VISITOR_H
14 #include /**/ "ace/pre.h"
16 #include "ace/Strategies_T.h"
18 #if !defined (ACE_LACKS_PRAGMA_ONCE)
19 #pragma once
20 #endif /* ACE_LACKS_PRAGMA_ONCE */
23 #include "HTTP_URL.h"
24 #include "Iterators.h"
25 #include "ace/Hash_Map_Manager_T.h"
26 #include "ace/Caching_Strategies_T.h"
27 #include "ace/Cached_Connect_Strategy_T.h"
28 #include "Options.h"
30 // Forward declarations.
31 class URL_Validation_Visitor;
33 /**
34 * @class URL_Processing_Strategy
36 * @brief Abstract base class for the URL processing strategy.
38 class URL_Processing_Strategy
40 public:
41 /// Constructor.
42 URL_Processing_Strategy (URL &,
43 URL_Iterator &);
45 virtual ~URL_Processing_Strategy ();
47 /// Perform the strategy.
48 virtual int execute () = 0;
50 virtual int destroy ();
52 // Close down the resources.
54 protected:
55 /// A reference to the URL "context" that we're processing.
56 URL &url_;
58 /// Iterator for the URL that we're processing.
59 URL_Iterator &iterator_;
62 /**
63 * @class HTTP_Header_Processing_Strategy
65 * @brief Defines the HTTP header processing strategy.
67 class HTTP_Header_Processing_Strategy : public URL_Processing_Strategy
69 public:
70 /// Constructor.
71 HTTP_Header_Processing_Strategy (URL &,
72 URL_Iterator &);
74 /// Perform the strategy for processing an HTTP header.
75 virtual int execute ();
78 /**
79 * @class HTML_Body_Validation_Strategy
81 * @brief Defines the HTML body processing strategy.
83 * This class iterates through the body of an HTML file and
84 * recursively visits embedded links.
86 class HTML_Body_Validation_Strategy : public URL_Processing_Strategy
88 public:
89 /// Constructor.
90 HTML_Body_Validation_Strategy (URL &,
91 URL_Iterator &,
92 URL_Validation_Visitor &);
94 /**
95 * Perform the strategy for processing an HTML file. This strategy
96 * iterates over the HTML file and recursively visits embedded links
97 * to process them, as well.
99 virtual int execute ();
101 private:
102 /// This is the context of the visit.
103 URL_Validation_Visitor &visitor_context_;
107 * @class URL_Download_Strategy
109 * @brief Defines a URL downloading strategy.
111 * This class downloads a URL's contents into a temporary file.
113 class URL_Download_Strategy : public URL_Processing_Strategy
115 public:
116 /// Constructor.
117 URL_Download_Strategy (URL &,
118 URL_Iterator &);
120 /// Perform the strategy for downloading a URL to a temporary file.
121 virtual int execute ();
125 * @class URL_Visitation_Strategy_Factory
127 * @brief Abstract Factory for the URL visitation strategy.
129 class URL_Visitation_Strategy_Factory
131 public:
132 URL_Visitation_Strategy_Factory (URL *);
134 /// Destructor.
135 virtual ~URL_Visitation_Strategy_Factory ();
137 // = Factory Methods.
138 /// Factory Method that makes the header iterator.
139 virtual URL_Iterator *make_header_iterator () = 0;
141 /// Factory Method that makes the body iterator.
142 virtual URL_Iterator *make_body_iterator () = 0;
144 /// Factory Method that makes the header processing strategy.
145 virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &) = 0;
147 /// Factory Method that makes the body processing strategy .
148 virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &) = 0;
150 /// Close down the resources.
151 virtual int destroy () = 0;
153 protected:
154 /// Stash the URL so we don't have to pass it around.
155 URL *url_;
159 * @class URL_Download_Visitation_Strategy_Factory
161 * @brief Concrete Factory for the URL validation visitation strategy.
163 class URL_Download_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
165 public:
166 /// Constructor.
167 URL_Download_Visitation_Strategy_Factory (URL *);
169 // = Factory Methods.
170 /// Factory Method that makes an <HTTP_Header_Iterator>.
171 virtual URL_Iterator *make_header_iterator ();
173 /// Factory Method that makes an <HTML_Body_Iterator>.
174 virtual URL_Iterator *make_body_iterator ();
176 /// Factory Method that makes the header processing strategy.
177 virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
179 /// Factory Method that makes the body processing strategy .
180 virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
182 /// Close down the resources.
183 virtual int destroy ();
187 * @class URL_Validation_Visitation_Strategy_Factory
189 * @brief Concrete Factory for the URL validation visitation strategy.
191 class URL_Validation_Visitation_Strategy_Factory : public URL_Visitation_Strategy_Factory
193 public:
194 /// Constructor.
195 URL_Validation_Visitation_Strategy_Factory (URL *,
196 URL_Validation_Visitor &);
198 // = Factory Methods.
199 /// Factory Method that makes an <HTTP_Header_Iterator>.
200 virtual URL_Iterator *make_header_iterator ();
202 /// Factory Method that makes an <HTML_Body_Iterator>.
203 virtual URL_Iterator *make_body_iterator ();
205 /// Factory Method that makes the header processing strategy.
206 virtual URL_Processing_Strategy *make_header_strategy (URL_Iterator &);
208 /// Factory Method that makes the body processing strategy .
209 virtual URL_Processing_Strategy *make_body_strategy (URL_Iterator &);
211 /// Close down the resources.
212 virtual int destroy ();
214 private:
215 /// Context of the visitor.
216 URL_Validation_Visitor &visitor_context_;
220 * @class URL_Visitor
222 * @brief Base class for the URL Visitor.
224 * This class plays the "visitor" role in the Visitor pattern.
226 class URL_Visitor
228 public:
229 virtual ~URL_Visitor ();
231 /// Visit an <HTTP_URL>.
232 virtual int visit (HTTP_URL &http_url) = 0;
234 // @@
235 // virtual int visit (FTP_URL &http_url) = 0;
237 /// Cleanup the resources.
238 virtual int destroy () = 0;
240 protected:
241 /// Make the appropriate <URL_Visitation_Strategy_Factory>.
242 virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &) = 0;
245 typedef int ATTRIBUTES;
246 typedef ACE_Svc_Handler <ACE_SOCK_STREAM, ACE_NULL_SYNCH>
247 Client_Svc_Handler;
248 typedef std::pair<Client_Svc_Handler *, ATTRIBUTES>
249 CACHED_HANDLER;
250 typedef ACE_Refcounted_Hash_Recyclable<ACE_INET_Addr>
251 ACE_ADDR;
252 typedef ACE_Hash<ACE_ADDR> H_KEY;
253 typedef ACE_Equal_To<ACE_ADDR> C_KEYS;
255 typedef ACE_Hash_Map_Manager_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
256 HASH_MAP;
257 typedef ACE_Hash_Map_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
258 HASH_MAP_ITERATOR;
259 typedef ACE_Hash_Map_Reverse_Iterator_Ex<ACE_ADDR, CACHED_HANDLER, H_KEY, C_KEYS, ACE_Null_Mutex>
260 HASH_MAP_REVERSE_ITERATOR;
262 typedef ACE_Recyclable_Handler_Cleanup_Strategy<ACE_ADDR, CACHED_HANDLER, HASH_MAP>
263 CLEANUP_STRATEGY;
264 typedef ACE_Recyclable_Handler_Caching_Utility<ACE_ADDR, CACHED_HANDLER, HASH_MAP, HASH_MAP_ITERATOR, ATTRIBUTES>
265 CACHING_UTILITY;
267 typedef ACE_LRU_Caching_Strategy<ATTRIBUTES, CACHING_UTILITY>
268 LRU_CACHING_STRATEGY;
270 typedef LRU_CACHING_STRATEGY
271 CACHING_STRATEGY;
273 typedef ACE_Strategy_Connector<Client_Svc_Handler, ACE_SOCK_CONNECTOR>
274 STRATEGY_CONNECTOR;
276 typedef ACE_NOOP_Creation_Strategy<Client_Svc_Handler>
277 NULL_CREATION_STRATEGY;
279 typedef ACE_NOOP_Concurrency_Strategy<Client_Svc_Handler>
280 NULL_ACTIVATION_STRATEGY;
282 typedef ACE_Cached_Connect_Strategy_Ex<Client_Svc_Handler, ACE_SOCK_CONNECTOR, CACHING_STRATEGY, ATTRIBUTES, ACE_SYNCH_NULL_MUTEX>
283 CACHED_CONNECT_STRATEGY;
286 * @class URL_Validation_Visitor
288 * @brief Subclass that defines the URL validation visitor.
290 * This class checks to make sure that the <HTTP_URL> is valid.
291 * If the <HTTP_URL> is an <HTML> file, it can also be used to
292 * recursively check that all embedded links in this file are
293 * valid.
295 class URL_Validation_Visitor : public URL_Visitor
297 public:
298 typedef ACE_Hash_Map_Manager <ACE_URL_Addr, URL_Status, ACE_Null_Mutex>
299 URL_CACHE;
302 * Visit an <HTTP_URL> to make sure that it's valid. If the content
303 * type of the <HTTP_URL> is "text/html" and the <recursion> option
304 * is enabled then <visit> recursively checks each link embedded in
305 * the HTML page.
307 virtual int visit (HTTP_URL &http_url);
309 // @@
310 // virtual int visit (FTP_URL &http_url);
312 /// Cleanup the resources.
313 URL_Validation_Visitor ();
314 virtual int destroy ();
316 /// Returns a reference to the URL cache.
317 URL_CACHE &url_cache ();
319 protected:
320 /// Factory Method that makes a
321 /// <URL_Validation_Visitation_Strategy_Factory>.
322 virtual ~URL_Validation_Visitor ();
323 virtual URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
325 /// Cache the status of URLs we've already validated.
326 URL_CACHE url_cache_;
328 /// Check to see if the reply status of this <url_addr> is in the
329 /// cache. Returns 1 if so, 0 if not.
330 int in_cache (const ACE_URL_Addr &url_addr);
332 NULL_CREATION_STRATEGY creation_strategy_;
333 NULL_ACTIVATION_STRATEGY activation_strategy_;
335 // Configure the Strategy Connector with a strategy that caches
336 // connection.
337 CACHED_CONNECT_STRATEGY *caching_connect_strategy_;
339 STRATEGY_CONNECTOR *strat_connector_;
341 CACHING_STRATEGY caching_strategy_;
346 * @class URL_Download_Visitor
348 * @brief Subclass for the URL validtion visitor.
350 * This class checks to make sure that the <HTTP_URL> is valid.
352 class URL_Download_Visitor : public URL_Visitor
354 public:
356 * Visit an <HTTP_URL> to make sure that it's valid. If the content
357 * type of the <HTTP_URL> is "text/html" and the <recursion> option
358 * is enabled then <visit> recursively checks each link embedded in
359 * the HTML page.
361 virtual int visit (HTTP_URL &http_url);
363 // @@
364 // virtual int visit (FTP_URL &http_url);
366 /// Cleanup the resources.
367 virtual int destroy ();
369 protected:
370 /// Factory Method that makes a <URL_Download_Visitation_Strategy_Factory>.
371 URL_Visitation_Strategy_Factory *make_visitation_strategy_factory (URL &);
/**
 * @class Auto_Destroyer
 *
 * @brief Simple class that ensures the <destroy> method is called on our
 * <URL_*> objects when they go out of scope.
 *
 * This class is similar to an auto_ptr<> and should be used to
 * simplify blocks of code that must create/destroy pointers to
 * various <URL_*> related strategies and iterators.
 *
 * The guard calls <destroy> on the pointer it holds exactly once:
 * either when a different pointer is assigned to it or when the
 * guard itself goes out of scope.  Guards cannot be copied, since
 * two guards holding the same pointer would each call <destroy>.
 */
template <class T>
class Auto_Destroyer
{
public:
  /// Take charge of @a t, which may be null.
  Auto_Destroyer (T *t): t_ (t) {}

  // Copying would leave two guards responsible for one object.
  Auto_Destroyer (const Auto_Destroyer &) = delete;
  Auto_Destroyer &operator= (const Auto_Destroyer &) = delete;

  /// Member access on the held object.
  T *operator-> () { return this->t_; }

  /// Returns the held pointer itself (not a reference), so callers
  /// write <**guard> to reach the object.
  T *operator *() { return this->t_; }

  /// Replace the held pointer with @a t, calling <destroy> on the
  /// previously held object first.
  void operator= (T *t)
  {
    // Re-assigning the pointer we already hold must be a no-op;
    // otherwise the object would be destroyed here and then again
    // by the destructor through the now-dangling pointer.
    if (this->t_ == t)
      return;

    if (this->t_ != nullptr)
      this->t_->destroy ();
    this->t_ = t;
  }

  /// Calls <destroy> on the held object, if there is one.
  ~Auto_Destroyer ()
  {
    if (this->t_ != nullptr)
      this->t_->destroy ();
  }

private:
  /// The object we are responsible for; may be null.
  T *t_;
};
406 #include /**/ "ace/post.h"
407 #endif /* _URL_VISITOR_H */