3 //=============================================================================
7 * @author Douglas C. Schmidt <d.schmidt@vanderbilt.edu> and Kirthika Parameswaran <kirthika@cs.wustl.edu>
9 //=============================================================================
12 #ifndef _URL_VISITOR_H
13 #define _URL_VISITOR_H
14 #include /**/ "ace/pre.h"
16 #include "ace/Strategies_T.h"
18 #if !defined (ACE_LACKS_PRAGMA_ONCE)
20 #endif /* ACE_LACKS_PRAGMA_ONCE */
24 #include "Iterators.h"
25 #include "ace/Hash_Map_Manager_T.h"
26 #include "ace/Caching_Strategies_T.h"
27 #include "ace/Cached_Connect_Strategy_T.h"
30 // Forward declarations.
// <URL_Validation_Visitor> is used by reference in the strategy and factory
// declarations below before the class itself is defined.
31 class URL_Validation_Visitor
;
34 * @class URL_Processing_Strategy
36 * @brief Abstract base class for the URL processing strategy.
// Strategy interface: <execute> is pure virtual and must be supplied by
// each concrete strategy; <destroy> is virtual but not pure.
38 class URL_Processing_Strategy
// Constructor.  The first parameter is a reference to a <URL>.
// NOTE(review): the remainder of the parameter list is not visible here.
42 URL_Processing_Strategy (URL
&,
// Virtual destructor.
45 virtual ~URL_Processing_Strategy ();
47 /// Perform the strategy.  Pure virtual.
48 virtual int execute () = 0;
50 virtual int destroy ();
52 // Close down the resources.  (Presumably documents <destroy> above.)
55 /// A reference to the URL "context" that we're processing.
// NOTE(review): the member documented by the comment above is not visible
// here.
58 /// Iterator for the URL that we're processing.
59 URL_Iterator
&iterator_
;
63 * @class HTTP_Header_Processing_Strategy
65 * @brief Defines the HTTP header processing strategy.
// Concrete strategy derived publicly from <URL_Processing_Strategy>; it
// declares its own <execute>.
67 class HTTP_Header_Processing_Strategy
: public URL_Processing_Strategy
// Constructor.  The first parameter is a reference to a <URL>.
// NOTE(review): the remainder of the parameter list is not visible here.
71 HTTP_Header_Processing_Strategy (URL
&,
74 /// Perform the strategy for processing an HTTP header.
75 virtual int execute ();
79 * @class HTML_Body_Validation_Strategy
81 * @brief Defines the HTML body processing strategy.
83 * This class iterates through the body of an HTML file and
84 * recursively visits embedded links.
// Concrete strategy derived publicly from <URL_Processing_Strategy>.  In
// addition to what it inherits, it keeps a reference to a
// <URL_Validation_Visitor> in <visitor_context_>.
86 class HTML_Body_Validation_Strategy
: public URL_Processing_Strategy
// Constructor.  Takes a <URL> reference first and a
// <URL_Validation_Visitor> reference last.
// NOTE(review): any parameters in between are not visible here.
90 HTML_Body_Validation_Strategy (URL
&,
92 URL_Validation_Visitor
&);
95 * Perform the strategy for processing an HTML file. This strategy
96 * iterates over the HTML file and recursively visits embedded links
97 * to process them, as well.
99 virtual int execute ();
102 /// This is the context of the visit.
103 URL_Validation_Visitor
&visitor_context_
;
107 * @class URL_Download_Strategy
109 * @brief Defines a URL downloading strategy.
111 * This class downloads a URL's contents into a temporary file.
// Concrete strategy derived publicly from <URL_Processing_Strategy>; it
// declares its own <execute>.
113 class URL_Download_Strategy
: public URL_Processing_Strategy
// Constructor.  The first parameter is a reference to a <URL>.
// NOTE(review): the remainder of the parameter list is not visible here.
117 URL_Download_Strategy (URL
&,
120 /// Perform the strategy for downloading a URL to a temporary file.
121 virtual int execute ();
125 * @class URL_Visitation_Strategy_Factory
127 * @brief Abstract Factory for the URL visitation strategy.
// Abstract factory interface.  Every factory method below, and <destroy>,
// is pure virtual, so only derived factories can be instantiated.
129 class URL_Visitation_Strategy_Factory
// Constructor; takes a pointer to a <URL>.
132 URL_Visitation_Strategy_Factory (URL
*);
// Virtual destructor.
135 virtual ~URL_Visitation_Strategy_Factory ();
137 // = Factory Methods.
138 /// Factory Method that makes the header iterator.
139 virtual URL_Iterator
*make_header_iterator () = 0;
141 /// Factory Method that makes the body iterator.
142 virtual URL_Iterator
*make_body_iterator () = 0;
144 /// Factory Method that makes the header processing strategy.
/// Takes a <URL_Iterator> reference.
145 virtual URL_Processing_Strategy
*make_header_strategy (URL_Iterator
&) = 0;
147 /// Factory Method that makes the body processing strategy.
/// Takes a <URL_Iterator> reference.
148 virtual URL_Processing_Strategy
*make_body_strategy (URL_Iterator
&) = 0;
150 /// Close down the resources.
151 virtual int destroy () = 0;
154 /// Stash the URL so we don't have to pass it around.
// NOTE(review): the member documented by the comment above is not visible
// here.
159 * @class URL_Download_Visitation_Strategy_Factory
161 * @brief Concrete Factory for the URL download visitation strategy.
// Concrete factory derived publicly from <URL_Visitation_Strategy_Factory>.
// It redeclares all four factory methods and <destroy> as non-pure
// virtuals.
163 class URL_Download_Visitation_Strategy_Factory
: public URL_Visitation_Strategy_Factory
// Constructor; takes a pointer to a <URL>.
167 URL_Download_Visitation_Strategy_Factory (URL
*);
169 // = Factory Methods.
170 /// Factory Method that makes an <HTTP_Header_Iterator>.
171 virtual URL_Iterator
*make_header_iterator ();
173 /// Factory Method that makes an <HTML_Body_Iterator>.
174 virtual URL_Iterator
*make_body_iterator ();
176 /// Factory Method that makes the header processing strategy.
177 virtual URL_Processing_Strategy
*make_header_strategy (URL_Iterator
&);
179 /// Factory Method that makes the body processing strategy.
180 virtual URL_Processing_Strategy
*make_body_strategy (URL_Iterator
&);
182 /// Close down the resources.
183 virtual int destroy ();
187 * @class URL_Validation_Visitation_Strategy_Factory
189 * @brief Concrete Factory for the URL validation visitation strategy.
// Concrete factory derived publicly from <URL_Visitation_Strategy_Factory>.
// It redeclares all four factory methods and <destroy> as non-pure
// virtuals, and keeps a reference to a <URL_Validation_Visitor> in
// <visitor_context_>.
191 class URL_Validation_Visitation_Strategy_Factory
: public URL_Visitation_Strategy_Factory
// Constructor; takes a pointer to a <URL> and a reference to a
// <URL_Validation_Visitor>.
195 URL_Validation_Visitation_Strategy_Factory (URL
*,
196 URL_Validation_Visitor
&);
198 // = Factory Methods.
199 /// Factory Method that makes an <HTTP_Header_Iterator>.
200 virtual URL_Iterator
*make_header_iterator ();
202 /// Factory Method that makes an <HTML_Body_Iterator>.
203 virtual URL_Iterator
*make_body_iterator ();
205 /// Factory Method that makes the header processing strategy.
206 virtual URL_Processing_Strategy
*make_header_strategy (URL_Iterator
&);
208 /// Factory Method that makes the body processing strategy.
209 virtual URL_Processing_Strategy
*make_body_strategy (URL_Iterator
&);
211 /// Close down the resources.
212 virtual int destroy ();
215 /// Context of the visitor.
216 URL_Validation_Visitor
&visitor_context_
;
222 * @brief Base class for the URL Visitor.
224 * This class plays the "visitor" role in the Visitor pattern.
// Members of the visitor base class.  <visit>, <destroy> and
// <make_visitation_strategy_factory> are all pure virtual.
// NOTE(review): the class header for these members is not visible here.
// Virtual destructor.
229 virtual ~URL_Visitor ();
231 /// Visit an <HTTP_URL>.
232 virtual int visit (HTTP_URL
&http_url
) = 0;
// The FTP overload below is commented out, so it is not part of the
// interface.
235 // virtual int visit (FTP_URL &http_url) = 0;
237 /// Cleanup the resources.
238 virtual int destroy () = 0;
241 /// Make the appropriate <URL_Visitation_Strategy_Factory>.
/// Takes a <URL> reference and returns a factory pointer.
242 virtual URL_Visitation_Strategy_Factory
*make_visitation_strategy_factory (URL
&) = 0;
// Type aliases for the hash-map-based handler cache, the caching strategy
// and the strategy connector, built from ACE templates.
// NOTE(review): several alias names are not visible in this listing (each
// is marked below); the names suggested are taken from how later typedefs
// refer to them and must be confirmed.
245 typedef int ATTRIBUTES
;
// NOTE(review): alias name not visible; later typedefs use
// <Client_Svc_Handler> for a service-handler type -- confirm.
246 typedef ACE_Svc_Handler
<ACE_SOCK_STREAM
, ACE_NULL_SYNCH
>
// Pair of a handler pointer and its <ATTRIBUTES> value.
// NOTE(review): alias name not visible; presumably <CACHED_HANDLER>.
248 typedef std::pair
<Client_Svc_Handler
*, ATTRIBUTES
>
// NOTE(review): alias name not visible; presumably <ACE_ADDR>.
250 typedef ACE_Refcounted_Hash_Recyclable
<ACE_INET_Addr
>
// Hash functor and equality functor over <ACE_ADDR>.
252 typedef ACE_Hash
<ACE_ADDR
> H_KEY
;
253 typedef ACE_Equal_To
<ACE_ADDR
> C_KEYS
;
// Map from <ACE_ADDR> to <CACHED_HANDLER>, parameterized with
// <ACE_Null_Mutex>.
// NOTE(review): alias name not visible; presumably <HASH_MAP>.
255 typedef ACE_Hash_Map_Manager_Ex
<ACE_ADDR
, CACHED_HANDLER
, H_KEY
, C_KEYS
, ACE_Null_Mutex
>
// NOTE(review): alias name not visible; presumably <HASH_MAP_ITERATOR>.
257 typedef ACE_Hash_Map_Iterator_Ex
<ACE_ADDR
, CACHED_HANDLER
, H_KEY
, C_KEYS
, ACE_Null_Mutex
>
259 typedef ACE_Hash_Map_Reverse_Iterator_Ex
<ACE_ADDR
, CACHED_HANDLER
, H_KEY
, C_KEYS
, ACE_Null_Mutex
>
260 HASH_MAP_REVERSE_ITERATOR
;
// NOTE(review): alias name not visible, and no later typedef in this
// listing refers to it.
262 typedef ACE_Recyclable_Handler_Cleanup_Strategy
<ACE_ADDR
, CACHED_HANDLER
, HASH_MAP
>
// NOTE(review): alias name not visible; presumably <CACHING_UTILITY>.
264 typedef ACE_Recyclable_Handler_Caching_Utility
<ACE_ADDR
, CACHED_HANDLER
, HASH_MAP
, HASH_MAP_ITERATOR
, ATTRIBUTES
>
267 typedef ACE_LRU_Caching_Strategy
<ATTRIBUTES
, CACHING_UTILITY
>
268 LRU_CACHING_STRATEGY
;
// NOTE(review): alias name not visible; presumably <CACHING_STRATEGY>,
// i.e. an alias for <LRU_CACHING_STRATEGY>.
270 typedef LRU_CACHING_STRATEGY
// NOTE(review): alias name not visible; presumably <STRATEGY_CONNECTOR>.
273 typedef ACE_Strategy_Connector
<Client_Svc_Handler
, ACE_SOCK_CONNECTOR
>
276 typedef ACE_NOOP_Creation_Strategy
<Client_Svc_Handler
>
277 NULL_CREATION_STRATEGY
;
279 typedef ACE_NOOP_Concurrency_Strategy
<Client_Svc_Handler
>
280 NULL_ACTIVATION_STRATEGY
;
// Connect strategy parameterized with the handler, connector, caching
// strategy and attribute types declared above.
282 typedef ACE_Cached_Connect_Strategy_Ex
<Client_Svc_Handler
, ACE_SOCK_CONNECTOR
, CACHING_STRATEGY
, ATTRIBUTES
, ACE_SYNCH_NULL_MUTEX
>
283 CACHED_CONNECT_STRATEGY
;
286 * @class URL_Validation_Visitor
288 * @brief Subclass that defines the URL validation visitor.
290 * This class checks to make sure that the <HTTP_URL> is valid.
291 * If the <HTTP_URL> is an <HTML> file, it can also be used to
292 * recursively check that all embedded links in this file are valid.
// Concrete visitor derived publicly from <URL_Visitor>.  It declares
// <visit>, <destroy> and <make_visitation_strategy_factory>, and holds a
// URL status cache plus the creation, activation, caching and connect
// strategy members declared at the end of the class.
295 class URL_Validation_Visitor
: public URL_Visitor
// Map keyed by <ACE_URL_Addr> with <URL_Status> values.
// NOTE(review): the alias name is not visible here; presumably <URL_CACHE>,
// which the members below use.
298 typedef ACE_Hash_Map_Manager
<ACE_URL_Addr
, URL_Status
, ACE_Null_Mutex
>
302 * Visit an <HTTP_URL> to make sure that it's valid. If the content
303 * type of the <HTTP_URL> is "text/html" and the <recursion> option
304 * is enabled then <visit> recursively checks each link embedded in
307 virtual int visit (HTTP_URL
&http_url
);
// The FTP overload below is commented out.
310 // virtual int visit (FTP_URL &http_url);
312 /// Default constructor.
313 URL_Validation_Visitor ();
/// Cleanup the resources.
314 virtual int destroy ();
316 /// Returns a reference to the URL cache.
317 URL_CACHE
&url_cache ();
320 /// Virtual destructor (the base class <URL_Visitor> also declares
321 /// a virtual destructor).
322 virtual ~URL_Validation_Visitor ();
/// Factory Method that makes a
/// <URL_Validation_Visitation_Strategy_Factory>.
323 virtual URL_Visitation_Strategy_Factory
*make_visitation_strategy_factory (URL
&);
325 /// Cache the status of URLs we've already validated.
326 URL_CACHE url_cache_
;
328 /// Check to see if the reply status of this <url_addr> is in the
329 /// cache. Returns 1 if so, 0 if not.
330 int in_cache (const ACE_URL_Addr
&url_addr
);
// Creation and activation strategy objects, held by value.
332 NULL_CREATION_STRATEGY creation_strategy_
;
333 NULL_ACTIVATION_STRATEGY activation_strategy_
;
335 // Configure the Strategy Connector with a strategy that caches
// NOTE(review): the sentence above is cut off in this listing; presumably
// it refers to caching connections -- confirm.
// Held by pointer.
337 CACHED_CONNECT_STRATEGY
*caching_connect_strategy_
;
// Held by pointer.
339 STRATEGY_CONNECTOR
*strat_connector_
;
// Held by value.
341 CACHING_STRATEGY caching_strategy_
;
346 * @class URL_Download_Visitor
348 * @brief Subclass that defines the URL download visitor.
350 * This class checks to make sure that the <HTTP_URL> is valid.
// Concrete visitor derived publicly from <URL_Visitor>.  It declares
// <visit>, <destroy> and <make_visitation_strategy_factory>.
352 class URL_Download_Visitor
: public URL_Visitor
// NOTE(review): the description below has the same wording as the one on
// <URL_Validation_Visitor::visit>; confirm that it describes the download
// visitor accurately.
356 * Visit an <HTTP_URL> to make sure that it's valid. If the content
357 * type of the <HTTP_URL> is "text/html" and the <recursion> option
358 * is enabled then <visit> recursively checks each link embedded in
361 virtual int visit (HTTP_URL
&http_url
);
// The FTP overload below is commented out.
364 // virtual int visit (FTP_URL &http_url);
366 /// Cleanup the resources.
367 virtual int destroy ();
370 /// Factory Method that makes a <URL_Download_Visitation_Strategy_Factory>.
// NOTE(review): declared without the `virtual` keyword, unlike the same
// method in <URL_Validation_Visitor>.  If the signature matches the pure
// virtual declared for <URL_Visitor> it overrides it regardless; consider
// spelling out `virtual` for consistency.
371 URL_Visitation_Strategy_Factory
*make_visitation_strategy_factory (URL
&);
375 * @class Auto_Destroyer
377 * @brief Simple class that ensures the <destroy> method is called on our
378 * <URL_*> objects when they go out of scope.
380 * This class is similar to an auto_ptr<> and should be used to
381 * simplify blocks of code that must create/destroy pointers to
382 * various <URL_*> related strategies and iterators.
// Members of <Auto_Destroyer>, a wrapper around a raw <T> pointer kept in
// <t_>.
// NOTE(review): the template/class header, the remainder of <operator=>,
// and any destructor or member declarations are not visible here.
// Constructor: stores the given pointer in <t_>.  It is not marked
// `explicit`, so a <T*> converts implicitly.
388 Auto_Destroyer (T
*t
): t_ (t
) {}
// Member access: returns the stored pointer.
389 T
*operator-> () { return this->t_
; }
// NOTE(review): this returns the stored pointer itself (a <T*>), not a
// reference to the pointed-to object.
390 T
*operator *() { return this->t_
; }
// Assignment from a raw pointer.  The visible part calls <destroy> on the
// currently held object.
// NOTE(review): any null check and the storing of <t> are not visible here.
391 void operator= (T
*t
)
394 this->t_
->destroy ();
406 #include /**/ "ace/post.h"
407 #endif /* _URL_VISITOR_H */