7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
16 * @package Zend_Feed_Reader
17 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
18 * @license http://framework.zend.com/license/new-bsd New BSD License
19 * @version $Id: Reader.php 17391 2009-08-05 11:27:52Z padraic $
25 require_once 'Zend/Feed.php';
28 * @see Zend_Feed_Reader_Feed_Rss
30 require_once 'Zend/Feed/Reader/Feed/Rss.php';
33 * @see Zend_Feed_Reader_Feed_Atom
35 require_once 'Zend/Feed/Reader/Feed/Atom.php';
39 * @package Zend_Feed_Reader
40 * @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
41 * @license http://framework.zend.com/license/new-bsd New BSD License
43 class Zend_Feed_Reader
/**
 * XML namespace URIs that detectType() registers with XPath in order to
 * recognise Atom and RDF-based RSS documents.
 */
const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
const NAMESPACE_RDF     = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
const NAMESPACE_RSS_10  = 'http://purl.org/rss/1.0/';

/**
 * Feed type identifiers. Every RSS identifier starts with "rss", which is
 * what importFeed() and importString() test to pick the RSS reader class.
 */
const TYPE_ANY              = 'any';
const TYPE_ATOM_03          = 'atom-03';
const TYPE_ATOM_10          = 'atom-10';
const TYPE_ATOM_ANY         = 'atom';
const TYPE_RSS_090          = 'rss-090';
const TYPE_RSS_091          = 'rss-091';
const TYPE_RSS_091_NETSCAPE = 'rss-091n';
const TYPE_RSS_091_USERLAND = 'rss-091u';
const TYPE_RSS_092          = 'rss-092';
const TYPE_RSS_093          = 'rss-093';
const TYPE_RSS_094          = 'rss-094';
const TYPE_RSS_10           = 'rss-10';
const TYPE_RSS_20           = 'rss-20';
const TYPE_RSS_ANY          = 'rss';
/**
 * Cache instance returned by getCache() and consulted by import().
 *
 * @var Zend_Cache_Core
 */
protected static $_cache = null;

/**
 * HTTP client object to use for retrieving feeds
 *
 * @var Zend_Http_Client
 */
protected static $_httpClient = null;

/**
 * Override HTTP PUT and DELETE request methods?
 *
 * @var boolean
 */
protected static $_httpMethodOverride = false;

/**
 * Whether import() should issue HTTP conditional GET requests
 * (only honoured there when a cache is also set).
 *
 * @var boolean
 */
protected static $_httpConditionalGet = false;

/**
 * Plugin loader used to resolve Extension classes; created on demand by
 * getPluginLoader() when none was injected.
 *
 * @var Zend_Loader_PluginLoader_Interface
 */
protected static $_pluginLoader = null;

/**
 * NOTE(review): within the visible code this is only ever reset to an
 * empty array by reset(); confirm whether anything still reads it.
 *
 * @var array
 */
protected static $_prefixPaths = array();
99 protected static $_extensions = array(
/**
 * Get the feed cache
 *
 * @return Zend_Cache_Core|null The cache given to setCache(), or null when none is set
 */
public static function getCache()
{
    return self::$_cache;
}
/**
 * Set the feed cache
 *
 * @param  Zend_Cache_Core $cache Cache instance later used by import()
 * @return void
 */
public static function setCache(Zend_Cache_Core $cache)
{
    self::$_cache = $cache;
}
/**
 * Set the HTTP client instance
 *
 * Sets the HTTP client object to use for retrieving the feeds.
 *
 * @param  Zend_Http_Client $httpClient
 * @return void
 */
public static function setHttpClient(Zend_Http_Client $httpClient)
{
    self::$_httpClient = $httpClient;
}
/**
 * Gets the HTTP client object. If none is set, a new Zend_Http_Client is
 * created, remembered and returned.
 *
 * @return Zend_Http_Client
 */
public static function getHttpClient()
{
    // A client was injected (or created by an earlier call): reuse it.
    if (self::$_httpClient instanceof Zend_Http_Client) {
        return self::$_httpClient;
    }

    /**
     * @see Zend_Http_Client
     */
    require_once 'Zend/Http/Client.php';
    self::$_httpClient = new Zend_Http_Client();

    return self::$_httpClient;
}
/**
 * Toggle using POST instead of PUT and DELETE HTTP methods
 *
 * Some feed implementations do not accept PUT and DELETE HTTP methods, or
 * they can't be used because of proxies or other measures. The flag is
 * intended to turn on using POST where PUT and DELETE would normally be
 * used, together with an X-Method-Override header carrying PUT or DELETE.
 *
 * NOTE(review): this method only stores the flag; no code visible in this
 * class acts on it apart from getHttpMethodOverride() and reset().
 *
 * @param  boolean $override Whether to override PUT and DELETE.
 * @return void
 */
public static function setHttpMethodOverride($override = true)
{
    self::$_httpMethodOverride = $override;
}
/**
 * Get the HTTP override state
 *
 * @return boolean The value last passed to setHttpMethodOverride() (false by default)
 */
public static function getHttpMethodOverride()
{
    return self::$_httpMethodOverride;
}
/**
 * Set the flag indicating whether or not to use HTTP conditional GET
 *
 * import() only performs conditional requests when this flag is on and a
 * cache has been set.
 *
 * @param  boolean $bool
 * @return void
 */
public static function useHttpConditionalGet($bool = true)
{
    self::$_httpConditionalGet = $bool;
}
/**
 * Import a feed by providing a URI
 *
 * The shared HTTP client is reset and pointed at $uri, then one of three
 * strategies is used:
 *  - no cache set: the feed is always fetched;
 *  - cache set, conditional GET off: a cached copy is returned when there
 *    is one, otherwise the feed is fetched and stored in the cache;
 *  - cache set, conditional GET on: validators (ETag / Last-Modified) are
 *    sent when a cached copy exists and a 304 reply is served from cache.
 *
 * @param  string $uri The URI to the feed
 * @param  string $etag OPTIONAL Last received ETag for this resource
 * @param  string $lastModified OPTIONAL Last-Modified value for this resource
 * @return Zend_Feed_Reader_Feed_Rss|Zend_Feed_Reader_Feed_Atom
 * @throws Zend_Feed_Exception When the server answers with an unexpected status code
 */
public static function import($uri, $etag = null, $lastModified = null)
{
    $cache  = self::getCache();
    $client = self::getHttpClient();
    $client->resetParameters();
    // Drop validators left over from a previous import on the shared client.
    $client->setHeaders('If-None-Match', null);
    $client->setHeaders('If-Modified-Since', null);
    $client->setUri($uri);
    $cacheId = 'Zend_Feed_Reader_' . md5($uri);

    // Strategy 1: no cache at all, plain fetch.
    if (!$cache) {
        $response = $client->request('GET');
        if ($response->getStatus() !== 200) {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
        }
        return self::importString($response->getBody());
    }

    // Strategy 2: cache without conditional GET.
    if (!self::$_httpConditionalGet) {
        $cached = $cache->load($cacheId);
        if ($cached !== false) {
            return self::importString($cached);
        }
        $response = $client->request('GET');
        if ($response->getStatus() !== 200) {
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
        }
        $body = $response->getBody();
        $cache->save($body, $cacheId);
        return self::importString($body);
    }

    // Strategy 3: conditional GET. Validators are only sent when a cached
    // body exists, so that a 304 answer can actually be served.
    $cached = $cache->load($cacheId);
    if ($cached) {
        if (is_null($etag)) {
            $etag = $cache->load($cacheId.'_etag');
        }
        if (is_null($lastModified)) {
            $lastModified = $cache->load($cacheId.'_lastmodified');
        }
        if ($etag) {
            $client->setHeaders('If-None-Match', $etag);
        }
        if ($lastModified) {
            $client->setHeaders('If-Modified-Since', $lastModified);
        }
    }

    $response = $client->request('GET');
    $status   = $response->getStatus();
    if ($status !== 200 && $status !== 304) {
        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $status);
    }

    // Not modified: reuse the cached document.
    if ($status == 304) {
        return self::importString($cached);
    }

    // Fresh document: refresh the cached body and whichever validators the
    // server supplied.
    $body = $response->getBody();
    $cache->save($body, $cacheId);
    if ($response->getHeader('ETag')) {
        $cache->save($response->getHeader('ETag'), $cacheId.'_etag');
    }
    if ($response->getHeader('Last-Modified')) {
        $cache->save($response->getHeader('Last-Modified'), $cacheId.'_lastmodified');
    }

    return self::importString($body);
}
/**
 * Import a feed by providing a Zend_Feed_Abstract object
 *
 * The feed's owner DOM document is inspected with detectType() and wrapped
 * in the matching reader class.
 *
 * @param  Zend_Feed_Abstract $feed A fully instantiated Zend_Feed object
 * @return Zend_Feed_Reader_Feed_Rss|Zend_Feed_Reader_Feed_Atom
 */
public static function importFeed(Zend_Feed_Abstract $feed)
{
    $document = $feed->getDOM()->ownerDocument;
    $feedType = self::detectType($document);
    self::_registerCoreExtensions();

    // Every RSS type identifier starts with "rss"; anything else is
    // handed to the Atom reader.
    $isRss = (substr($feedType, 0, 3) == 'rss');

    return $isRss
        ? new Zend_Feed_Reader_Feed_Rss($document, $feedType)
        : new Zend_Feed_Reader_Feed_Atom($document, $feedType);
}
/**
 * Import a feed from a string
 *
 * @param  string $string XML source of the feed
 * @return Zend_Feed_Reader_Feed_Rss|Zend_Feed_Reader_Feed_Atom
 * @throws Zend_Feed_Exception When the string cannot be parsed as XML
 */
public static function importString($string)
{
    // Collect libxml errors internally while parsing, then put the
    // caller's setting back.
    $previousErrorFlag = libxml_use_internal_errors(true);
    $document = new DOMDocument;
    $parsed   = $document->loadXML($string);
    libxml_use_internal_errors($previousErrorFlag);

    if (!$parsed) {
        // Build error message
        $lastError = libxml_get_last_error();
        $errormsg  = ($lastError && $lastError->message)
            ? "DOMDocument cannot parse XML: {$lastError->message}"
            : "DOMDocument cannot parse XML: Please check the XML document's validity";

        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception($errormsg);
    }

    $feedType = self::detectType($document);

    self::_registerCoreExtensions();

    // Every RSS type identifier starts with "rss"; anything else is
    // handed to the Atom reader.
    if (substr($feedType, 0, 3) == 'rss') {
        return new Zend_Feed_Reader_Feed_Rss($document, $feedType);
    }

    return new Zend_Feed_Reader_Feed_Atom($document, $feedType);
}
/**
 * Imports a feed from a file located at $filename.
 *
 * @param  string $filename Path (or stream URL) readable by file_get_contents()
 * @throws Zend_Feed_Exception When the file cannot be read
 * @return Zend_Feed_Reader_Feed_Rss|Zend_Feed_Reader_Feed_Atom
 */
public static function importFile($filename)
{
    // The warning is silenced because the failure is reported through an
    // exception instead.
    $feed = @file_get_contents($filename);

    if ($feed === false) {
        // error_get_last() replaces the former track_errors/$php_errormsg
        // mechanism, which is deprecated since PHP 7.2 and gone in PHP 8.
        $lastError = error_get_last();
        $reason    = (is_array($lastError) && isset($lastError['message']))
            ? $lastError['message']
            : '(error message not available)';

        /**
         * @see Zend_Feed_Exception
         */
        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception("File could not be loaded: $reason");
    }

    return self::importString($feed);
}
/**
 * Discover the feeds advertised by an HTML page
 *
 * Fetches $uri and scans its <link rel="alternate"> elements. The first
 * href found for each of the RSS, Atom and RDF MIME types is recorded.
 *
 * @param  string $uri Address of the HTML page to inspect
 * @return stdClass Object with optional properties "rss", "atom" and "rdf",
 *                  each holding the href of the first matching link
 * @throws Zend_Feed_Exception When the page cannot be fetched or parsed
 */
public static function findFeedLinks($uri)
{
    // Get the HTTP response from $uri and save the contents
    $client = self::getHttpClient();
    $client->setUri($uri);
    $response = $client->request();
    if ($response->getStatus() !== 200) {
        /**
         * @see Zend_Feed_Exception
         */
        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
    }
    $responseHtml = $response->getBody();

    // Parser warnings are silenced; a failure is reported as an exception.
    $dom    = new DOMDocument;
    $status = @$dom->loadHTML($responseHtml);
    if (!$status) {
        // error_get_last() replaces the former track_errors/$php_errormsg
        // mechanism, which is deprecated since PHP 7.2 and gone in PHP 8.
        $lastError = error_get_last();
        if (is_array($lastError) && isset($lastError['message'])) {
            $reason = $lastError['message'];
        } elseif (function_exists('xdebug_is_enabled')) {
            $reason = '(error message not available, when XDebug is running)';
        } else {
            $reason = '(error message not available)';
        }
        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception("DOMDocument cannot parse HTML: $reason");
    }

    // MIME type => property name on the result object.
    $knownTypes = array(
        'application/rss+xml'  => 'rss',
        'application/atom+xml' => 'atom',
        'application/rdf+xml'  => 'rdf',
    );

    $feedLinks = new stdClass;
    foreach ($dom->getElementsByTagName('link') as $link) {
        $href = $link->getAttribute('href');
        // MIME types are case-insensitive, so normalise like "rel" below.
        $type = strtolower($link->getAttribute('type'));
        if (strtolower($link->getAttribute('rel')) !== 'alternate'
            || !$type || !$href) {
            continue;
        }

        // Only the first link of each kind is kept.
        if (isset($knownTypes[$type])) {
            $property = $knownTypes[$type];
            if (!isset($feedLinks->$property)) {
                $feedLinks->$property = $href;
            }
        }

        // Nothing left to discover once all three kinds are known.
        if (isset($feedLinks->rss) && isset($feedLinks->atom) && isset($feedLinks->rdf)) {
            break;
        }
    }

    return $feedLinks;
}
/**
 * Detect the feed type of the provided feed
 *
 * Checks are made in this order: an <rss> root element (refined by its
 * version attribute), an RDF root carrying RSS 1.0 or RSS 0.90 elements,
 * an Atom 1.0 feed element, an Atom 0.3 feed element. TYPE_ANY is returned
 * when nothing matches.
 *
 * @param  Zend_Feed_Reader_FeedInterface|DOMDocument|string $feed Reader feed
 *         object, parsed document, or non-empty XML string
 * @return string One of the TYPE_* constants
 * @throws Zend_Feed_Exception When $feed has an unsupported type or the
 *         string cannot be parsed as XML
 */
public static function detectType($feed)
{
    if ($feed instanceof Zend_Feed_Reader_FeedInterface) {
        $dom = $feed->getDomDocument();
    } elseif ($feed instanceof DOMDocument) {
        $dom = $feed;
    } elseif (is_string($feed) && !empty($feed)) {
        $dom = new DOMDocument;
        // Parse the string that was passed in. The previous code called
        // loadXML() on an undefined $doc with an undefined $string, which
        // is a fatal error for every string input.
        $status = @$dom->loadXML($feed);
        if (!$status) {
            // error_get_last() replaces the former track_errors /
            // $php_errormsg mechanism (deprecated 7.2, removed in PHP 8).
            $lastError = error_get_last();
            if (is_array($lastError) && isset($lastError['message'])) {
                $reason = $lastError['message'];
            } elseif (function_exists('xdebug_is_enabled')) {
                $reason = '(error message not available, when XDebug is running)';
            } else {
                $reason = '(error message not available)';
            }
            require_once 'Zend/Feed/Exception.php';
            throw new Zend_Feed_Exception("DOMDocument cannot parse XML: $reason");
        }
    } else {
        require_once 'Zend/Feed/Exception.php';
        throw new Zend_Feed_Exception('Invalid object/scalar provided: must be of type Zend_Feed_Reader_FeedInterface, DomDocument or string');
    }

    $xpath = new DOMXPath($dom);

    // Plain RSS: <rss version="..."> root element.
    if ($xpath->query('/rss')->length) {
        $type    = self::TYPE_RSS_ANY;
        $version = $xpath->evaluate('string(/rss/@version)');

        if (strlen($version) > 0) {
            switch ($version) {
                case '2.0':
                    $type = self::TYPE_RSS_20;
                    break;

                case '0.94':
                    $type = self::TYPE_RSS_094;
                    break;

                case '0.93':
                    $type = self::TYPE_RSS_093;
                    break;

                case '0.92':
                    $type = self::TYPE_RSS_092;
                    break;

                case '0.91':
                    $type = self::TYPE_RSS_091;
                    break;
            }
        }

        return $type;
    }

    // RDF-based RSS: the "rss" prefix is bound to the 1.0 namespace first
    // and rebound to the 0.90 namespace for the second attempt.
    $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);

    if ($xpath->query('/rdf:RDF')->length) {
        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_10;
        }

        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_090;
        }
    }

    // Atom: same technique, 1.0 namespace first, then 0.3.
    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_10;
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_03;
    }

    return self::TYPE_ANY;
}
/**
 * Set plugin loader for use with Extensions
 *
 * @param  Zend_Loader_PluginLoader_Interface $loader
 * @return void
 */
public static function setPluginLoader(Zend_Loader_PluginLoader_Interface $loader)
{
    self::$_pluginLoader = $loader;
}
/**
 * Get plugin loader for use with Extensions
 *
 * When no loader has been set, a Zend_Loader_PluginLoader is created with
 * the Zend_Feed_Reader_Extension_ prefix path and remembered.
 *
 * @return Zend_Loader_PluginLoader_Interface $loader
 */
public static function getPluginLoader()
{
    if (isset(self::$_pluginLoader)) {
        return self::$_pluginLoader;
    }

    require_once 'Zend/Loader/PluginLoader.php';
    $defaultPaths = array(
        'Zend_Feed_Reader_Extension_' => 'Zend/Feed/Reader/Extension/',
    );
    self::$_pluginLoader = new Zend_Loader_PluginLoader($defaultPaths);

    return self::$_pluginLoader;
}
/**
 * Add prefix path for loading Extensions
 *
 * Trailing underscores are stripped from the prefix and trailing directory
 * separators from the path before both are handed to the plugin loader.
 *
 * @param  string $prefix
 * @param  string $path
 * @return void
 */
public static function addPrefixPath($prefix, $path)
{
    $cleanPrefix = rtrim($prefix, '_');
    $cleanPath   = rtrim($path, DIRECTORY_SEPARATOR);

    $loader = self::getPluginLoader();
    $loader->addPrefixPath($cleanPrefix, $cleanPath);
}
/**
 * Add multiple Extension prefix paths at once
 *
 * Two shapes are accepted and may be mixed in one call:
 *  - a single pair:  array('prefix' => ..., 'path' => ...)
 *  - a list of such pairs.
 *
 * @param  array $spec
 * @return void
 */
public static function addPrefixPaths(array $spec)
{
    if (isset($spec['prefix']) && isset($spec['path'])) {
        self::addPrefixPath($spec['prefix'], $spec['path']);
    }

    foreach ($spec as $prefixPath) {
        // Only nested arrays can be prefix/path pairs. Without this check,
        // PHP versions before 5.4 treat $string['prefix'] as $string[0],
        // so the scalar values of a flat spec passed the isset() test and
        // were registered as single-character prefix paths.
        if (!is_array($prefixPath)) {
            continue;
        }
        if (isset($prefixPath['prefix']) && isset($prefixPath['path'])) {
            self::addPrefixPath($prefixPath['prefix'], $prefixPath['path']);
        }
    }
}
/**
 * Register an Extension by name
 *
 * An extension consists of a "<name>_Feed" and/or a "<name>_Entry" plugin.
 * Each one that the plugin loader can load is added to the extension list;
 * it is only an error when neither of them can be loaded.
 *
 * @param  string $name
 * @return void
 * @throws Zend_Feed_Exception if unable to resolve Extension class
 */
public static function registerExtension($name)
{
    $feedName  = $name . '_Feed';
    $entryName = $name . '_Entry';
    $loader    = self::getPluginLoader();

    // Already registered and loaded: nothing to do.
    if (self::isRegistered($name)) {
        if ($loader->isLoaded($feedName) || $loader->isLoaded($entryName)) {
            return;
        }
    }

    // Either half of an extension may legitimately be missing, so a load
    // failure of one part is ignored here and judged below.
    try {
        $loader->load($feedName);
        self::$_extensions['feed'][] = $feedName;
    } catch (Zend_Loader_PluginLoader_Exception $e) {
    }
    try {
        $loader->load($entryName);
        self::$_extensions['entry'][] = $entryName;
    } catch (Zend_Loader_PluginLoader_Exception $e) {
    }

    if (!$loader->isLoaded($feedName) && !$loader->isLoaded($entryName)) {
        require_once 'Zend/Feed/Exception.php';
        // The space before "using" was missing, gluing the extension name
        // to the rest of the sentence.
        throw new Zend_Feed_Exception('Could not load extension: ' . $name
            . ' using Plugin Loader. Check prefix paths are configured and extension exists.');
    }
}
/**
 * Is a given named Extension registered?
 *
 * An extension counts as registered when either its "<name>_Feed" or its
 * "<name>_Entry" plugin is present in the extension list.
 *
 * @param  string $extensionName
 * @return boolean
 */
public static function isRegistered($extensionName)
{
    $hasFeedPart  = in_array($extensionName . '_Feed', self::$_extensions['feed']);
    $hasEntryPart = in_array($extensionName . '_Entry', self::$_extensions['entry']);

    return $hasFeedPart || $hasEntryPart;
}
/**
 * Get a list of extensions
 *
 * @return array The extension registry, including the "feed" and "entry"
 *               lists that registerExtension() appends to
 */
public static function getExtensions()
{
    return self::$_extensions;
}
645 * Reset class state to defaults
649 public static function reset()
651 self
::$_cache = null;
652 self
::$_httpClient = null;
653 self
::$_httpMethodOverride = false;
654 self
::$_httpConditionalGet = false;
655 self
::$_pluginLoader = null;
656 self
::$_prefixPaths = array();
657 self
::$_extensions = array(
/**
 * Register core (default) extensions
 *
 * Called by importFeed() and importString() before a reader object is
 * created. The extensions are registered in the order listed.
 *
 * @return void
 */
protected static function _registerCoreExtensions()
{
    $coreExtensions = array(
        'DublinCore',
        'Content',
        'Atom',
        'Slash',
        'WellFormedWeb',
        'Thread',
        'Podcast',
    );

    foreach ($coreExtensions as $extensionName) {
        self::registerExtension($extensionName);
    }
}