From 1d7cf79f50302e9de104fff0a81e30ec9297c848 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 17 Mar 2007 21:09:06 +0000 Subject: [PATCH] Add RSSGenerator, hooked up to index.xhtml - Full datetimes added to news items - New styling for news items - New config option: $web_path - Move DOMFilter->setup() invocation to FilterManager to remove duplication; the method is now public - Implement DOMFilter->getElementById() convenience XPath call - FilterManager removes all extra namespaces after DOM processing, so post-processors must not use them - Implement Page->getPath, which returns a webpath to the page git-svn-id: http://htmlpurifier.org/svnroot@826 48356398-32a2-884e-a903-53898d9a118a --- XHTMLCompiler.php | 2 +- XHTMLCompiler/DOMFilter.php | 13 ++- XHTMLCompiler/DOMFilter/Acronymizer.php | 1 - .../DOMFilter/GenerateTableOfContents.php | 2 - XHTMLCompiler/DOMFilter/Quoter.php | 1 - XHTMLCompiler/DOMFilter/RSSGenerator.php | 112 +++++++++++++++++++++ XHTMLCompiler/FilterManager.php | 5 +- XHTMLCompiler/Page.php | 9 ++ config.default.php | 6 ++ config.filters.php | 1 + 10 files changed, 145 insertions(+), 7 deletions(-) create mode 100644 XHTMLCompiler/DOMFilter/RSSGenerator.php diff --git a/XHTMLCompiler.php b/XHTMLCompiler.php index a865e7b..4a4bc83 100644 --- a/XHTMLCompiler.php +++ b/XHTMLCompiler.php @@ -56,7 +56,7 @@ class XHTMLCompiler // PLUGIN/CONFIGURATION FUNCTIONALITY protected $configKeys = array('allowed_dirs', 'directory_index', - 'indexed_dirs'); + 'indexed_dirs', 'web_path'); protected $config = array(); protected $filterManager; diff --git a/XHTMLCompiler/DOMFilter.php b/XHTMLCompiler/DOMFilter.php index fa4ee0c..75d9688 100644 --- a/XHTMLCompiler/DOMFilter.php +++ b/XHTMLCompiler/DOMFilter.php @@ -18,7 +18,7 @@ abstract class XHTMLCompiler_DOMFilter extends XHTMLCompiler_Filter /** * Performs common initialization of DOM and XPath */ - protected function setup($dom) { + public function setup($dom) { $this->dom = $dom; $this->xpath = new 
DOMXPath($dom); $this->xpath->registerNamespace('html', "http://www.w3.org/1999/xhtml"); @@ -45,6 +45,17 @@ abstract class XHTMLCompiler_DOMFilter extends XHTMLCompiler_Filter return $this->xpath->query($expr, $context); } + /** + * Retrieves the element with the specified ID + * @note This is necessary because DOM doesn't know what the ID + * of HTML documents is, and due to the usage of proprietary + * elements we cannot use schemaValidate() to create the + * association. + */ + protected function getElementById($id) { + return $this->query("//*[@id='$id']")->item(0); + } + } ?> \ No newline at end of file diff --git a/XHTMLCompiler/DOMFilter/Acronymizer.php b/XHTMLCompiler/DOMFilter/Acronymizer.php index 793c293..66a1449 100644 --- a/XHTMLCompiler/DOMFilter/Acronymizer.php +++ b/XHTMLCompiler/DOMFilter/Acronymizer.php @@ -52,7 +52,6 @@ class XHTMLCompiler_DOMFilter_Acronymizer extends XHTMLCompiler_DOMFilter ); public function process(DOMDocument $dom, $page) { - $this->setup($dom); $nodes = $this->query("//html:acronym[not(@title)]"); foreach ($nodes as $node) { $acronym = $node->textContent; diff --git a/XHTMLCompiler/DOMFilter/GenerateTableOfContents.php b/XHTMLCompiler/DOMFilter/GenerateTableOfContents.php index 9682355..e7e9e44 100644 --- a/XHTMLCompiler/DOMFilter/GenerateTableOfContents.php +++ b/XHTMLCompiler/DOMFilter/GenerateTableOfContents.php @@ -11,8 +11,6 @@ class XHTMLCompiler_DOMFilter_GenerateTableOfContents extends XHTMLCompiler_DOMF public function process(DOMDocument $dom, $page) { - $this->setup($dom); - // test for ToC container, if not present don't bother $container = $this->query("//html:div[@id='toc']")->item(0); if (!$container) return; diff --git a/XHTMLCompiler/DOMFilter/Quoter.php b/XHTMLCompiler/DOMFilter/Quoter.php index 8549b04..26bafdb 100644 --- a/XHTMLCompiler/DOMFilter/Quoter.php +++ b/XHTMLCompiler/DOMFilter/Quoter.php @@ -27,7 +27,6 @@ class XHTMLCompiler_DOMFilter_Quoter extends XHTMLCompiler_DOMFilter } public function 
process(DOMDocument $dom, $page) { - $this->setup($dom); // first handle single-quotes $nodes = $this->query("//html:q//html:q"); diff --git a/XHTMLCompiler/DOMFilter/RSSGenerator.php b/XHTMLCompiler/DOMFilter/RSSGenerator.php new file mode 100644 index 0000000..b1155e9 --- /dev/null +++ b/XHTMLCompiler/DOMFilter/RSSGenerator.php @@ -0,0 +1,112 @@ +xpath->registerNamespace('rss', $this->namespace); + + // attempt to find declarations of the namespace + $nodes = $this->query("//attribute::*[namespace-uri() = '$this->namespace']"); + if (!$nodes->length) return; + + // grab the document's links to RSS feeds + // we require that the link have a href, a title and a type + // as well as an rss:for attribute specifying where to get the + // data from + $links = $this->query("//html:link[@href and @title and @type='application/rss+xml'][@rss:for]"); + + foreach ($links as $link) { + $this->generateRSS($link, $page); + } + + } + + /** + * Generates the RSS feed for a specific link in a document + * @param $link DOMElement we're generating feed for + * @param $page Page we're generating for + */ + protected function generateRSS($link, $page) { + // get data from the document + $location = $link->getAttribute('href'); + $title = $link->getAttribute('title'); + $path = $page->getPath(); + + // remove specialized attributes + $id = $link->getAttributeNS($this->namespace, 'for'); + $link->removeAttributeNS($this->namespace, 'for'); + $description = $link->getAttributeNS($this->namespace, 'description'); + $link->removeAttributeNS($this->namespace, 'description'); + + $lang = $this->dom->documentElement->getAttribute('xml:lang'); + + $data_source = $this->getElementById($id); + + // setup the RSS feed + $doc = new DOMDocument('1.0', 'UTF-8'); + $doc->formatOutput = true; + + $rss = $doc->createElement('rss'); + $rss->setAttribute('version', '2.0'); + $doc->appendChild($rss); + + $channel = $doc->createElement('channel'); + $rss->appendChild($channel); + + 
$channel->appendChild($doc->createElement('title', $title)); + $channel->appendChild($doc->createElement('link', $path)); + if ($lang) $channel->appendChild($doc->createElement('language', $lang)); + if ($description) $channel->appendChild($doc->createElement('description', $description)); + $channel->appendChild($doc->createElement('generator', 'XHTML Compiler')); + + // parse data source + $i = 0; + foreach ($data_source->childNodes as $src_item) { + if ($src_item->getAttribute('class') !== 'item') continue; + + $item_id = $src_item->getAttribute('id'); + if (!$item_id) { + $item_id = $id . '_item' . $i; + $src_item->setAttribute('id', $item_id); + $i++; + } + + $item = $doc->createElement('item'); + $item->appendChild($doc->createElement('link', $path . '#' . $item_id)); + $channel->appendChild($item); + + foreach ($src_item->childNodes as $element) { + switch ($element->getAttribute('class')) { + case 'title': + $item->appendChild($doc->createElement('title', + $element->textContent)); + break; + case 'date': + $item->appendChild($doc->createElement('pubDate', + $element->textContent)); + break; + case 'body': + $item->appendChild($doc->createElement('description', + $element->textContent)); + break; + } + } + } + + // save the feed + $doc->save($location); + + } + +} + +?> \ No newline at end of file diff --git a/XHTMLCompiler/FilterManager.php b/XHTMLCompiler/FilterManager.php index cfe687d..26bc4a8 100644 --- a/XHTMLCompiler/FilterManager.php +++ b/XHTMLCompiler/FilterManager.php @@ -96,13 +96,16 @@ class XHTMLCompiler_FilterManager $dom->loadXML($text); $dom->encoding = 'UTF-8'; foreach ($this->DOMFilters as $filter) { + $filter->setup($dom); $filter->process($dom, $page); } $text = $dom->saveXML(); + // remove all non-default namespace declarations + $text = preg_replace('/ xmlns:.+?=".+?"/', '', $text); foreach ($this->postTextFilters as $filter) { $text = $filter->process($text, $page); } - $text = str_replace(''."\n", '', $text); + $text = 
str_replace('< ?xml version="1.0" encoding="UTF-8"? >'."\n", '', $text); return $text; } diff --git a/XHTMLCompiler/Page.php b/XHTMLCompiler/Page.php index 672b4ec..c5ef568 100644 --- a/XHTMLCompiler/Page.php +++ b/XHTMLCompiler/Page.php @@ -96,6 +96,15 @@ class XHTMLCompiler_Page /** Returns relative path to source */ public function getSourcePath() { return $this->source->getName(); } + /** + * Returns a fully formed web path to the file + */ + public function getPath() { + $xc = XHTMLCompiler::getInstance(); + return 'http://' . $_SERVER['HTTP_HOST'] . + $xc->getConf('web_path') . '/' . $this->cache->getName(); + } + /** Returns contents of the cache/served file */ public function getCache() { return $this->cache->get(); } /** Returns contents of the source file */ diff --git a/config.default.php b/config.default.php index 948e5f2..1d5bcd4 100644 --- a/config.default.php +++ b/config.default.php @@ -35,4 +35,10 @@ $indexed_dirs = true; // directive. $directory_index = 'index.html'; +// ** Path to web root of the parent of XHTML Compiler's install library +// If you installed XHTML Compiler at /xhtml-compiler, nothing needs to +// be done. If you installed it at /subdir/xhtml-compiler, you need +// to set this variable to /subdir +$web_path = ''; + ?> \ No newline at end of file diff --git a/config.filters.php b/config.filters.php index b3bcae1..c2ed519 100644 --- a/config.filters.php +++ b/config.filters.php @@ -5,5 +5,6 @@ $filters->addDOMFilter('GenerateTableOfContents'); $filters->addDOMFilter('Acronymizer'); $filters->addDOMFilter('Quoter'); +$filters->addDOMFilter('RSSGenerator'); ?> \ No newline at end of file -- 2.11.4.GIT