What isn't surprising is the fact that the URI comparison sucks; but that the effect was...
[beagle.git] / Filters / FilterMan.cs
blob2b726b2d84738cb51b3560238b2ddbdcb46b15f3
1 //
2 // Beagle
3 //
4 // FilterMan.cs : Trivial implementation of a man-page filter.
5 //
6 // Author :
7 // Michael Levy <mlevy@wardium.homeip.net>
8 //
9 // Copyright (C) 2004 Michael levy
12 // Permission is hereby granted, free of charge, to any person obtaining a
13 // copy of this software and associated documentation files (the "Software"),
14 // to deal in the Software without restriction, including without limitation
15 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 // and/or sell copies of the Software, and to permit persons to whom the
17 // Software is furnished to do so, subject to the following conditions:
19 // The above copyright notice and this permission notice shall be included in
20 // all copies or substantial portions of the Software.
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 // DEALINGS IN THE SOFTWARE.
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Text.RegularExpressions;
36 using Beagle.Daemon;
namespace Beagle.Filters {

	/// <summary>
	/// Trivial implementation of a man-page filter.  The header line (.TH)
	/// supplies the "dc:title" property, troff comment lines are skipped and
	/// every other line is appended to the text as-is.
	/// </summary>
	public class FilterMan : Beagle.Daemon.Filter {

		// Reader over the file handed to DoOpen.  DoPull closes it and resets
		// it to null once the file has been consumed.
		StreamReader reader;

		// The regular expression for a complete man header line is built to
		// allow a suite of non-spaces, or words separated by spaces which are
		// encompassed in quotes.  The regexp should be:
		//
		//   Regex headerRE = new Regex (@"^\.TH\s+" +
		//                               @"(?<title>(\S+|(""(\S+\s*)+"")))\s+" +
		//                               @"(?<section>\d+)\s+" +
		//                               @"(?<date>(\S+|(""(\S+\s*)+"")))\s+" +
		//                               @"(?<source>(\S+|(""(\S+\s*)+"")))\s+" +
		//                               @"(?<manual>(\S+|(""(\S+\s*)+"")))\s*" +
		//                               "$");
		//
		// But there seem to be a number of broken man pages, and the current
		// filter can be used for general troff pages, so only the title is
		// matched.
		//
		// The quoted alternative has to be tried before the bare-token one:
		// with \S+ first, a header such as
		//   .TH "GIT LOG" 1
		// yields the title "GIT (cut at the first space) because \S+ always
		// succeeds and the quoted branch is never reached.  The quoted branch
		// uses [^"]* rather than (\S+\s*)+ so that it cannot backtrack
		// exponentially on an unterminated quote, and the lookahead makes it
		// apply only when the closing quote really ends the token.  The
		// surrounding quotes stay part of the captured title, exactly as they
		// did for single-word quoted titles before.
		static readonly Regex headerRE = new Regex (@"^\.TH\s+" +
							    @"(?<title>""[^""]*""(?=\s|$)|\S+)");

		/// <summary>
		/// Registers the troff / man-page MIME types handled by this filter
		/// and turns on SnippetMode.
		/// </summary>
		public FilterMan ()
		{
			// Make this a general troff filter.
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-troff-man"));
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("text/x-troff-man"));
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("application/x-troff"));
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("text/x-troff"));
			AddSupportedFlavor (FilterFlavor.NewFromMimeType ("text/troff"));

			SnippetMode = true;
		}

		// FIXME:
		// Right now we don't handle pages with just one line like:
		//   .so man3/strcpy.3
		// Which is in strncpy.3.gz and points to strcpy.3.gz

		/// <summary>
		/// Reads <paramref name="reader"/> line by line until the end of the
		/// stream, then calls Finished ().
		/// </summary>
		/// <remarks>
		/// Lines starting with <c>."</c> are troff comments and are skipped.
		/// A line starting with <c>.TH </c> is the page header: its title is
		/// added as the "dc:title" property, or an error is written to
		/// standard error when no title can be extracted.  Every other line
		/// is passed to AppendText unchanged.  The reader is not closed here;
		/// that is left to the caller.
		/// </remarks>
		/// <param name="reader">Source of the man page text; must not be null.</param>
		protected void ParseManFile (StreamReader reader)
		{
			string str;

			while ((str = reader.ReadLine ()) != null) {
				if (str.StartsWith (".\"")) {
					/* Comment in man page */
					continue;
				} else if (str.StartsWith (".TH ")) {
					// The pattern is anchored at the start of the line, so
					// there is either exactly one match or none at all.
					MatchCollection matches = headerRE.Matches (str);
					if (matches.Count != 1) {
						Console.Error.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
									 matches.Count, str);
						continue;
					}

					AddProperty (Beagle.Property.New ("dc:title",
									  matches [0].Groups ["title"].ToString ()));
				} else {
					// A "regular" string

					// FIXME: We need to strip out other macros
					// (.SH for example)
					AppendText (str);
				}
			}

			Finished ();
		}

		/// <summary>
		/// Opens <paramref name="info"/> for shared reading and keeps a
		/// StreamReader on it for the following DoPull call.
		/// </summary>
		/// <param name="info">The file to be filtered.</param>
		override protected void DoOpen (FileInfo info)
		{
			Stream stream;
			stream = new FileStream (info.FullName,
						 FileMode.Open,
						 FileAccess.Read,
						 FileShare.Read);
			reader = new StreamReader (stream);
		}

		/// <summary>
		/// Parses the whole file opened by DoOpen in a single pass and then
		/// releases the underlying file handle.
		/// </summary>
		override protected void DoPull ()
		{
			if (reader == null) {
				// Nothing is open, or an earlier pull already consumed
				// and closed the file: there is no more text to offer.
				Finished ();
				return;
			}

			try {
				ParseManFile (reader);
			} finally {
				// ParseManFile reads to the end of the stream, so the
				// reader is of no further use.  Closing it here (which
				// also closes the FileStream) keeps the handle from
				// lingering until garbage collection.
				reader.Close ();
				reader = null;
			}
		}
	}
}