Don't throw EncodingFoundException unless asked to. Should remove the occasional...
[beagle.git] / Filters / FilterMan.cs
blob1dfca545a5935d2db64c3c5ee54af78c71422d2d
1 //
2 // Beagle
3 //
4 // FilterMan.cs : Trivial implementation of a man-page filter.
5 //
6 // Author :
7 // Michael Levy <mlevy@wardium.homeip.net>
8 //
9 // Copyright (C) 2004 Michael levy
12 // Permission is hereby granted, free of charge, to any person obtaining a
13 // copy of this software and associated documentation files (the "Software"),
14 // to deal in the Software without restriction, including without limitation
15 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 // and/or sell copies of the Software, and to permit persons to whom the
17 // Software is furnished to do so, subject to the following conditions:
19 // The above copyright notice and this permission notice shall be included in
20 // all copies or substantial portions of the Software.
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 // DEALINGS IN THE SOFTWARE.
31 using System;
32 using System.IO;
33 using System.Text;
34 using System.Text.RegularExpressions;
36 using Beagle.Daemon;
38 namespace Beagle.Filters {
40 public class FilterMan : Beagle.Daemon.Filter {
41 StreamReader reader;
/// <summary>
/// Registers every MIME type this filter accepts: the man-page specific
/// troff types as well as the generic troff ones.
/// </summary>
public FilterMan ()
{
	// Make this a general troff filter, not only a man-page one.
	string [] mimeTypes = {
		"application/x-troff-man",
		"text/x-troff-man",
		"application/x-troff",
		"text/x-troff"
	};

	// Registration order is the same as the order of the table above.
	foreach (string mimeType in mimeTypes) {
		AddSupportedFlavor (FilterFlavor.NewFromMimeType (mimeType));
	}
}
/*
  FIXME:
  Right now we don't handle pages that consist of a single redirect line
  such as:
     .so man3/strcpy.3
  (this one lives in strncpy.3.gz and points to strcpy.3.gz).
*/

/// <summary>
/// Reads a troff/man source line by line. Comment lines (starting with
/// <c>.\"</c>) are skipped, the first argument of a <c>.TH</c> header line
/// is recorded as the <c>dc:title</c> property, and every other line is
/// handed to AppendText () unchanged. Finished () is called once the
/// reader is exhausted.
/// </summary>
/// <param name="reader">Reader positioned at the start of the page source.</param>
protected void ParseManFile (StreamReader reader)
{
	// A strict pattern for a complete man header line would accept, for
	// each argument, either a run of non-space characters or several
	// words enclosed in double quotes:
	//
	//   Regex headerRE = new Regex (@"^\.TH\s+" +
	//                               @"(?<title>(\S+|(""(\S+\s*)+"")))\s+" +
	//                               @"(?<section>\d+)\s+" +
	//                               @"(?<date>(\S+|(""(\S+\s*)+"")))\s+" +
	//                               @"(?<source>(\S+|(""(\S+\s*)+"")))\s+" +
	//                               @"(?<manual>(\S+|(""(\S+\s*)+"")))\s*" +
	//                               "$");
	//
	// But there seem to be a number of broken man pages, and this filter
	// is also used for general troff pages, so only the title is matched.
	//
	// The quoted alternative must come first: alternatives are tried left
	// to right and the pattern is not anchored at the end, so with \S+ in
	// front a title like "FOO BAR" was cut down to the token "FOO
	// (leading quote included).
	Regex headerRE = new Regex (@"^\.TH\s+" +
				    @"(?<title>(""[^""]+""|\S+))\s*");

	string line;
	while ((line = reader.ReadLine ()) != null) {
		if (line.StartsWith (".\"")) {
			/* Comment in man page */
			continue;
		}

		if (! line.StartsWith (".TH ")) {
			// A "regular" string
			//
			// FIXME: We need to strip out other macros
			// (.SH for example)
			AppendText (line);
			continue;
		}

		MatchCollection matches = headerRE.Matches (line);
		if (matches.Count != 1) {
			Console.Error.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
						 matches.Count, line);
			continue;
		}

		string title = matches [0].Groups ["title"].ToString ();

		// Drop the enclosing quotes of a quoted title so that the
		// property holds the bare title text. A lone pair of quotes
		// ("") is left as is rather than turned into an empty title.
		if (title.Length > 2 && title [0] == '"' && title [title.Length - 1] == '"') {
			title = title.Substring (1, title.Length - 2);
		}

		AddProperty (Beagle.Property.New ("dc:title", title));
	}

	Finished ();
}
/// <summary>
/// Opens the file described by <paramref name="info"/> and keeps a
/// StreamReader over it in the <c>reader</c> field.
/// </summary>
/// <param name="info">The file to read the page source from.</param>
protected override void DoOpen (FileInfo info)
{
	// Existing file only, read-only access, shared with other readers.
	reader = new StreamReader (new FileStream (info.FullName,
						   FileMode.Open,
						   FileAccess.Read,
						   FileShare.Read));
}
/// <summary>
/// Runs the man-page parser over the reader stored in the <c>reader</c> field.
/// </summary>
protected override void DoPull ()
{
	this.ParseManFile (this.reader);
}