4 // FilterMan.cs : Trivial implementation of a man-page filter.
7 // Michael Levy <mlevy@wardium.homeip.net>
9 // Copyright (C) 2004 Michael Levy
12 // Permission is hereby granted, free of charge, to any person obtaining a
13 // copy of this software and associated documentation files (the "Software"),
14 // to deal in the Software without restriction, including without limitation
15 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 // and/or sell copies of the Software, and to permit persons to whom the
17 // Software is furnished to do so, subject to the following conditions:
19 // The above copyright notice and this permission notice shall be included in
20 // all copies or substantial portions of the Software.
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 // DEALINGS IN THE SOFTWARE.
34 using System
.Text
.RegularExpressions
;
38 namespace Beagle
.Filters
{
40 public class FilterMan
: Beagle
.Daemon
.Filter
{
45 // Make this a general troff filter.
46 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-troff-man"));
47 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("text/x-troff-man"));
48 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-troff"));
49 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("text/x-troff"));
53 Right now we don't handle pages with just one line like:
55 Which is in strncpy.3.gz and points to strcpy.3.gz
57 protected void ParseManFile (StreamReader reader
)
61 The regular expression for a complete man header line is built to allow each field to be
62 either a run of non-space characters, or words separated by spaces and enclosed in quotes.
63 The regexp should be :
65 Regex headerRE = new Regex (@"^\.TH\s+" +
66 @"(?<title>(\S+|(""(\S+\s*)+"")))\s+" +
67 @"(?<section>\d+)\s+" +
68 @"(?<date>(\S+|(""(\S+\s*)+"")))\s+" +
69 @"(?<source>(\S+|(""(\S+\s*)+"")))\s+" +
70 @"(?<manual>(\S+|(""(\S+\s*)+"")))\s*" +
73 But there seem to be a number of broken man pages, and the current filter can also be used
74 for general troff pages, so only the title field of the header is matched.
76 Regex headerRE
= new Regex (@"^\.TH\s+" +
77 @"(?<title>(\S+|(""(\S+\s*)+"")))\s*");
79 while ((str
= reader
.ReadLine ()) != null) {
80 if (str
.StartsWith (".\"")) {
81 /* Comment in man page */
83 } else if (str
.StartsWith (".TH ")) {
84 MatchCollection matches
= headerRE
.Matches (str
);
85 if (matches
.Count
!= 1) {
86 Console
.Error
.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
90 foreach (Match theMatch
in matches
) {
91 AddProperty (Beagle
.Property
.New ("dc:title",
92 theMatch
.Groups
["title"].ToString ()));
97 // FIXME: We need to strip out other macros
107 override protected void DoOpen (FileInfo info
)
110 stream
= new FileStream (info
.FullName
,
114 reader
= new StreamReader (stream
);
116 override protected void DoPull ()
118 ParseManFile (reader
);