4 // FilterMan.cs : Trivial implementation of a man-page filter.
7 // Michael Levy <mlevy@wardium.homeip.net>
9 // Copyright (C) 2004 Michael Levy
12 // Permission is hereby granted, free of charge, to any person obtaining a
13 // copy of this software and associated documentation files (the "Software"),
14 // to deal in the Software without restriction, including without limitation
15 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 // and/or sell copies of the Software, and to permit persons to whom the
17 // Software is furnished to do so, subject to the following conditions:
19 // The above copyright notice and this permission notice shall be included in
20 // all copies or substantial portions of the Software.
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 // DEALINGS IN THE SOFTWARE.
34 using System
.Text
.RegularExpressions
;
38 namespace Beagle
.Filters
{
// Man-page filter derived from Beagle.Daemon.Filter. The flavor list below
// registers both man-specific and generic troff MIME types.
40 public class FilterMan
: Beagle
.Daemon
.Filter
{
45 // Make this a general troff filter.
// Each AddSupportedFlavor call registers one MIME type, built with
// FilterFlavor.NewFromMimeType: two troff-man variants followed by three
// plain troff variants.
46 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-troff-man"));
47 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("text/x-troff-man"));
48 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/x-troff"));
49 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("text/x-troff"));
50 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("text/troff"));
56 Right now we don't handle pages with just one line like:
58 Which is in strncpy.3.gz and points to strcpy.3.gz
// Reads a man/troff source line by line and records the page title.
//   reader: text source; consumed with ReadLine until it returns null.
// For a ".TH " line with exactly one headerRE match, the "title" group is
// added as the "dc:title" property; any other match count is reported on
// Console.Error.
60 protected void ParseManFile (StreamReader reader
)
64 The regular expression for a complete man header line is built to allow a suite of
65 non-spaces, or words separated by spaces which are encompassed in quotes
66 The regexp should be :
68 Regex headerRE = new Regex (@"^\.TH\s+" +
69 @"(?<title>(\S+|(""(\S+\s*)+"")))\s+" +
70 @"(?<section>\d+)\s+" +
71 @"(?<date>(\S+|(""(\S+\s*)+"")))\s+" +
72 @"(?<source>(\S+|(""(\S+\s*)+"")))\s+" +
73 @"(?<manual>(\S+|(""(\S+\s*)+"")))\s*" +
76 But there seem to be a number of broken man pages, and the current filter can be used
77 for general troff pages.
// Relaxed pattern: only the title field after ".TH" is captured, as either a
// run of non-space characters or a double-quoted group of words.
79 Regex headerRE
= new Regex (@"^\.TH\s+" +
80 @"(?<title>(\S+|(""(\S+\s*)+"")))\s*");
// Process the input one line at a time until ReadLine returns null.
82 while ((str
= reader
.ReadLine ()) != null) {
// NOTE(review): the C# literal ".\"" denotes the two characters dot +
// double-quote, whereas roff comment lines conventionally begin with
// dot + backslash + double-quote. Confirm whether ".\\\"" was intended.
83 if (str
.StartsWith (".\"")) {
84 /* Comment in man page */
// Header line: run the relaxed pattern over it.
86 } else if (str
.StartsWith (".TH ")) {
87 MatchCollection matches
= headerRE
.Matches (str
);
// Anything other than exactly one match is reported on standard error.
88 if (matches
.Count
!= 1) {
89 Console
.Error
.WriteLine ("In title Expected 1 match but found {0} matches in '{1}'",
// Add the captured "title" group as the dc:title property.
93 foreach (Match theMatch
in matches
) {
94 AddProperty (Beagle
.Property
.New ("dc:title",
95 theMatch
.Groups
["title"].ToString ()));
100 // FIXME: We need to strip out other macros
// Override that opens the file at info.FullName as a FileStream and wraps
// that stream in a StreamReader assigned to reader, which DoPull passes to
// ParseManFile.
//   info: the file to open; only its FullName is read here.
110 override protected void DoOpen (FileInfo info
)
113 stream
= new FileStream (info
.FullName
,
// Wrap the stream so the parser can read it as text lines.
117 reader
= new StreamReader (stream
);
// Override that hands reader (assigned in DoOpen) to ParseManFile.
119 override protected void DoPull ()
121 ParseManFile (reader
);