3 # A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96)
5 # Reads a text file given on standard input or named as first argument, and
6 # generates HTML 2.0 on standard output. Recognizes these constructions:
#     HTML element              pattern at the beginning of a line
#
#     section heading           (<number><period>)+<space>
#     numbered list element     <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element   <0-2 spaces><hyphen or asterisk><space>
#     preformatted section      <more than two spaces>
15 # Heading level is determined by the number of (<number><period>) segments.
16 # Blank lines force a separation of elements; if none of the above four
17 # types is indicated, a new paragraph begins. A line beginning with many
18 # spaces is interpreted as a continuation (instead of preformatted) after
19 # a list element. Headings are anchored; paragraphs starting with "Q." are
20 # emphasized, and those marked with "A." get their first sentence emphasized.
# Hyperlinks are created from references to:
#     URLs, explicitly marked using <URL:scheme://host...>
#     other questions, of the form "question <number>(<period><number>)*"
#     sections, of the form "section <number>".
import sys
import string
import regex
import regsub
import regex_syntax

# Select AWK-style pattern syntax before any pattern is compiled.
regex.set_syntax(regex_syntax.RE_SYNTAX_AWK)
# --------------------------------------------------------- regular expressions
# Line classifiers, tried against the start of each input line.  The leading
# spaces are significant: they are what tells a list item from a heading.

# Numbered list element: one or two leading spaces, then (<number>.)+ and
# spaces.  At least one leading space is required; otherwise this pattern
# would also accept every heading, and it is tested before headingprog.
orditemprog = regex.compile('  ?([1-9][0-9]*\.)+ +')
# Unnumbered list element: up to two leading spaces, '-' or '*', spaces.
itemprog = regex.compile(' ? ?[-*] +')
# Section heading: (<number>.)+ starting in the first column.
headingprog = regex.compile('([1-9][0-9]*\.)+ +')
# Preformatted text: more than two leading spaces.
prefmtprog = regex.compile('   ')
# Blank line: exactly one whitespace character (normally the newline).
blankprog = regex.compile('^[ \t\r\n]$')
# Paragraph markers "Q." and "A.", optionally indented.
questionprog = regex.compile(' *Q\. +')
answerprog = regex.compile(' *A\. +')
# First sentence of a paragraph (used to emphasize the start of an answer).
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

# Mail/news header fields, matched with the regex module's casefold table.
mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)

# Reference patterns, applied to the assembled body to create hyperlinks.
# NOTE(review): the '[^&]' exclusions suggest urlprog and addrprog were
# written to run on entity-escaped text; as written they match literal
# angle brackets.  Confirm against the escaping done when elements are
# output before changing either side.
urlprog = regex.compile('<URL:([^&]+)>')
addrprog = regex.compile('<([^>@:]+@[^&@:]+)>')
# The leading number may have several digits, so that references such as
# "question 12.3" are captured whole (headings already allow this).
qrefprog = regex.compile('question +([1-9][0-9]*(\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
# Characters that are special in HTML.
entityprog = regex.compile('[&<>]')
# ------------------------------------------------------------ global variables
# body:    list of HTML fragments produced so far (joined at the very end).
# ollev:   current nesting depth of ordered lists (0 = no open <ol>).
# ullev:   1 while an unordered list is open, else 0.
# These three are read and updated by the list helpers, by spew() and by the
# main loop, so they must exist before any of that code runs.
body = []
ollev = ullev = 0

# element: tag name of the element being accumulated ('' = none pending).
# content: text gathered so far for that element.
# secnum:  most recent section/item number prefix seen ('' until the first).
# version: text following a "Version: " header line, used in the page title.
element = content = secnum = version = ''
# ----------------------------------------------------- for making nested lists
# NOTE(review): only the '</li>' removal and the closing-tag append survive
# in the damaged source; the depth bookkeeping and the opening '<ol>' are
# reconstructed from the callers, whose `while level > ollev: dnol()` and
# `while level < ollev: upol()` loops rely on ollev moving by one per call.

def dnol():
    """Open one more level of ordered list.

    Increments the global `ollev` and appends '<ol>' to `body`.  If the
    last fragment is a list item's closing tag, that tag is removed first
    so the new list nests inside the item; upol() supplies the matching
    '</li>' when the nested list is closed.
    """
    global body, ollev
    ollev = ollev + 1
    # Guard against an empty body: there is no previous fragment to inspect.
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')


def upol():
    """Close the innermost ordered list.

    Decrements the global `ollev`.  When an outer list is still open the
    enclosing list item is closed as well ('</ol></li>'); at the outermost
    level only '</ol>' is appended.
    """
    global body, ollev
    ollev = ollev - 1
    if ollev:
        body.append('</ol></li>')
    else:
        body.append('</ol>')
# --------------------------------- output one element and convert its contents
def spew(clearol=0, clearul=0):
    """Append the pending element to `body`, then optionally close lists.

    The pending element is described by the globals `element` (tag name)
    and `content` (its text).  When both are non-empty the text is
    decorated and appended to `body` as an opening fragment plus a separate
    closing-tag fragment, and `content` is cleared so that a repeated call
    does not emit the same element twice.

    clearol -- if true, close every open ordered list (calls upol() until
               `ollev` reaches zero).
    clearul -- if true, close the open unordered list, if any.

    Callers pass any true value for the flags (for example 'clear ol').
    """
    global content, body, ollev, ullev

    # Nothing pending: skip output.  This also protects element[0] below,
    # since `element` starts out as the empty string.
    if content and element:
        if entityprog.search(content) > -1:
            # NOTE(review): as written, each call replaces a character with
            # itself, so no escaping happens.  The replacements were
            # presumably entity references lost in transit; restoring them
            # would also require urlprog/addrprog to match the escaped
            # form, so both must be changed together.
            content = regsub.gsub('&', '&', content)
            content = regsub.gsub('<', '<', content)
            content = regsub.gsub('>', '>', content)

        n = questionprog.match(content)
        if n > 0:                           # question paragraph
            # Drop the "Q." marker and emphasize the whole question.
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                fragid = regsub.gsub('^ +|\.? +$', '', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':               # heading in the main text
            # Anchor name is the section number without surrounding spaces
            # or the trailing period.
            fragid = regsub.gsub('^ +|\.? +$', '', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        n = answerprog.match(content)
        if n > 0:                           # answer paragraph
            # Drop the "A." marker and make the first sentence strong.
            content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])

        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    while clearol and ollev:
        upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0
# ---------------------------------------------------------------- main program
# Read the FAQ from the file named by the first argument, else from stdin,
# classify every line, and accumulate HTML fragments in `body` via spew().
#
# NOTE(review): the damaged source had lost the loop header, the `secnum`
# assignments, the `continue` statements, the flush calls before list items,
# the body of the blank-line branch and the if/else around the preformatted
# case.  They are reconstructed here from the surviving statements and
# comments; the spew() flags used for list items in particular should be
# confirmed against a pristine copy.
if len(sys.argv) > 1 and sys.argv[1]:
    faq = open(sys.argv[1])
else:
    faq = sys.stdin
lines = faq.readlines()
if faq is not sys.stdin:
    faq.close()

for line in lines:
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    n = orditemprog.match(line)
    if n > 0:                               # make ordered list item
        spew(0, 'clear ul')                 # flush previous; keep <ol> open
        secnum = line[:n]
        level = string.count(secnum, '.')   # one period per nesting level
        while level > ollev: dnol()
        while level < ollev: upol()
        element, content = 'li', line[n:]
        continue

    n = itemprog.match(line)
    if n > 0:                               # make unordered list item
        spew('clear ol', 0)                 # flush previous; keep <ul> open
        if ullev == 0:
            body.append('<ul>')
            ullev = 1
        element, content = 'li', line[n:]
        continue

    n = headingprog.match(line)
    if n > 0:                               # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:n]
        sys.stderr.write(line)              # echo headings as progress
        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
        continue

    n = 0
    if not secnum:                          # haven't hit body yet
        # Mail header lines before the first section are kept preformatted;
        # the first "Version: " line also supplies the title's version.
        n = mailhdrprog.match(line)
        if not version:
            v = regex.match('Version: ', line)
            if v > 0:
                version = line[v:]
    if n <= 0 and element != 'li':          # not pre if after a list item
        n = prefmtprog.match(line)
    if n > 0:                               # make preformatted element
        if element == 'pre':
            content = content + line
        else:
            spew('clear ol', 'clear ul')
            element, content = 'pre', line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element, content = 'p', line

spew()                                      # output last element
# Merge the collected fragments into one string, then turn each kind of
# recognised reference into a hyperlink, in this fixed order.
body = string.joinfields(body, '')
for linkprog, anchor in ((urlprog, '<a href="\\1">\\1</a>'),
                         (addrprog, '<a href="mailto:\\1">\\1</a>'),
                         (qrefprog, '<a href="#\\1">question \\1</a>'),
                         (srefprog, '<a href="#\\1">section \\1</a>')):
    body = regsub.gsub(linkprog, anchor, body)

# Write the finished HTML 2.0 document to standard output, one chunk per
# line.
for chunk in ('<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>',
              '<head><title>Python Frequently-Asked Questions v' + version,
              "</title></head><body>(This file was generated using Ping's",
              '<a href="faq2html.py">faq2html.py</a>.)',
              body + '</body></html>'):
    sys.stdout.write(chunk + '\n')