1 -----------------------------------------------------------------------------
3 -- This version is interesting because instead of striping comment lines, it
4 -- turns them into "-- " style comments. This allows using haddock markup
5 -- in literate scripts without having to use "> --" prefix.
8 -- Module : Distribution.Simple.PreProcess.Unlit
11 -- Maintainer : cabal-devel@haskell.org
12 -- Portability : portable
14 -- Remove the \"literal\" markups from a Haskell source file, including
15 -- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"
16 module Distribution
.Simple
.PreProcess
.Unlit
(unlit
, plain
) where
18 import Data
.List
(mapAccumL)
19 import Distribution
.Compat
.Prelude
20 import Distribution
.Simple
.Errors
21 import Distribution
.Utils
.Generic
(safeInit
, safeLast
, safeTail
)
37 plain
:: String -> String -> String
40 classify
:: String -> Classified
41 classify
('>' : s
) = BirdTrack s
42 classify
('#' : s
) = case tokens s
of
43 (line
: file
@('"' : _ : _) : _)
45 && safeLast file == Just '"' ->
46 -- this shouldn't fail as we tested for 'all isDigit'
47 Line
(fromMaybe (error $ "panic! read @Int " ++ show line
) $ readMaybe line
) (safeTail
(safeInit file
)) -- TODO:eradicateNoParse
50 tokens
= unfoldr $ \str
-> case lex str
of
51 (t
@(_
: _
), str
') : _
-> Just
(t
, str
')
54 |
"begin{code}" `
isPrefixOf` s
= BeginCode
55 |
"end{code}" `
isPrefixOf` s
= EndCode
56 classify s |
all isSpace s
= Blank s
57 classify s
= Ordinary s
59 -- So the weird exception for comment indenting is to make things work with
60 -- haddock, see classifyAndCheckForBirdTracks below.
61 unclassify
:: Bool -> Classified
-> String
62 unclassify _
(BirdTrack s
) = ' ' : s
63 unclassify _
(Blank s
) = s
64 unclassify _
(Ordinary s
) = s
65 unclassify _
(Line n file
) = "# " ++ show n
++ " " ++ show file
66 unclassify _
(CPP s
) = '#' : s
67 unclassify
True (Comment
"") = " --"
68 unclassify
True (Comment s
) = " -- " ++ s
69 unclassify
False (Comment
"") = "--"
70 unclassify
False (Comment s
) = "-- " ++ s
71 unclassify _ _
= internalError
73 -- | 'unlit' takes a filename (for error reports), and transforms the
74 -- given string, to eliminate the literate comments from the program text.
75 unlit
:: FilePath -> String -> Either String CabalException
77 let (usesBirdTracks
, classified
) =
78 classifyAndCheckForBirdTracks
82 (Left
. unlines . map (unclassify usesBirdTracks
))
88 -- So haddock requires comments and code to align, since it treats comments
89 -- as following the layout rule. This is a pain for us since bird track
90 -- style literate code typically gets indented by two since ">" is replaced
91 -- by " " and people usually use one additional space of indent ie
92 -- "> then the code". On the other hand we cannot just go and indent all
93 -- the comments by two since that does not work for latex style literate
94 -- code. So the hacky solution we use here is that if we see any bird track
95 -- style code then we'll indent all comments by two, otherwise by none.
96 -- Of course this will not work for mixed latex/bird track .lhs files but
97 -- nobody does that, it's silly and specifically recommended against in the
100 classifyAndCheckForBirdTracks
=
101 flip mapAccumL False $ \seenBirdTrack line
->
102 let classification
= classify line
103 in (seenBirdTrack || isBirdTrack classification
, classification
)
105 isBirdTrack
(BirdTrack _
) = True
106 isBirdTrack _
= False
108 checkErrors ls
= case [e | Error e
<- ls
] of
110 (message
: _
) -> Right
(UnlitException
(f
++ ":" ++ show n
++ ": " ++ message
))
112 (f
, n
) = errorPos file
1 ls
113 errorPos f n
[] = (f
, n
)
114 errorPos f n
(Error _
: _
) = (f
, n
)
115 errorPos _ _
(Line n
' f
' : ls
) = errorPos f
' n
' ls
116 errorPos f n
(_
: ls
) = errorPos f
(n
+ 1) ls
118 -- Here we model a state machine, with each state represented by
119 -- a local function. We only have four states (well, five,
120 -- if you count the error state), but the rules
121 -- to transition between then are not so simple.
122 -- Would it be simpler to have more states?
124 -- Each state represents the type of line that was last read
125 -- i.e. are we in a comment section, or a latex-code section,
126 -- or a bird-code section, etc?
127 reclassify
:: [Classified
] -> [Classified
]
128 reclassify
= blank
-- begin in blank state
131 latex
(EndCode
: ls
) = Blank
"" : comment ls
132 latex
(BeginCode
: _
) = [Error
"\\begin{code} in code section"]
133 latex
(BirdTrack l
: ls
) = Ordinary
('>' : l
) : latex ls
134 latex
(l
: ls
) = l
: latex ls
137 blank
(EndCode
: _
) = [Error
"\\end{code} without \\begin{code}"]
138 blank
(BeginCode
: ls
) = Blank
"" : latex ls
139 blank
(BirdTrack l
: ls
) = BirdTrack l
: bird ls
140 blank
(Ordinary l
: ls
) = Comment l
: comment ls
141 blank
(l
: ls
) = l
: blank ls
144 bird
(EndCode
: _
) = [Error
"\\end{code} without \\begin{code}"]
145 bird
(BeginCode
: ls
) = Blank
"" : latex ls
146 bird
(Blank l
: ls
) = Blank l
: blank ls
147 bird
(Ordinary _
: _
) = [Error
"program line before comment line"]
148 bird
(l
: ls
) = l
: bird ls
151 comment
(EndCode
: _
) = [Error
"\\end{code} without \\begin{code}"]
152 comment
(BeginCode
: ls
) = Blank
"" : latex ls
153 comment
(CPP l
: ls
) = CPP l
: comment ls
154 comment
(BirdTrack _
: _
) = [Error
"comment line before program line"]
155 -- a blank line and another ordinary line following a comment
156 -- will be treated as continuing the comment. Otherwise it's
157 -- then end of the comment, with a blank line.
158 comment
(Blank l
: ls
@(Ordinary _
: _
)) = Comment l
: comment ls
159 comment
(Blank l
: ls
) = Blank l
: blank ls
160 comment
(Line n f
: ls
) = Line n f
: comment ls
161 comment
(Ordinary l
: ls
) = Comment l
: comment ls
162 comment
(Comment _
: _
) = internalError
163 comment
(Error _
: _
) = internalError
165 -- Re-implementation of 'lines', for better efficiency (but decreased laziness).
166 -- Also, importantly, accepts non-standard DOS and Mac line ending characters.
167 inlines
:: String -> [String]
168 inlines xs
= lines' xs
id
170 lines' [] acc
= [acc
[]]
171 lines' ('\^M
' : '\n' : s
) acc
= acc
[] : lines' s
id -- DOS
172 lines' ('\^M
' : s
) acc
= acc
[] : lines' s
id -- MacOS
173 lines' ('\n' : s
) acc
= acc
[] : lines' s
id -- Unix
174 lines' (c
: s
) acc
= lines' s
(acc
. (c
:))
177 internalError
= error "unlit: internal error"