Support GHC 9.12
[cabal.git] / Cabal / src / Distribution / Simple / PreProcess / Unlit.hs
blobb4ed0ed41a609bab7d1214b3089b71bac0af9ea3
1 -----------------------------------------------------------------------------
3 -- This version is interesting because instead of striping comment lines, it
4 -- turns them into "-- " style comments. This allows using haddock markup
5 -- in literate scripts without having to use "> --" prefix.
7 -- |
8 -- Module : Distribution.Simple.PreProcess.Unlit
9 -- Copyright : ...
11 -- Maintainer : cabal-devel@haskell.org
12 -- Portability : portable
14 -- Remove the \"literal\" markups from a Haskell source file, including
15 -- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"
16 module Distribution.Simple.PreProcess.Unlit (unlit, plain) where
18 import Data.List (mapAccumL)
19 import Distribution.Compat.Prelude
20 import Distribution.Simple.Errors
21 import Distribution.Utils.Generic (safeInit, safeLast, safeTail)
22 import Prelude ()
24 data Classified
25 = BirdTrack String
26 | Blank String
27 | Ordinary String
28 | Line !Int String
29 | CPP String
30 | BeginCode
31 | EndCode
32 | -- output only:
33 Error String
34 | Comment String
36 -- | No unliteration.
37 plain :: String -> String -> String
38 plain _ hs = hs
40 classify :: String -> Classified
41 classify ('>' : s) = BirdTrack s
42 classify ('#' : s) = case tokens s of
43 (line : file@('"' : _ : _) : _)
44 | all isDigit line
45 && safeLast file == Just '"' ->
46 -- this shouldn't fail as we tested for 'all isDigit'
47 Line (fromMaybe (error $ "panic! read @Int " ++ show line) $ readMaybe line) (safeTail (safeInit file)) -- TODO:eradicateNoParse
48 _ -> CPP s
49 where
50 tokens = unfoldr $ \str -> case lex str of
51 (t@(_ : _), str') : _ -> Just (t, str')
52 _ -> Nothing
53 classify ('\\' : s)
54 | "begin{code}" `isPrefixOf` s = BeginCode
55 | "end{code}" `isPrefixOf` s = EndCode
56 classify s | all isSpace s = Blank s
57 classify s = Ordinary s
59 -- So the weird exception for comment indenting is to make things work with
60 -- haddock, see classifyAndCheckForBirdTracks below.
61 unclassify :: Bool -> Classified -> String
62 unclassify _ (BirdTrack s) = ' ' : s
63 unclassify _ (Blank s) = s
64 unclassify _ (Ordinary s) = s
65 unclassify _ (Line n file) = "# " ++ show n ++ " " ++ show file
66 unclassify _ (CPP s) = '#' : s
67 unclassify True (Comment "") = " --"
68 unclassify True (Comment s) = " -- " ++ s
69 unclassify False (Comment "") = "--"
70 unclassify False (Comment s) = "-- " ++ s
71 unclassify _ _ = internalError
73 -- | 'unlit' takes a filename (for error reports), and transforms the
74 -- given string, to eliminate the literate comments from the program text.
75 unlit :: FilePath -> String -> Either String CabalException
76 unlit file input =
77 let (usesBirdTracks, classified) =
78 classifyAndCheckForBirdTracks
79 . inlines
80 $ input
81 in either
82 (Left . unlines . map (unclassify usesBirdTracks))
83 Right
84 . checkErrors
85 . reclassify
86 $ classified
87 where
88 -- So haddock requires comments and code to align, since it treats comments
89 -- as following the layout rule. This is a pain for us since bird track
90 -- style literate code typically gets indented by two since ">" is replaced
91 -- by " " and people usually use one additional space of indent ie
92 -- "> then the code". On the other hand we cannot just go and indent all
93 -- the comments by two since that does not work for latex style literate
94 -- code. So the hacky solution we use here is that if we see any bird track
95 -- style code then we'll indent all comments by two, otherwise by none.
96 -- Of course this will not work for mixed latex/bird track .lhs files but
97 -- nobody does that, it's silly and specifically recommended against in the
98 -- H98 unlit spec.
100 classifyAndCheckForBirdTracks =
101 flip mapAccumL False $ \seenBirdTrack line ->
102 let classification = classify line
103 in (seenBirdTrack || isBirdTrack classification, classification)
105 isBirdTrack (BirdTrack _) = True
106 isBirdTrack _ = False
108 checkErrors ls = case [e | Error e <- ls] of
109 [] -> Left ls
110 (message : _) -> Right (UnlitException (f ++ ":" ++ show n ++ ": " ++ message))
111 where
112 (f, n) = errorPos file 1 ls
113 errorPos f n [] = (f, n)
114 errorPos f n (Error _ : _) = (f, n)
115 errorPos _ _ (Line n' f' : ls) = errorPos f' n' ls
116 errorPos f n (_ : ls) = errorPos f (n + 1) ls
118 -- Here we model a state machine, with each state represented by
119 -- a local function. We only have four states (well, five,
120 -- if you count the error state), but the rules
121 -- to transition between then are not so simple.
122 -- Would it be simpler to have more states?
124 -- Each state represents the type of line that was last read
125 -- i.e. are we in a comment section, or a latex-code section,
126 -- or a bird-code section, etc?
127 reclassify :: [Classified] -> [Classified]
128 reclassify = blank -- begin in blank state
129 where
130 latex [] = []
131 latex (EndCode : ls) = Blank "" : comment ls
132 latex (BeginCode : _) = [Error "\\begin{code} in code section"]
133 latex (BirdTrack l : ls) = Ordinary ('>' : l) : latex ls
134 latex (l : ls) = l : latex ls
136 blank [] = []
137 blank (EndCode : _) = [Error "\\end{code} without \\begin{code}"]
138 blank (BeginCode : ls) = Blank "" : latex ls
139 blank (BirdTrack l : ls) = BirdTrack l : bird ls
140 blank (Ordinary l : ls) = Comment l : comment ls
141 blank (l : ls) = l : blank ls
143 bird [] = []
144 bird (EndCode : _) = [Error "\\end{code} without \\begin{code}"]
145 bird (BeginCode : ls) = Blank "" : latex ls
146 bird (Blank l : ls) = Blank l : blank ls
147 bird (Ordinary _ : _) = [Error "program line before comment line"]
148 bird (l : ls) = l : bird ls
150 comment [] = []
151 comment (EndCode : _) = [Error "\\end{code} without \\begin{code}"]
152 comment (BeginCode : ls) = Blank "" : latex ls
153 comment (CPP l : ls) = CPP l : comment ls
154 comment (BirdTrack _ : _) = [Error "comment line before program line"]
155 -- a blank line and another ordinary line following a comment
156 -- will be treated as continuing the comment. Otherwise it's
157 -- then end of the comment, with a blank line.
158 comment (Blank l : ls@(Ordinary _ : _)) = Comment l : comment ls
159 comment (Blank l : ls) = Blank l : blank ls
160 comment (Line n f : ls) = Line n f : comment ls
161 comment (Ordinary l : ls) = Comment l : comment ls
162 comment (Comment _ : _) = internalError
163 comment (Error _ : _) = internalError
165 -- Re-implementation of 'lines', for better efficiency (but decreased laziness).
166 -- Also, importantly, accepts non-standard DOS and Mac line ending characters.
167 inlines :: String -> [String]
168 inlines xs = lines' xs id
169 where
170 lines' [] acc = [acc []]
171 lines' ('\^M' : '\n' : s) acc = acc [] : lines' s id -- DOS
172 lines' ('\^M' : s) acc = acc [] : lines' s id -- MacOS
173 lines' ('\n' : s) acc = acc [] : lines' s id -- Unix
174 lines' (c : s) acc = lines' s (acc . (c :))
176 internalError :: a
177 internalError = error "unlit: internal error"