1 -- This file is part of Diohsc
2 -- Copyright (C) 2020 Martin Bays <mbays@sdf.org>
4 -- This program is free software: you can redistribute it and/or modify
5 -- it under the terms of version 3 of the GNU General Public License as
6 -- published by the Free Software Foundation, or any later version.
8 -- You should have received a copy of the GNU General Public License
9 -- along with this program. If not, see http://www.gnu.org/licenses/.
12 {-# LANGUAGE OverloadedStrings #-}
15 module TextGemini
where
17 import Control
.Monad
.State
18 import Data
.Maybe (catMaybes, isJust, mapMaybe)
22 import qualified Data
.Text
.Lazy
as T
25 data Link
= Link
{ linkUri
:: URIRef
, linkDescription
:: T
.Text
}
26 deriving (Eq
,Ord
,Show)
30 | LinkLine
{ linkLineIndex
:: Int, linkLineLink
:: Link
}
33 | PreformattedLine T
.Text T
.Text
34 | HeadingLine
Int T
.Text
38 deriving (Eq
,Ord
,Show)
40 newtype GeminiDocument
= GeminiDocument
{ geminiDocumentLines
:: [GeminiLine
] }
41 deriving (Eq
,Ord
,Show)
43 extractLinks
:: GeminiDocument
-> [Link
]
44 extractLinks
(GeminiDocument ls
) = mapMaybe linkOfLine ls
46 linkOfLine
(LinkLine _ link
) = Just link
47 linkOfLine _
= Nothing
53 deriving (Eq
,Ord
,Show)
55 showPreOpt
:: PreOpt
-> String
56 showPreOpt PreOptAlt
= "alt"
57 showPreOpt PreOptPre
= "pre"
58 showPreOpt PreOptBoth
= "both"
60 data GemRenderOpts
= GemRenderOpts
63 , grOptsWrapWidth
:: Int
64 , grOptsLinkDescFirst
:: Bool
65 } deriving (Eq
,Ord
,Show)
67 printGemDoc
:: GemRenderOpts
-> (URIRef
-> T
.Text
) -> GeminiDocument
-> [T
.Text
]
68 printGemDoc
(GemRenderOpts ansi preOpt width linkDescFirst
)
69 showUri
(GeminiDocument doc
) = concatMap printLine doc
71 printLine
(TextLine line
) = wrapWith
"" False $ stripControl line
72 printLine
(AltTextLine line
)
73 | preOpt
== PreOptPre || T
.null line
= []
74 |
otherwise = (:[]) $ applyIf ansi withBoldStr
"`` " <> stripControl line
76 printLine PreformatToggleLine
= []
78 printLine
(PreformattedLine alt line
)
79 | preOpt
== PreOptAlt
&& not (T
.null alt
) = []
81 -- We allow \t and CSI escape sequences in preformatted text only
82 let sanitise
= applyIf
(not ansi
) (stripCSI
.) sanitiseForDisplay
83 in (:[]) $ applyIf ansi
((resetCode
<>) . withBoldStr
) "` " <> sanitise line
84 printLine
(HeadingLine level line
) =
85 wrapWith
(T
.take (fromIntegral level
) (T
.repeat '#') <> " ") False
87 ( applyIf
(level
/= 2) withUnderlineStr
88 . applyIf
(level
< 3) withBoldStr
) $ stripControl line
89 printLine
(ItemLine line
) = wrapWith
"* " False $ stripControl line
90 printLine
(QuoteLine line
) = wrapWith
"> " True $ stripControl line
91 printLine
(ErrorLine line
) = (:[]) $ applyIf ansi
(withColourStr Red
)
92 "! Formatting error in text/gemini: " <> line
93 printLine
(LinkLine n
(Link uri desc
)) =
94 wrapWith
(T
.pack
$ '[' : show (n
+1) ++ if n
+1 < 10 then "] " else "] ") False
95 $ (if T
.null desc
then id
96 else (if linkDescFirst
then id else flip) (\a b
-> a
<> " " <> b
)
97 $ applyIf ansi
(withColourStr Cyan
) (stripControl desc
))
100 wrapWith
:: T
.Text
-> Bool -> T
.Text
-> [T
.Text
]
101 wrapWith pre onAll line
=
102 concat . zipWith prependHeader lineHeaders
$ wrap
(width
- n
) line
104 n
= visibleLength pre
105 lineHeaders
= (pre
:) . repeat $
106 if onAll
then pre
else T
.replicate (fromIntegral n
) " "
107 splitWordHeader
= if n
> 0 then "|" <> T
.replicate (fromIntegral n
- 1) " " else ""
108 prependHeader header
(l
:ls
) = header
<> l
: ((splitWordHeader
<>) <$> ls
)
109 prependHeader _
[] = []
111 wrap
:: Int -> T
.Text
-> [[T
.Text
]]
112 wrap wrapWidth line
= wrap
' [] "" 0 $ T
.words line
115 ww
= max maxWCWidth wrapWidth
116 wrap
' ls l _
[] = [ls
<> [l
] ]
117 wrap
' ls l n
(w
:ws
) =
118 let nw
= visibleLength w
119 l
' = if T
.null l
then w
else l
<> " " <> w
120 n
' = n
+ nw
+ (if T
.null l
then 0 else 1)
121 in if nw
> ww
&& n
+ 1 < ww
122 then let (a
,b
) = splitAtVisible ww l
'
123 in wrap
' (ls
<> [a
]) "" 0 $ b
:ws
125 then (ls
<> [l
]:) $ wrap
' [] "" 0 $ w
:ws
126 else wrap
' ls l
' n
' ws
129 data GeminiParseState
= GeminiParseState
{ numLinks
:: Int, preformatted
:: Maybe T
.Text
}
131 initialParseState
:: GeminiParseState
132 initialParseState
= GeminiParseState
0 Nothing
134 parseGemini
:: T
.Text
-> GeminiDocument
135 parseGemini text
= GeminiDocument
. catMaybes $ evalState
136 (forM
(T
.lines text
) (parseLine
. stripTrailingCR
)) initialParseState
138 stripTrailingCR
= T
.dropWhileEnd
(== '\r')
139 parseLine
:: T
.Text
-> State GeminiParseState
(Maybe GeminiLine
)
141 pre
<- gets preformatted
142 case T
.take 1 line
of
143 "`" | T
.take 3 line
== "```", isJust pre
-> do
144 modify
$ \s
-> s
{ preformatted
= Nothing
}
145 return $ case T
.strip
$ T
.drop 3 line
of
147 _
-> Just
. ErrorLine
$ "Illegal non-empty text after closing '```'"
148 -- ^The spec says we MUST ignore any text on a "```" line closing a preformatted
149 -- block. This seems like a gaping extensibility hole to me, so I'm interpreting it
150 -- as not disallowing an error message.
151 "`" | T
.take 3 line
== "```" ->
152 let alt
= T
.strip
$ T
.drop 3 line
in do
153 modify
$ \s
-> s
{ preformatted
= Just alt
}
154 return . Just
. AltTextLine
$ alt
155 _ | Just alt
<- pre
->
156 return . Just
$ PreformattedLine alt line
157 "=" | T
.take 2 line
== "=>" ->
158 case parseLink
. T
.dropWhile isGemWhitespace
$ T
.drop 2 line
of
159 Nothing
-> return . Just
. ErrorLine
$ "Unparseable link line: " <> line
162 modify
$ \s
-> s
{ numLinks
= n
+ 1 }
163 return . Just
$ LinkLine n link
164 "#" | headers
<- T
.length . T
.takeWhile (== '#') $ line
,
165 headers
> 0 && headers
< 4 ->
166 return . Just
. HeadingLine
(fromIntegral headers
) .
167 T
.dropWhile isGemWhitespace
. T
.dropWhile (== '#') $ line
168 "*" | T
.take 2 line
== "* " ->
169 return . Just
. ItemLine
$ T
.drop 2 line
171 return . Just
. QuoteLine
$ T
.drop 1 line
173 return . Just
$ TextLine line
175 parseLink
:: T
.Text
-> Maybe Link
177 let uriText
= T
.takeWhile (not . isGemWhitespace
) linkInfo
178 desc
= T
.dropWhile isGemWhitespace
. T
.dropWhile (not . isGemWhitespace
) $ linkInfo
179 in (`Link` desc
) <$> parseUriReference
(
185 isGemWhitespace
:: Char -> Bool
186 isGemWhitespace
= (`
elem`
(" \t"::String))