main.myr

   1 use std
   2
   /*
      Entry point: read all of fd 0, turn it into paragraphs, join them,
      and write the result wrapped at 72 cells.

      If the input cannot be read, the error is reported on fd 2 and the
      process exits with status 1 so callers (shells, editors piping text
      through this filter) can tell that nothing was produced.
    */
   const main = {
           match std.fslurp(0)
           | `std.Ok s:
                   regurgitate(digest(ingest(s)), 72)
           | `std.Err e:
                   std.fput(2, "Error: {}\n", e)
                   /* do not report success when we produced no output */
                   std.exit(1)
           ;;
   }
  11
  12 type paragraph = struct /* one unit of text handled by ingest/digest/regurgitate */
  13         first_line_prefix : char[:] /* chars ingest saw before the first textual char; written before the content on output */
  14         gen_line_prefix : char[:] /* written after every line break that regurgitate inserts while wrapping */
  15         content : char[:] /* the text itself; merge_para appends the text of absorbed paragraphs here */
  16         empty : bool /* set by digest: true when content has length zero */
  17         merged : bool /* set by merge_para once another paragraph has been folded into this one */
  18 ;;
  19
  20 type state = union /* per-line scanner state used by ingest */
  21         `Reading_prefix /* no textual character seen yet on this line; chars go to the prefix */
  22         `Reading_line /* inside content; the next char is kept, blank or not */
  23         `Reading_line_last_was_ws /* inside content right after a blank; further blanks are dropped */
  24 ;;
  25
  26 /*
  27    The algorithm:
  28
  29    Read each line, and strip off the prefix (the whitespace, markers
  30    like -/•/·/#/45:/_1_/) from the content. Each line becomes its own
  31    paragraph. If the content is all whitespace, the paragraph is
  32    considered empty.
  33
  34    Now collapse all paragraphs. Empty paragraphs with equivalent
  35    prefixes (that is, up to whitespace) collapse together. Non-empty
  36    paragraphs with equivalent prefixes collapse together. If two
  37    adjacent, non-empty paragraphs A, B have non-equivalent prefixes, A
  38    is preceded by an empty paragraph (or nothing), B is followed by an
  39    empty paragraph (or nothing), AND A itself was not collapsed, then
  40    merge A and B, with A governing the first_line_prefix and B the
  41    gen_line_prefix.
  42
  43    Now output. That's easy, you made Ori take your stupid Unicode tables
  44    so you know what the cell width is.
  45  */
  46
  /*
     Split raw input into one paragraph per line.

     For every line, the characters in front of the first textual
     character (see is_textual_content) are stored as first_line_prefix
     and the remainder as content. Carriage returns are dropped, and a
     run of blanks inside the content is reduced to its first blank.
     A final line without a trailing newline is kept as long as it has
     any prefix or content.
   */
  const ingest = { str : byte[:]
          /* what a paragraph looks like before anything was read into it */
          var untouched : paragraph = [
                  .first_line_prefix = [][:],
                  .gen_line_prefix = [][:],
                  .content = [][:],
                  .empty = false,
                  .merged = false,
          ]
          var paras : paragraph[:] = [][:]
          var cur : paragraph = untouched
          var mode : state = `Reading_prefix

          for ch : std.bychar(str)
                  if ch == ('\r' : char)
                          /* carriage returns never reach the output */
                          continue
                  elif ch == ('\n' : char)
                          /* end of line: file this paragraph, start a new one */
                          std.slpush(&paras, cur)
                          cur = untouched
                          mode = `Reading_prefix
                          continue
                  ;;

                  match mode
                  | `Reading_prefix:
                          if is_textual_content(ch)
                                  std.slpush(&cur.content, ch)
                                  mode = `Reading_line
                          else
                                  std.slpush(&cur.first_line_prefix, ch)
                          ;;
                  | `Reading_line:
                          std.slpush(&cur.content, ch)
                          if std.isblank(ch)
                                  mode = `Reading_line_last_was_ws
                          ;;
                  | `Reading_line_last_was_ws:
                          /* only the first blank of a run was kept */
                          if !std.isblank(ch)
                                  std.slpush(&cur.content, ch)
                                  mode = `Reading_line
                          ;;
                  ;;
          ;;

          /* input that does not end in a newline still has a last line */
          if cur.content.len > 0 || cur.first_line_prefix.len > 0
                  std.slpush(&paras, cur)
          ;;

          -> paras
  }
 102
 /*
    Tell whether c can be the first character of a line's real text, as
    opposed to being part of its prefix.

    Alphabetic characters qualify. So do the quote characters ", ' and `
    and both parentheses: they are not normally used as list markers, so
    they are treated like letters. Everything else, digits included,
    belongs to the prefix. TODO: exotic unicode should go here as well.
  */
 const is_textual_content = { c : char
         match c
         | '"':  -> true
         | '\'': -> true
         | '`':  -> true
         | '(':  -> true
         | ')':  -> true
         | _:    -> std.isalpha(c)
         ;;
 }
 121
 /*
    Join the one-per-line paragraphs produced by ingest() into real
    paragraphs and return the shortened list.

    Passes, in order:
      1. flag paragraphs whose content is empty;
      2. easy merges: neighbours with the same emptiness and equivalent
         prefixes are folded together;
      3. hard merges: two non-empty neighbours are folded when the one
         before them and the one after them are empty (or missing) and
         the first of the two has not been merged yet;
      4. paragraphs that were never merged reuse their first-line prefix
         for continuation lines, and trailing blanks are removed from
         every content.
  */
 const digest = {p
         /* 1: no content means empty */
         for var i = 0; i < p.len; ++i
                 p[i].empty = (p[i].content.len == 0)
         ;;

         /*
            2: the index only moves on when nothing was folded, so the
            same paragraph is compared against its new neighbour next.
          */
         var cur = 0
         while cur + 1 < p.len
                 var same_kind = (p[cur].empty == p[cur + 1].empty)
                 if same_kind && equiv_prefixes(p[cur].first_line_prefix, p[cur + 1].first_line_prefix)
                         /* the first absorbed line decides the continuation prefix */
                         if !p[cur].merged
                                 p[cur].gen_line_prefix = std.sldup(p[cur + 1].first_line_prefix)
                         ;;
                         merge_para(&p, cur, cur + 1)
                 else
                         cur++
                 ;;
         ;;

         /* 3: a lone pair of text paragraphs surrounded by emptiness */
         for var i = 0; i + 1 < p.len; ++i
                 var open_above = (i == 0 || p[i - 1].empty)
                 var open_below = (i + 2 >= p.len || p[i + 2].empty)
                 var both_text = (!p[i].empty && !p[i + 1].empty)
                 if open_above && open_below && both_text && !p[i].merged
                         p[i].gen_line_prefix = std.sldup(p[i + 1].first_line_prefix)
                         merge_para(&p, i, i + 1)
                 ;;
         ;;

         /* 4: finishing touches, independent for every paragraph */
         for var i = 0; i < p.len; ++i
                 if !p[i].merged
                         p[i].gen_line_prefix = std.sldup(p[i].first_line_prefix)
                 ;;

                 var text = &p[i].content
                 while text#.len > 0 && std.isblank(text#[text#.len - 1])
                         std.sldel(text, text#.len - 1)
                 ;;
         ;;

         -> p
 }
 176
 /*
    Format the paragraphs in p and write the result to fd 1.

    Each paragraph starts with its first_line_prefix. Content is then
    emitted word by word (see hypothetical_forward); when a word would
    push the line past max cells, and would fit on a fresh line, a
    newline plus gen_line_prefix is emitted first. A word that fits on
    no line at all stays on the current one. An empty paragraph whose
    prefix is all blanks is written as a bare newline.
  */
 const regurgitate = {p, max
         var out : std.strbuf# = std.mksb()

         for para : p
                 if para.empty
                         /* an all-blank prefix carries no information: drop it */
                         var only_blanks = true
                         for ch : para.first_line_prefix
                                 if !std.isblank(ch)
                                         only_blanks = false
                                         break
                                 ;;
                         ;;
                         if only_blanks
                                 std.sbputc(out, '\n')
                                 continue
                         ;;
                         /* otherwise fall through and print the prefix as usual */
                 ;;

                 /* first line opens with its own prefix */
                 var col = 0
                 for ch : para.first_line_prefix
                         std.sbputc(out, ch)
                         col += std.cellwidth(ch)
                 ;;

                 /* width every continuation line starts out with */
                 var indent_w = 0
                 for ch : para.gen_line_prefix
                         indent_w += std.cellwidth(ch)
                 ;;

                 var here, wrapped, stop, w_here, w_wrapped
                 var pos = 0
                 while pos < para.content.len
                         (here, wrapped, stop, w_here, w_wrapped) = hypothetical_forward(para.content, pos)

                         /* where copying starts depends on whether we break the line */
                         var first = here
                         if col + w_here > max && indent_w + w_wrapped <= max
                                 std.sbputc(out, '\n')
                                 for ch : para.gen_line_prefix
                                         std.sbputc(out, ch)
                                 ;;
                                 first = wrapped
                                 col = indent_w + w_wrapped
                         else
                                 col += w_here
                         ;;

                         for var k = first; k < stop; ++k
                                 std.sbputc(out, para.content[k])
                         ;;

                         pos = stop
                 ;;

                 std.sbputc(out, ('\n' : char))
         ;;

         std.writeall(1, std.sbfin(out))
 }
 238
 /*
    Compare two prefixes while ignoring blanks: returns true when a and
    b contain the same non-blank characters in the same order.
  */
 const equiv_prefixes = {a, b
         var i = 0
         var k = 0
         var same = true

         while true
                 /* move each cursor to its next non-blank character */
                 while i < a.len && std.isblank(a[i])
                         i++
                 ;;
                 while k < b.len && std.isblank(b[k])
                         k++
                 ;;

                 var a_done = (i >= a.len)
                 var b_done = (k >= b.len)

                 /* both ran out together: nothing left to disagree on */
                 if a_done && b_done
                         break
                 ;;

                 /* one ran out early, or the characters differ */
                 if a_done || b_done || a[i] != b[k]
                         same = false
                         break
                 ;;

                 i++
                 k++
         ;;

         -> same
 }
 267
 /*
    Release the three slices owned by paragraph p. The paragraph must
    not be used afterwards.
  */
 const clean = {p
         std.slfree(p.content)
         std.slfree(p.gen_line_prefix)
         std.slfree(p.first_line_prefix)
 }
 273
 /*
    Fold paragraph k of the list behind p into paragraph j.

    The content of k is appended to the content of j, separated by a
    single space unless j is empty or already ends in a blank. Paragraph
    k is then freed and removed from the list, and j is flagged as
    merged.
  */
 const merge_para = {p, j, k
         var have = p#[j].content.len

         /* keep the two texts from running into each other */
         if have > 0 && !std.isblank(p#[j].content[have - 1])
                 /* TODO: what if you use U+3000 instead of ' '? Huh? */
                 std.slpush(&(p#[j].content), (' ' : char))
         ;;

         std.sljoin(&(p#[j].content), p#[k].content)

         /* k has been copied; drop its storage and its slot */
         clean(p#[k])
         std.sldel(p, k)

         p#[j].merged = true
 }
 284
 /*
    Measure the next chunk of c starting at index j: any leading blanks
    followed by one word (a run of non-blanks).

    Returns a 5-tuple:
      - the index to copy from if the chunk stays on the current line
        (always j, so the separating blanks are included);
      - the index to copy from if the chunk starts a new line (the first
        non-blank; stays at j when no non-blank is left);
      - the index one past the end of the word;
      - the cell width of blanks plus word;
      - the cell width of the word alone.

    ingest() should leave only one blank between words, but longer runs
    of blanks are handled all the same.
  */
 const hypothetical_forward = {c, j
         /* first scan: the blanks in front of the word */
         var word_at = j
         var blank_w = 0
         while word_at < c.len && std.isblank(c[word_at])
                 blank_w += std.cellwidth(c[word_at])
                 word_at++
         ;;

         /* the new-line start only moves if a word was actually found */
         var next_line_at = j
         if word_at < c.len
                 next_line_at = word_at
         ;;

         /* second scan: the word itself */
         var stop = word_at
         var word_w = 0
         while stop < c.len && !std.isblank(c[stop])
                 word_w += std.cellwidth(c[stop])
                 stop++
         ;;

         -> (j, next_line_at, stop, blank_w + word_w, word_w)
 }