// Merge pull request #22777 from mlugg/some-bugs
// [zig.git] / tools / docgen.zig
// blob 1e9c409fa4af19b64a1aa0acf2a2c776b7596237
1 const std = @import("std");
2 const builtin = @import("builtin");
3 const io = std.io;
4 const fs = std.fs;
5 const process = std.process;
6 const ChildProcess = std.process.Child;
7 const Progress = std.Progress;
8 const print = std.debug.print;
9 const mem = std.mem;
10 const testing = std.testing;
11 const Allocator = std.mem.Allocator;
12 const getExternalExecutor = std.zig.system.getExternalExecutor;
13 const fatal = std.zig.fatal;
/// Largest template file the tool will read into memory (10 MiB).
const max_doc_file_size = 10 * 1024 * 1024;

/// File extension for object files on the host target (depends on the
/// host object format and CPU architecture).
const obj_ext = builtin.object_format.fileExt(builtin.cpu.arch);

/// Help text printed for -h/--help.
const usage =
    \\Usage: docgen [options] input output
    \\
    \\   Generates an HTML document from a docgen template.
    \\
    \\Options:
    \\   --code-dir dir         Path to directory containing code example outputs
    \\   -h, --help             Print this help and exit
    \\
;
/// Entry point: parses command-line arguments, reads the input template,
/// and renders the HTML output. All allocations come from a single arena
/// that is torn down on exit, so nothing is freed individually.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();

    const arena = arena_state.allocator();

    var arg_iter = try process.argsWithAllocator(arena);
    if (!arg_iter.skip()) @panic("expected self arg");

    var code_dir_arg: ?[]const u8 = null;
    var input_arg: ?[]const u8 = null;
    var output_arg: ?[]const u8 = null;

    while (arg_iter.next()) |arg| {
        if (!mem.startsWith(u8, arg, "-")) {
            // Positional arguments: input first, then output.
            if (input_arg == null) {
                input_arg = arg;
            } else if (output_arg == null) {
                output_arg = arg;
            } else {
                fatal("unexpected positional argument: '{s}'", .{arg});
            }
            continue;
        }
        if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
            try io.getStdOut().writer().writeAll(usage);
            process.exit(0);
        } else if (mem.eql(u8, arg, "--code-dir")) {
            code_dir_arg = arg_iter.next() orelse fatal("expected parameter after --code-dir", .{});
        } else {
            fatal("unrecognized option: '{s}'", .{arg});
        }
    }

    const input_path = input_arg orelse fatal("missing input file", .{});
    const output_path = output_arg orelse fatal("missing output file", .{});
    const code_dir_path = code_dir_arg orelse fatal("missing --code-dir argument", .{});

    var in_file = try fs.cwd().openFile(input_path, .{});
    defer in_file.close();

    var out_file = try fs.cwd().createFile(output_path, .{});
    defer out_file.close();

    var code_dir = try fs.cwd().openDir(code_dir_path, .{});
    defer code_dir.close();

    const input_file_bytes = try in_file.reader().readAllAlloc(arena, max_doc_file_size);

    var buffered_writer = io.bufferedWriter(out_file.writer());

    var tokenizer = Tokenizer.init(input_path, input_file_bytes);
    var toc = try genToc(arena, &tokenizer);

    try genHtml(arena, &tokenizer, &toc, code_dir, buffered_writer.writer());
    try buffered_writer.flush();
}
/// One lexical unit of the docgen template language.
const Token = struct {
    id: Id,
    /// Byte offset into the source buffer where the token begins.
    start: usize,
    /// Byte offset one past the last byte of the token.
    end: usize,

    const Id = enum {
        invalid,
        content,
        bracket_open,
        tag_content,
        separator,
        bracket_close,
        eof,
    };
};
/// Hand-rolled tokenizer for the docgen template syntax: plain content
/// interleaved with `{#tag|param#}` directives.
const Tokenizer = struct {
    buffer: []const u8,
    /// Current byte offset into `buffer`.
    index: usize,
    state: State,
    /// Used only for diagnostics (see `parseError`).
    source_file_name: []const u8,

    const State = enum {
        start,
        /// Just saw '{'; deciding whether it begins a "{#" tag.
        l_bracket,
        /// Just saw '#' inside a tag; deciding whether "#}" closes it.
        hash,
        /// Inside a tag, reading its text.
        tag_name,
        eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) Tokenizer {
        return Tokenizer{
            .buffer = buffer,
            .index = 0,
            .state = .start,
            .source_file_name = source_file_name,
        };
    }

    /// Returns the next token. Yields `.eof` at end of input; yields
    /// `.invalid` when the input ends in the middle of a tag.
    fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .start => switch (c) {
                    '{' => {
                        self.state = .l_bracket;
                    },
                    else => {
                        result.id = .content;
                    },
                },
                .l_bracket => switch (c) {
                    '#' => {
                        if (result.id != .eof) {
                            // A content token is pending: step back onto the
                            // '{' so "{#" is re-scanned on the next call, and
                            // emit the content first.
                            self.index -= 1;
                            self.state = .start;
                            break;
                        } else {
                            result.id = .bracket_open;
                            self.index += 1;
                            self.state = .tag_name;
                            break;
                        }
                    },
                    else => {
                        // A lone '{' is ordinary content.
                        result.id = .content;
                        self.state = .start;
                    },
                },
                .tag_name => switch (c) {
                    '|' => {
                        if (result.id != .eof) {
                            // Emit the pending tag_content; the '|' is not
                            // consumed and becomes a separator next call.
                            break;
                        } else {
                            result.id = .separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .hash;
                    },
                    else => {
                        result.id = .tag_content;
                    },
                },
                .hash => switch (c) {
                    '}' => {
                        if (result.id != .eof) {
                            // Pending tag_content: step back onto the '#' so
                            // "#}" is re-scanned on the next call.
                            self.index -= 1;
                            self.state = .tag_name;
                            break;
                        } else {
                            result.id = .bracket_close;
                            self.index += 1;
                            self.state = .start;
                            break;
                        }
                    },
                    else => {
                        // The '#' was part of the tag text, not a closer.
                        result.id = .tag_content;
                        self.state = .tag_name;
                    },
                },
                .eof => unreachable,
            }
        } else {
            // Ran off the end of the buffer: a tag left open is invalid.
            switch (self.state) {
                .start, .l_bracket, .eof => {},
                else => {
                    result.id = .invalid;
                },
            }
            self.state = .eof;
        }
        result.end = self.index;
        return result;
    }

    const Location = struct {
        /// Zero-based line and column of the token start.
        line: usize,
        column: usize,
        /// Byte offsets of the start and end of the line containing the token.
        line_start: usize,
        line_end: usize,
    };

    /// Computes the line/column of `token.start` by scanning the buffer from
    /// the beginning (O(n); only used on the error path by `parseError`).
    fn getTokenLocation(self: *Tokenizer, token: Token) Location {
        var loc = Location{
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer, 0..) |c, i| {
            if (i == token.start) {
                // Extend line_end to the end of the current line.
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};
/// Prints a compiler-style diagnostic at `token`'s location — the message,
/// the offending source line, and a '~' underline — then returns
/// `error.ParseError`.
fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, args: anytype) anyerror {
    const loc = tokenizer.getTokenLocation(token);
    const prefix = .{ tokenizer.source_file_name, loc.line + 1, loc.column + 1 };
    print("{s}:{d}:{d}: error: " ++ fmt ++ "\n", prefix ++ args);
    if (loc.line_start <= loc.line_end) {
        // Echo the offending line, then align a run of '~' under the token.
        print("{s}\n", .{tokenizer.buffer[loc.line_start..loc.line_end]});
        var pad: usize = 0;
        while (pad < loc.column) : (pad += 1) {
            print(" ", .{});
        }
        // Clamp the underline to the end of the line.
        const underline_len = @min(token.end, loc.line_end) - token.start;
        var drawn: usize = 0;
        while (drawn < underline_len) : (drawn += 1) {
            print("~", .{});
        }
        print("\n", .{});
    }
    return error.ParseError;
}
/// Verifies that `token` has the expected id; otherwise emits a parse
/// diagnostic and returns `error.ParseError`.
fn assertToken(tokenizer: *Tokenizer, token: Token, id: Token.Id) !void {
    if (token.id == id) return;
    return parseError(tokenizer, token, "expected {s}, found {s}", .{ @tagName(id), @tagName(token.id) });
}
/// Fetches the next token and asserts it has the expected id,
/// returning it on success.
fn eatToken(tokenizer: *Tokenizer, id: Token.Id) !Token {
    const token = tokenizer.next();
    try assertToken(tokenizer, token, id);
    return token;
}
/// A section header parsed from a {#header_open ...#} tag.
const HeaderOpen = struct {
    /// Header text as written in the template.
    name: []const u8,
    /// Urlized anchor derived from `name`.
    url: []const u8,
    /// Heading level; top-level sections start at 2 (h2).
    n: usize,
};
/// One entry of a {#see_also ...#} list.
const SeeAlsoItem = struct {
    name: []const u8,
    token: Token,
};
/// A {#code ...#} tag referencing a code example by name.
const Code = struct {
    name: []const u8,
    token: Token,
};
/// A {#link ...#} tag: display name plus the urlized target anchor.
const Link = struct {
    url: []const u8,
    name: []const u8,
    token: Token,
};
/// A fenced {#syntax_block|kind|[name]#} ... {#end_syntax_block#} region.
const SyntaxBlock = struct {
    source_type: SourceType,
    /// Display name; defaults to "sample_code" when the tag omits one.
    name: []const u8,
    /// Token covering the raw source text of the block.
    source_token: Token,

    const SourceType = enum {
        zig,
        c,
        peg,
        javascript,
    };
};
317 const Node = union(enum) {
318     Content: []const u8,
319     Nav,
320     Builtin: Token,
321     HeaderOpen: HeaderOpen,
322     SeeAlso: []const SeeAlsoItem,
323     Code: Code,
324     Link: Link,
325     InlineSyntax: Token,
326     Shell: Token,
327     SyntaxBlock: SyntaxBlock,
/// Result of `genToc`.
const Toc = struct {
    /// Parsed document nodes, in source order.
    nodes: []Node,
    /// Rendered table-of-contents HTML.
    toc: []u8,
    /// Maps each urlized header anchor to the header_open tag token that
    /// produced it (used to detect duplicate anchors).
    urls: std.StringHashMap(Token),
};
/// Whether the previous table-of-contents step opened or closed a header;
/// `genToc` uses this to decide when to start or end a nested <ul>.
const Action = enum {
    open,
    close,
};
/// Consumes the entire template from `tokenizer`, returning the parsed node
/// stream, the rendered table-of-contents HTML, and a map from urlized
/// header anchor to the tag token that declared it.
/// All results are allocated from `allocator` and owned by the caller.
/// Returns `error.ParseError` (after printing a diagnostic) on malformed input.
fn genToc(allocator: Allocator, tokenizer: *Tokenizer) !Toc {
    var urls = std.StringHashMap(Token).init(allocator);
    errdefer urls.deinit();

    // Depth of currently-open {#header_open ...#} tags; must be zero at EOF.
    var header_stack_size: usize = 0;
    var last_action: Action = .open;
    // Column count requested by the previous header_open ("2col"); applied
    // to the <ul> emitted for its children.
    var last_columns: ?u8 = null;

    var toc_buf = std.ArrayList(u8).init(allocator);
    defer toc_buf.deinit();

    const toc = toc_buf.writer();

    var nodes = std.ArrayList(Node).init(allocator);
    defer nodes.deinit();

    try toc.writeByte('\n');

    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .eof => {
                if (header_stack_size != 0) {
                    return parseError(tokenizer, token, "unbalanced headers", .{});
                }
                try toc.writeAll("    </ul>\n");
                break;
            },
            .content => {
                try nodes.append(Node{ .Content = tokenizer.buffer[token.start..token.end] });
            },
            .bracket_open => {
                const tag_token = try eatToken(tokenizer, .tag_content);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "nav")) {
                    _ = try eatToken(tokenizer, .bracket_close);

                    try nodes.append(Node.Nav);
                } else if (mem.eql(u8, tag_name, "builtin")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .Builtin = tag_token });
                } else if (mem.eql(u8, tag_name, "header_open")) {
                    _ = try eatToken(tokenizer, .separator);
                    const content_token = try eatToken(tokenizer, .tag_content);
                    const content = tokenizer.buffer[content_token.start..content_token.end];
                    var columns: ?u8 = null;
                    // Optional extra params, e.g. {#header_open|Name|2col#}.
                    while (true) {
                        const bracket_tok = tokenizer.next();
                        switch (bracket_tok.id) {
                            .bracket_close => break,
                            .separator => continue,
                            .tag_content => {
                                const param = tokenizer.buffer[bracket_tok.start..bracket_tok.end];
                                if (mem.eql(u8, param, "2col")) {
                                    columns = 2;
                                } else {
                                    return parseError(
                                        tokenizer,
                                        bracket_tok,
                                        "unrecognized header_open param: {s}",
                                        .{param},
                                    );
                                }
                            },
                            else => return parseError(tokenizer, bracket_tok, "invalid header_open token", .{}),
                        }
                    }

                    header_stack_size += 1;

                    const urlized = try urlize(allocator, content);
                    try nodes.append(Node{
                        .HeaderOpen = HeaderOpen{
                            .name = content,
                            .url = urlized,
                            .n = header_stack_size + 1, // highest-level section headers start at h2
                        },
                    });
                    if (try urls.fetchPut(urlized, tag_token)) |kv| {
                        // Report both this header and the earlier one that
                        // produced the same anchor.
                        parseError(tokenizer, tag_token, "duplicate header url: #{s}", .{urlized}) catch {};
                        parseError(tokenizer, kv.value, "other tag here", .{}) catch {};
                        return error.ParseError;
                    }
                    if (last_action == .open) {
                        try toc.writeByte('\n');
                        try toc.writeByteNTimes(' ', header_stack_size * 4);
                        if (last_columns) |n| {
                            try toc.print("<ul style=\"columns: {}\">\n", .{n});
                        } else {
                            try toc.writeAll("<ul>\n");
                        }
                    } else {
                        last_action = .open;
                    }
                    last_columns = columns;
                    try toc.writeByteNTimes(' ', 4 + header_stack_size * 4);
                    try toc.print("<li><a id=\"toc-{s}\" href=\"#{s}\">{s}</a>", .{ urlized, urlized, content });
                } else if (mem.eql(u8, tag_name, "header_close")) {
                    if (header_stack_size == 0) {
                        return parseError(tokenizer, tag_token, "unbalanced close header", .{});
                    }
                    header_stack_size -= 1;
                    _ = try eatToken(tokenizer, .bracket_close);

                    if (last_action == .close) {
                        try toc.writeByteNTimes(' ', 8 + header_stack_size * 4);
                        try toc.writeAll("</ul></li>\n");
                    } else {
                        try toc.writeAll("</li>\n");
                        last_action = .close;
                    }
                } else if (mem.eql(u8, tag_name, "see_also")) {
                    var list = std.ArrayList(SeeAlsoItem).init(allocator);
                    errdefer list.deinit();

                    while (true) {
                        const see_also_tok = tokenizer.next();
                        switch (see_also_tok.id) {
                            .tag_content => {
                                const content = tokenizer.buffer[see_also_tok.start..see_also_tok.end];
                                try list.append(SeeAlsoItem{
                                    .name = content,
                                    .token = see_also_tok,
                                });
                            },
                            .separator => {},
                            .bracket_close => {
                                try nodes.append(Node{ .SeeAlso = try list.toOwnedSlice() });
                                break;
                            },
                            else => return parseError(tokenizer, see_also_tok, "invalid see_also token", .{}),
                        }
                    }
                } else if (mem.eql(u8, tag_name, "link")) {
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    const name = tokenizer.buffer[name_tok.start..name_tok.end];

                    // Target defaults to the display name; an optional second
                    // param overrides it.
                    const url_name = blk: {
                        const tok = tokenizer.next();
                        switch (tok.id) {
                            .bracket_close => break :blk name,
                            .separator => {
                                const explicit_text = try eatToken(tokenizer, .tag_content);
                                _ = try eatToken(tokenizer, .bracket_close);
                                break :blk tokenizer.buffer[explicit_text.start..explicit_text.end];
                            },
                            else => return parseError(tokenizer, tok, "invalid link token", .{}),
                        }
                    };

                    try nodes.append(Node{
                        .Link = Link{
                            .url = try urlize(allocator, url_name),
                            .name = name,
                            .token = name_tok,
                        },
                    });
                } else if (mem.eql(u8, tag_name, "code")) {
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(.{
                        .Code = .{
                            .name = tokenizer.buffer[name_tok.start..name_tok.end],
                            .token = name_tok,
                        },
                    });
                } else if (mem.eql(u8, tag_name, "syntax")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    const content_tok = try eatToken(tokenizer, .content);
                    _ = try eatToken(tokenizer, .bracket_open);
                    const end_syntax_tag = try eatToken(tokenizer, .tag_content);
                    const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end];
                    if (!mem.eql(u8, end_tag_name, "endsyntax")) {
                        return parseError(
                            tokenizer,
                            end_syntax_tag,
                            "invalid token inside syntax: {s}",
                            .{end_tag_name},
                        );
                    }
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .InlineSyntax = content_tok });
                } else if (mem.eql(u8, tag_name, "shell_samp")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    const content_tok = try eatToken(tokenizer, .content);
                    _ = try eatToken(tokenizer, .bracket_open);
                    const end_syntax_tag = try eatToken(tokenizer, .tag_content);
                    const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end];
                    if (!mem.eql(u8, end_tag_name, "end_shell_samp")) {
                        return parseError(
                            tokenizer,
                            end_syntax_tag,
                            "invalid token inside syntax: {s}",
                            .{end_tag_name},
                        );
                    }
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .Shell = content_tok });
                } else if (mem.eql(u8, tag_name, "syntax_block")) {
                    _ = try eatToken(tokenizer, .separator);
                    const source_type_tok = try eatToken(tokenizer, .tag_content);
                    var name: []const u8 = "sample_code";
                    const maybe_sep = tokenizer.next();
                    switch (maybe_sep.id) {
                        .separator => {
                            const name_tok = try eatToken(tokenizer, .tag_content);
                            name = tokenizer.buffer[name_tok.start..name_tok.end];
                            _ = try eatToken(tokenizer, .bracket_close);
                        },
                        .bracket_close => {},
                        // Fix: report the offending token itself, not the
                        // outer bracket_open token.
                        else => return parseError(tokenizer, maybe_sep, "invalid token", .{}),
                    }
                    const source_type_str = tokenizer.buffer[source_type_tok.start..source_type_tok.end];
                    // The enum field names are exactly the accepted strings.
                    const source_type = std.meta.stringToEnum(SyntaxBlock.SourceType, source_type_str) orelse
                        return parseError(tokenizer, source_type_tok, "unrecognized code kind: {s}", .{source_type_str});
                    const source_token = while (true) {
                        const content_tok = try eatToken(tokenizer, .content);
                        _ = try eatToken(tokenizer, .bracket_open);
                        const end_code_tag = try eatToken(tokenizer, .tag_content);
                        const end_tag_name = tokenizer.buffer[end_code_tag.start..end_code_tag.end];
                        if (!mem.eql(u8, end_tag_name, "end_syntax_block")) {
                            return parseError(
                                tokenizer,
                                end_code_tag,
                                "invalid token inside code_begin: {s}",
                                .{end_tag_name},
                            );
                        }
                        // Fix: the old code had an unreachable duplicate
                        // eatToken after an if/else where both arms exited.
                        _ = try eatToken(tokenizer, .bracket_close);
                        break content_tok;
                    };
                    try nodes.append(Node{ .SyntaxBlock = SyntaxBlock{ .source_type = source_type, .name = name, .source_token = source_token } });
                } else {
                    return parseError(tokenizer, tag_token, "unrecognized tag name: {s}", .{tag_name});
                }
            },
            else => return parseError(tokenizer, token, "invalid token", .{}),
        }
    }

    return Toc{
        .nodes = try nodes.toOwnedSlice(),
        .toc = try toc_buf.toOwnedSlice(),
        .urls = urls,
    };
}
/// Converts arbitrary header text into a URL-safe anchor: alphanumerics,
/// '_' and '-' pass through, spaces become '-', everything else is dropped.
/// Caller owns the returned slice.
fn urlize(allocator: Allocator, input: []const u8) ![]u8 {
    var result = std.ArrayList(u8).init(allocator);
    defer result.deinit();

    for (input) |byte| {
        switch (byte) {
            'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => try result.append(byte),
            ' ' => try result.append('-'),
            else => {},
        }
    }
    return result.toOwnedSlice();
}
/// Returns a newly-allocated copy of `input` with the HTML metacharacters
/// '&', '<', '>' and '"' replaced by their entities. Caller owns the result.
fn escapeHtml(allocator: Allocator, input: []const u8) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    const w = buf.writer();
    for (input) |c| {
        switch (c) {
            '&' => try w.writeAll("&amp;"),
            '<' => try w.writeAll("&lt;"),
            '>' => try w.writeAll("&gt;"),
            '"' => try w.writeAll("&quot;"),
            else => try w.writeByte(c),
        }
    }
    return buf.toOwnedSlice();
}
/// Streams `input` to `out`, replacing '&', '<', '>' and '"' with their
/// HTML entities and passing every other byte through unchanged.
fn writeEscaped(out: anytype, input: []const u8) !void {
    for (input) |byte| {
        switch (byte) {
            '&' => try out.writeAll("&amp;"),
            '<' => try out.writeAll("&lt;"),
            '>' => try out.writeAll("&gt;"),
            '"' => try out.writeAll("&quot;"),
            else => try out.writeByte(byte),
        }
    }
}
/// Returns true if `number` occurs anywhere in `slice`.
fn in(slice: []const u8, number: u8) bool {
    return mem.indexOfScalar(u8, slice, number) != null;
}
/// Primitive type names recognized by `isType`; arbitrary-width iN/uN
/// integer names are detected separately by the highlighter.
const builtin_types = [_][]const u8{
    "f16",          "f32",     "f64",        "f80",          "f128",
    "c_longdouble", "c_short", "c_ushort",   "c_int",        "c_uint",
    "c_long",       "c_ulong", "c_longlong", "c_ulonglong",  "c_char",
    "anyopaque",    "void",    "bool",       "isize",        "usize",
    "noreturn",     "type",    "anyerror",   "comptime_int", "comptime_float",
};
/// Returns whether `name` is one of the primitive type names listed in
/// `builtin_types`.
fn isType(name: []const u8) bool {
    for (builtin_types) |type_name| {
        if (mem.eql(u8, name, type_name)) return true;
    }
    return false;
}
/// HTML-escapes `text` to `out`; lines need no special treatment here,
/// so this simply forwards to `writeEscaped`.
fn writeEscapedLines(out: anytype, text: []const u8) !void {
    return writeEscaped(out, text);
}
671 fn tokenizeAndPrintRaw(
672     allocator: Allocator,
673     docgen_tokenizer: *Tokenizer,
674     out: anytype,
675     source_token: Token,
676     raw_src: []const u8,
677 ) !void {
678     const src_non_terminated = mem.trim(u8, raw_src, " \r\n");
679     const src = try allocator.dupeZ(u8, src_non_terminated);
681     try out.writeAll("<code>");
682     var tokenizer = std.zig.Tokenizer.init(src);
683     var index: usize = 0;
684     var next_tok_is_fn = false;
685     while (true) {
686         const prev_tok_was_fn = next_tok_is_fn;
687         next_tok_is_fn = false;
689         const token = tokenizer.next();
690         if (mem.indexOf(u8, src[index..token.loc.start], "//")) |comment_start_off| {
691             // render one comment
692             const comment_start = index + comment_start_off;
693             const comment_end_off = mem.indexOf(u8, src[comment_start..token.loc.start], "\n");
694             const comment_end = if (comment_end_off) |o| comment_start + o else token.loc.start;
696             try writeEscapedLines(out, src[index..comment_start]);
697             try out.writeAll("<span class=\"tok-comment\">");
698             try writeEscaped(out, src[comment_start..comment_end]);
699             try out.writeAll("</span>");
700             index = comment_end;
701             tokenizer.index = index;
702             continue;
703         }
705         try writeEscapedLines(out, src[index..token.loc.start]);
706         switch (token.tag) {
707             .eof => break,
709             .keyword_addrspace,
710             .keyword_align,
711             .keyword_and,
712             .keyword_asm,
713             .keyword_async,
714             .keyword_await,
715             .keyword_break,
716             .keyword_catch,
717             .keyword_comptime,
718             .keyword_const,
719             .keyword_continue,
720             .keyword_defer,
721             .keyword_else,
722             .keyword_enum,
723             .keyword_errdefer,
724             .keyword_error,
725             .keyword_export,
726             .keyword_extern,
727             .keyword_for,
728             .keyword_if,
729             .keyword_inline,
730             .keyword_noalias,
731             .keyword_noinline,
732             .keyword_nosuspend,
733             .keyword_opaque,
734             .keyword_or,
735             .keyword_orelse,
736             .keyword_packed,
737             .keyword_anyframe,
738             .keyword_pub,
739             .keyword_resume,
740             .keyword_return,
741             .keyword_linksection,
742             .keyword_callconv,
743             .keyword_struct,
744             .keyword_suspend,
745             .keyword_switch,
746             .keyword_test,
747             .keyword_threadlocal,
748             .keyword_try,
749             .keyword_union,
750             .keyword_unreachable,
751             .keyword_usingnamespace,
752             .keyword_var,
753             .keyword_volatile,
754             .keyword_allowzero,
755             .keyword_while,
756             .keyword_anytype,
757             => {
758                 try out.writeAll("<span class=\"tok-kw\">");
759                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
760                 try out.writeAll("</span>");
761             },
763             .keyword_fn => {
764                 try out.writeAll("<span class=\"tok-kw\">");
765                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
766                 try out.writeAll("</span>");
767                 next_tok_is_fn = true;
768             },
770             .string_literal,
771             .multiline_string_literal_line,
772             .char_literal,
773             => {
774                 try out.writeAll("<span class=\"tok-str\">");
775                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
776                 try out.writeAll("</span>");
777             },
779             .builtin => {
780                 try out.writeAll("<span class=\"tok-builtin\">");
781                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
782                 try out.writeAll("</span>");
783             },
785             .doc_comment,
786             .container_doc_comment,
787             => {
788                 try out.writeAll("<span class=\"tok-comment\">");
789                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
790                 try out.writeAll("</span>");
791             },
793             .identifier => {
794                 const tok_bytes = src[token.loc.start..token.loc.end];
795                 if (mem.eql(u8, tok_bytes, "undefined") or
796                     mem.eql(u8, tok_bytes, "null") or
797                     mem.eql(u8, tok_bytes, "true") or
798                     mem.eql(u8, tok_bytes, "false"))
799                 {
800                     try out.writeAll("<span class=\"tok-null\">");
801                     try writeEscaped(out, tok_bytes);
802                     try out.writeAll("</span>");
803                 } else if (prev_tok_was_fn) {
804                     try out.writeAll("<span class=\"tok-fn\">");
805                     try writeEscaped(out, tok_bytes);
806                     try out.writeAll("</span>");
807                 } else {
808                     const is_int = blk: {
809                         if (src[token.loc.start] != 'i' and src[token.loc.start] != 'u')
810                             break :blk false;
811                         var i = token.loc.start + 1;
812                         if (i == token.loc.end)
813                             break :blk false;
814                         while (i != token.loc.end) : (i += 1) {
815                             if (src[i] < '0' or src[i] > '9')
816                                 break :blk false;
817                         }
818                         break :blk true;
819                     };
820                     if (is_int or isType(tok_bytes)) {
821                         try out.writeAll("<span class=\"tok-type\">");
822                         try writeEscaped(out, tok_bytes);
823                         try out.writeAll("</span>");
824                     } else {
825                         try writeEscaped(out, tok_bytes);
826                     }
827                 }
828             },
830             .number_literal => {
831                 try out.writeAll("<span class=\"tok-number\">");
832                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
833                 try out.writeAll("</span>");
834             },
836             .bang,
837             .pipe,
838             .pipe_pipe,
839             .pipe_equal,
840             .equal,
841             .equal_equal,
842             .equal_angle_bracket_right,
843             .bang_equal,
844             .l_paren,
845             .r_paren,
846             .semicolon,
847             .percent,
848             .percent_equal,
849             .l_brace,
850             .r_brace,
851             .l_bracket,
852             .r_bracket,
853             .period,
854             .period_asterisk,
855             .ellipsis2,
856             .ellipsis3,
857             .caret,
858             .caret_equal,
859             .plus,
860             .plus_plus,
861             .plus_equal,
862             .plus_percent,
863             .plus_percent_equal,
864             .plus_pipe,
865             .plus_pipe_equal,
866             .minus,
867             .minus_equal,
868             .minus_percent,
869             .minus_percent_equal,
870             .minus_pipe,
871             .minus_pipe_equal,
872             .asterisk,
873             .asterisk_equal,
874             .asterisk_asterisk,
875             .asterisk_percent,
876             .asterisk_percent_equal,
877             .asterisk_pipe,
878             .asterisk_pipe_equal,
879             .arrow,
880             .colon,
881             .slash,
882             .slash_equal,
883             .comma,
884             .ampersand,
885             .ampersand_equal,
886             .question_mark,
887             .angle_bracket_left,
888             .angle_bracket_left_equal,
889             .angle_bracket_angle_bracket_left,
890             .angle_bracket_angle_bracket_left_equal,
891             .angle_bracket_angle_bracket_left_pipe,
892             .angle_bracket_angle_bracket_left_pipe_equal,
893             .angle_bracket_right,
894             .angle_bracket_right_equal,
895             .angle_bracket_angle_bracket_right,
896             .angle_bracket_angle_bracket_right_equal,
897             .tilde,
898             => try writeEscaped(out, src[token.loc.start..token.loc.end]),
900             .invalid, .invalid_periodasterisks => return parseError(
901                 docgen_tokenizer,
902                 source_token,
903                 "syntax error",
904                 .{},
905             ),
906         }
907         index = token.loc.end;
908     }
909     try out.writeAll("</code>");
/// Syntax-highlights the Zig source captured by `source_token` and writes it
/// to `out` as HTML. Thin convenience wrapper: it resolves the token's byte
/// range inside the docgen tokenizer's buffer and delegates to
/// `tokenizeAndPrintRaw`, which does the actual token-by-token rendering.
fn tokenizeAndPrint(
    allocator: Allocator,
    docgen_tokenizer: *Tokenizer,
    out: anytype,
    source_token: Token,
) !void {
    const source_bytes = docgen_tokenizer.buffer[source_token.start..source_token.end];
    return tokenizeAndPrintRaw(allocator, docgen_tokenizer, out, source_token, source_bytes);
}
/// Renders one source-listing block as an HTML `<figure>` with a filename
/// caption. Zig sources get full syntax highlighting via `tokenizeAndPrint`;
/// every other source type is emitted as a plain escaped `<code>` listing,
/// trimmed of surrounding spaces and newlines.
fn printSourceBlock(allocator: Allocator, docgen_tokenizer: *Tokenizer, out: anytype, syntax_block: SyntaxBlock) !void {
    // The caption's CSS class is derived from the source type (e.g. "zig-cap").
    const caption_class = @tagName(syntax_block.source_type);
    try out.print("<figure><figcaption class=\"{s}-cap\"><cite class=\"file\">{s}</cite></figcaption><pre>", .{ caption_class, syntax_block.name });
    if (syntax_block.source_type == .zig) {
        try tokenizeAndPrint(allocator, docgen_tokenizer, out, syntax_block.source_token);
    } else {
        const tok = syntax_block.source_token;
        const listing = mem.trim(u8, docgen_tokenizer.buffer[tok.start..tok.end], " \r\n");
        try out.writeAll("<code>");
        try writeEscapedLines(out, listing);
        try out.writeAll("</code>");
    }
    try out.writeAll("</pre></figure>");
}
/// Renders a shell transcript as an HTML `<figure>`/`<samp>` block.
///
/// Lines beginning with "$ " are commands: the prompt is kept outside a
/// `<kbd>` element that wraps the command text. A trailing backslash marks a
/// line continuation, so the `<kbd>` stays open until the first non-empty
/// continuation line that does not itself end in '\\'. All other lines are
/// treated as program output. When `escape` is set, line content is passed
/// through `writeEscaped`; otherwise it is written verbatim.
fn printShell(out: anytype, shell_content: []const u8, escape: bool) !void {
    const content = mem.trim(u8, shell_content, " \r\n");
    try out.writeAll("<figure><figcaption class=\"shell-cap\">Shell</figcaption><pre><samp>");
    // True while we are inside a multi-line (backslash-continued) command.
    var in_command: bool = false;
    var lines = std.mem.splitScalar(u8, content, '\n');
    while (lines.next()) |raw_line| {
        const line = mem.trimRight(u8, raw_line, " \r");
        const is_prompt = !in_command and line.len > 1 and mem.eql(u8, line[0..2], "$ ");
        const continues = line.len > 0 and line[line.len - 1] == '\\';
        if (is_prompt) {
            try out.writeAll("$ <kbd>");
            const command = std.mem.trimLeft(u8, line[1..], " ");
            if (escape) try writeEscaped(out, command) else try out.writeAll(command);
            if (continues) {
                // Command spills onto the next line; keep <kbd> open.
                try out.writeAll("\n");
                in_command = true;
            } else {
                try out.writeAll("</kbd>" ++ "\n");
            }
        } else if (in_command and line.len > 0 and !continues) {
            // Final line of a continued command: close the <kbd>.
            if (escape) try writeEscaped(out, line) else try out.writeAll(line);
            try out.writeAll("</kbd>" ++ "\n");
            in_command = false;
        } else {
            // Program output, an intermediate continuation line, or a blank line.
            if (escape) try writeEscaped(out, line) else try out.writeAll(line);
            try out.writeAll("\n");
        }
    }
    try out.writeAll("</samp></pre></figure>");
}
987 fn genHtml(
988     allocator: Allocator,
989     tokenizer: *Tokenizer,
990     toc: *Toc,
991     code_dir: std.fs.Dir,
992     out: anytype,
993 ) !void {
994     for (toc.nodes) |node| {
995         switch (node) {
996             .Content => |data| {
997                 try out.writeAll(data);
998             },
999             .Link => |info| {
1000                 if (!toc.urls.contains(info.url)) {
1001                     return parseError(tokenizer, info.token, "url not found: {s}", .{info.url});
1002                 }
1003                 try out.print("<a href=\"#{s}\">{s}</a>", .{ info.url, info.name });
1004             },
1005             .Nav => {
1006                 try out.writeAll(toc.toc);
1007             },
1008             .Builtin => |tok| {
1009                 try out.writeAll("<figure><figcaption class=\"zig-cap\"><cite>@import(\"builtin\")</cite></figcaption><pre>");
1010                 const builtin_code = @embedFile("builtin"); // ๐Ÿ˜Ž
1011                 try tokenizeAndPrintRaw(allocator, tokenizer, out, tok, builtin_code);
1012                 try out.writeAll("</pre></figure>");
1013             },
1014             .HeaderOpen => |info| {
1015                 try out.print(
1016                     "<h{d} id=\"{s}\"><a href=\"#toc-{s}\">{s}</a> <a class=\"hdr\" href=\"#{s}\">ยง</a></h{d}>\n",
1017                     .{ info.n, info.url, info.url, info.name, info.url, info.n },
1018                 );
1019             },
1020             .SeeAlso => |items| {
1021                 try out.writeAll("<p>See also:</p><ul>\n");
1022                 for (items) |item| {
1023                     const url = try urlize(allocator, item.name);
1024                     if (!toc.urls.contains(url)) {
1025                         return parseError(tokenizer, item.token, "url not found: {s}", .{url});
1026                     }
1027                     try out.print("<li><a href=\"#{s}\">{s}</a></li>\n", .{ url, item.name });
1028                 }
1029                 try out.writeAll("</ul>\n");
1030             },
1031             .InlineSyntax => |content_tok| {
1032                 try tokenizeAndPrint(allocator, tokenizer, out, content_tok);
1033             },
1034             .Shell => |content_tok| {
1035                 const raw_shell_content = tokenizer.buffer[content_tok.start..content_tok.end];
1036                 try printShell(out, raw_shell_content, true);
1037             },
1038             .SyntaxBlock => |syntax_block| {
1039                 try printSourceBlock(allocator, tokenizer, out, syntax_block);
1040             },
1041             .Code => |code| {
1042                 const out_basename = try std.fmt.allocPrint(allocator, "{s}.out", .{
1043                     fs.path.stem(code.name),
1044                 });
1045                 defer allocator.free(out_basename);
1047                 const contents = code_dir.readFileAlloc(allocator, out_basename, std.math.maxInt(u32)) catch |err| {
1048                     return parseError(tokenizer, code.token, "unable to open '{s}': {s}", .{
1049                         out_basename, @errorName(err),
1050                     });
1051                 };
1052                 defer allocator.free(contents);
1054                 try out.writeAll(contents);
1055             },
1056         }
1057     }