// Merge pull request #22777 from mlugg/some-bugs
// [zig.git] / tools / docgen.zig
// blob 1e9c409fa4af19b64a1aa0acf2a2c776b7596237
1 const std = @import("std");
2 const builtin = @import("builtin");
3 const io = std.io;
4 const fs = std.fs;
5 const process = std.process;
6 const ChildProcess = std.process.Child;
7 const Progress = std.Progress;
8 const print = std.debug.print;
9 const mem = std.mem;
10 const testing = std.testing;
11 const Allocator = std.mem.Allocator;
12 const getExternalExecutor = std.zig.system.getExternalExecutor;
13 const fatal = std.zig.fatal;
/// Largest template file the tool will read into memory (10 MiB).
const max_doc_file_size = 10 * 1024 * 1024;

/// File extension for object files on the host target (depends on the
/// host object format and CPU architecture).
const obj_ext = builtin.object_format.fileExt(builtin.cpu.arch);

/// Help text printed for -h/--help.
const usage =
    \\Usage: docgen [options] input output
    \\
    \\   Generates an HTML document from a docgen template.
    \\
    \\Options:
    \\   --code-dir dir         Path to directory containing code example outputs
    \\   -h, --help             Print this help and exit
    \\
;
/// Entry point: parses command-line arguments, reads the input template,
/// and renders the HTML output. All allocations come from a single arena
/// that is torn down on exit, so nothing is freed individually.
pub fn main() !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();

    const arena = arena_state.allocator();

    var arg_iter = try process.argsWithAllocator(arena);
    if (!arg_iter.skip()) @panic("expected self arg");

    var code_dir_arg: ?[]const u8 = null;
    var input_arg: ?[]const u8 = null;
    var output_arg: ?[]const u8 = null;

    while (arg_iter.next()) |arg| {
        if (!mem.startsWith(u8, arg, "-")) {
            // Positional arguments: input first, then output.
            if (input_arg == null) {
                input_arg = arg;
            } else if (output_arg == null) {
                output_arg = arg;
            } else {
                fatal("unexpected positional argument: '{s}'", .{arg});
            }
            continue;
        }
        if (mem.eql(u8, arg, "-h") or mem.eql(u8, arg, "--help")) {
            try io.getStdOut().writer().writeAll(usage);
            process.exit(0);
        } else if (mem.eql(u8, arg, "--code-dir")) {
            code_dir_arg = arg_iter.next() orelse fatal("expected parameter after --code-dir", .{});
        } else {
            fatal("unrecognized option: '{s}'", .{arg});
        }
    }

    const input_path = input_arg orelse fatal("missing input file", .{});
    const output_path = output_arg orelse fatal("missing output file", .{});
    const code_dir_path = code_dir_arg orelse fatal("missing --code-dir argument", .{});

    var in_file = try fs.cwd().openFile(input_path, .{});
    defer in_file.close();

    var out_file = try fs.cwd().createFile(output_path, .{});
    defer out_file.close();

    var code_dir = try fs.cwd().openDir(code_dir_path, .{});
    defer code_dir.close();

    const input_file_bytes = try in_file.reader().readAllAlloc(arena, max_doc_file_size);

    var buffered_writer = io.bufferedWriter(out_file.writer());

    var tokenizer = Tokenizer.init(input_path, input_file_bytes);
    var toc = try genToc(arena, &tokenizer);

    try genHtml(arena, &tokenizer, &toc, code_dir, buffered_writer.writer());
    try buffered_writer.flush();
}
/// One lexical unit of the docgen template language.
const Token = struct {
    id: Id,
    /// Byte offset into the source buffer where the token begins.
    start: usize,
    /// Byte offset one past the last byte of the token.
    end: usize,

    const Id = enum {
        invalid,
        content,
        bracket_open,
        tag_content,
        separator,
        bracket_close,
        eof,
    };
};
/// Hand-rolled tokenizer for the docgen template syntax: plain content
/// interleaved with `{#tag|param#}` directives.
const Tokenizer = struct {
    buffer: []const u8,
    /// Current byte offset into `buffer`.
    index: usize,
    state: State,
    /// Used only for diagnostics (see `parseError`).
    source_file_name: []const u8,

    const State = enum {
        start,
        /// Just saw '{'; deciding whether it begins a "{#" tag.
        l_bracket,
        /// Just saw '#' inside a tag; deciding whether "#}" closes it.
        hash,
        /// Inside a tag, reading its text.
        tag_name,
        eof,
    };

    fn init(source_file_name: []const u8, buffer: []const u8) Tokenizer {
        return Tokenizer{
            .buffer = buffer,
            .index = 0,
            .state = .start,
            .source_file_name = source_file_name,
        };
    }

    /// Returns the next token. Yields `.eof` at end of input; yields
    /// `.invalid` when the input ends in the middle of a tag.
    fn next(self: *Tokenizer) Token {
        var result = Token{
            .id = .eof,
            .start = self.index,
            .end = undefined,
        };
        while (self.index < self.buffer.len) : (self.index += 1) {
            const c = self.buffer[self.index];
            switch (self.state) {
                .start => switch (c) {
                    '{' => {
                        self.state = .l_bracket;
                    },
                    else => {
                        result.id = .content;
                    },
                },
                .l_bracket => switch (c) {
                    '#' => {
                        if (result.id != .eof) {
                            // A content token is pending: step back onto the
                            // '{' so "{#" is re-scanned on the next call, and
                            // emit the content first.
                            self.index -= 1;
                            self.state = .start;
                            break;
                        } else {
                            result.id = .bracket_open;
                            self.index += 1;
                            self.state = .tag_name;
                            break;
                        }
                    },
                    else => {
                        // A lone '{' is ordinary content.
                        result.id = .content;
                        self.state = .start;
                    },
                },
                .tag_name => switch (c) {
                    '|' => {
                        if (result.id != .eof) {
                            // Emit the pending tag_content; the '|' is not
                            // consumed and becomes a separator next call.
                            break;
                        } else {
                            result.id = .separator;
                            self.index += 1;
                            break;
                        }
                    },
                    '#' => {
                        self.state = .hash;
                    },
                    else => {
                        result.id = .tag_content;
                    },
                },
                .hash => switch (c) {
                    '}' => {
                        if (result.id != .eof) {
                            // Pending tag_content: step back onto the '#' so
                            // "#}" is re-scanned on the next call.
                            self.index -= 1;
                            self.state = .tag_name;
                            break;
                        } else {
                            result.id = .bracket_close;
                            self.index += 1;
                            self.state = .start;
                            break;
                        }
                    },
                    else => {
                        // The '#' was part of the tag text, not a closer.
                        result.id = .tag_content;
                        self.state = .tag_name;
                    },
                },
                .eof => unreachable,
            }
        } else {
            // Ran off the end of the buffer: a tag left open is invalid.
            switch (self.state) {
                .start, .l_bracket, .eof => {},
                else => {
                    result.id = .invalid;
                },
            }
            self.state = .eof;
        }
        result.end = self.index;
        return result;
    }

    const Location = struct {
        /// Zero-based line and column of the token start.
        line: usize,
        column: usize,
        /// Byte offsets of the start and end of the line containing the token.
        line_start: usize,
        line_end: usize,
    };

    /// Computes the line/column of `token.start` by scanning the buffer from
    /// the beginning (O(n); only used on the error path by `parseError`).
    fn getTokenLocation(self: *Tokenizer, token: Token) Location {
        var loc = Location{
            .line = 0,
            .column = 0,
            .line_start = 0,
            .line_end = 0,
        };
        for (self.buffer, 0..) |c, i| {
            if (i == token.start) {
                // Extend line_end to the end of the current line.
                loc.line_end = i;
                while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
                return loc;
            }
            if (c == '\n') {
                loc.line += 1;
                loc.column = 0;
                loc.line_start = i + 1;
            } else {
                loc.column += 1;
            }
        }
        return loc;
    }
};
/// Prints a compiler-style diagnostic at `token`'s location — the message,
/// the offending source line, and a '~' underline — then returns
/// `error.ParseError`.
fn parseError(tokenizer: *Tokenizer, token: Token, comptime fmt: []const u8, args: anytype) anyerror {
    const loc = tokenizer.getTokenLocation(token);
    const prefix = .{ tokenizer.source_file_name, loc.line + 1, loc.column + 1 };
    print("{s}:{d}:{d}: error: " ++ fmt ++ "\n", prefix ++ args);
    if (loc.line_start <= loc.line_end) {
        // Echo the offending line, then align a run of '~' under the token.
        print("{s}\n", .{tokenizer.buffer[loc.line_start..loc.line_end]});
        var pad: usize = 0;
        while (pad < loc.column) : (pad += 1) {
            print(" ", .{});
        }
        // Clamp the underline to the end of the line.
        const underline_len = @min(token.end, loc.line_end) - token.start;
        var drawn: usize = 0;
        while (drawn < underline_len) : (drawn += 1) {
            print("~", .{});
        }
        print("\n", .{});
    }
    return error.ParseError;
}
/// Verifies that `token` has the expected id; otherwise emits a parse
/// diagnostic and returns `error.ParseError`.
fn assertToken(tokenizer: *Tokenizer, token: Token, id: Token.Id) !void {
    if (token.id == id) return;
    return parseError(tokenizer, token, "expected {s}, found {s}", .{ @tagName(id), @tagName(token.id) });
}
/// Fetches the next token and asserts it has the expected id,
/// returning it on success.
fn eatToken(tokenizer: *Tokenizer, id: Token.Id) !Token {
    const token = tokenizer.next();
    try assertToken(tokenizer, token, id);
    return token;
}
/// A section header parsed from a {#header_open ...#} tag.
const HeaderOpen = struct {
    /// Header text as written in the template.
    name: []const u8,
    /// Urlized anchor derived from `name`.
    url: []const u8,
    /// Heading level; top-level sections start at 2 (h2).
    n: usize,
};
/// One entry of a {#see_also ...#} list.
const SeeAlsoItem = struct {
    name: []const u8,
    token: Token,
};
/// A {#code ...#} tag referencing a code example by name.
const Code = struct {
    name: []const u8,
    token: Token,
};
/// A {#link ...#} tag: display name plus the urlized target anchor.
const Link = struct {
    url: []const u8,
    name: []const u8,
    token: Token,
};
/// A fenced {#syntax_block|kind|[name]#} ... {#end_syntax_block#} region.
const SyntaxBlock = struct {
    source_type: SourceType,
    /// Display name; defaults to "sample_code" when the tag omits one.
    name: []const u8,
    /// Token covering the raw source text of the block.
    source_token: Token,

    const SourceType = enum {
        zig,
        c,
        peg,
        javascript,
    };
};
317 const Node = union(enum) {
318     Content: []const u8,
319     Nav,
320     Builtin: Token,
321     HeaderOpen: HeaderOpen,
322     SeeAlso: []const SeeAlsoItem,
323     Code: Code,
324     Link: Link,
325     InlineSyntax: Token,
326     Shell: Token,
327     SyntaxBlock: SyntaxBlock,
/// Result of `genToc`.
const Toc = struct {
    /// Parsed document nodes, in source order.
    nodes: []Node,
    /// Rendered table-of-contents HTML.
    toc: []u8,
    /// Maps each urlized header anchor to the header_open tag token that
    /// produced it (used to detect duplicate anchors).
    urls: std.StringHashMap(Token),
};
/// Whether the previous table-of-contents step opened or closed a header;
/// `genToc` uses this to decide when to start or end a nested <ul>.
const Action = enum {
    open,
    close,
};
/// Consumes the entire template from `tokenizer`, returning the parsed node
/// stream, the rendered table-of-contents HTML, and a map from urlized
/// header anchor to the tag token that declared it.
/// All results are allocated from `allocator` and owned by the caller.
/// Returns `error.ParseError` (after printing a diagnostic) on malformed input.
fn genToc(allocator: Allocator, tokenizer: *Tokenizer) !Toc {
    var urls = std.StringHashMap(Token).init(allocator);
    errdefer urls.deinit();

    // Depth of currently-open {#header_open ...#} tags; must be zero at EOF.
    var header_stack_size: usize = 0;
    var last_action: Action = .open;
    // Column count requested by the previous header_open ("2col"); applied
    // to the <ul> emitted for its children.
    var last_columns: ?u8 = null;

    var toc_buf = std.ArrayList(u8).init(allocator);
    defer toc_buf.deinit();

    const toc = toc_buf.writer();

    var nodes = std.ArrayList(Node).init(allocator);
    defer nodes.deinit();

    try toc.writeByte('\n');

    while (true) {
        const token = tokenizer.next();
        switch (token.id) {
            .eof => {
                if (header_stack_size != 0) {
                    return parseError(tokenizer, token, "unbalanced headers", .{});
                }
                try toc.writeAll("    </ul>\n");
                break;
            },
            .content => {
                try nodes.append(Node{ .Content = tokenizer.buffer[token.start..token.end] });
            },
            .bracket_open => {
                const tag_token = try eatToken(tokenizer, .tag_content);
                const tag_name = tokenizer.buffer[tag_token.start..tag_token.end];

                if (mem.eql(u8, tag_name, "nav")) {
                    _ = try eatToken(tokenizer, .bracket_close);

                    try nodes.append(Node.Nav);
                } else if (mem.eql(u8, tag_name, "builtin")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .Builtin = tag_token });
                } else if (mem.eql(u8, tag_name, "header_open")) {
                    _ = try eatToken(tokenizer, .separator);
                    const content_token = try eatToken(tokenizer, .tag_content);
                    const content = tokenizer.buffer[content_token.start..content_token.end];
                    var columns: ?u8 = null;
                    // Optional extra params, e.g. {#header_open|Name|2col#}.
                    while (true) {
                        const bracket_tok = tokenizer.next();
                        switch (bracket_tok.id) {
                            .bracket_close => break,
                            .separator => continue,
                            .tag_content => {
                                const param = tokenizer.buffer[bracket_tok.start..bracket_tok.end];
                                if (mem.eql(u8, param, "2col")) {
                                    columns = 2;
                                } else {
                                    return parseError(
                                        tokenizer,
                                        bracket_tok,
                                        "unrecognized header_open param: {s}",
                                        .{param},
                                    );
                                }
                            },
                            else => return parseError(tokenizer, bracket_tok, "invalid header_open token", .{}),
                        }
                    }

                    header_stack_size += 1;

                    const urlized = try urlize(allocator, content);
                    try nodes.append(Node{
                        .HeaderOpen = HeaderOpen{
                            .name = content,
                            .url = urlized,
                            .n = header_stack_size + 1, // highest-level section headers start at h2
                        },
                    });
                    if (try urls.fetchPut(urlized, tag_token)) |kv| {
                        // Report both this header and the earlier one that
                        // produced the same anchor.
                        parseError(tokenizer, tag_token, "duplicate header url: #{s}", .{urlized}) catch {};
                        parseError(tokenizer, kv.value, "other tag here", .{}) catch {};
                        return error.ParseError;
                    }
                    if (last_action == .open) {
                        try toc.writeByte('\n');
                        try toc.writeByteNTimes(' ', header_stack_size * 4);
                        if (last_columns) |n| {
                            try toc.print("<ul style=\"columns: {}\">\n", .{n});
                        } else {
                            try toc.writeAll("<ul>\n");
                        }
                    } else {
                        last_action = .open;
                    }
                    last_columns = columns;
                    try toc.writeByteNTimes(' ', 4 + header_stack_size * 4);
                    try toc.print("<li><a id=\"toc-{s}\" href=\"#{s}\">{s}</a>", .{ urlized, urlized, content });
                } else if (mem.eql(u8, tag_name, "header_close")) {
                    if (header_stack_size == 0) {
                        return parseError(tokenizer, tag_token, "unbalanced close header", .{});
                    }
                    header_stack_size -= 1;
                    _ = try eatToken(tokenizer, .bracket_close);

                    if (last_action == .close) {
                        try toc.writeByteNTimes(' ', 8 + header_stack_size * 4);
                        try toc.writeAll("</ul></li>\n");
                    } else {
                        try toc.writeAll("</li>\n");
                        last_action = .close;
                    }
                } else if (mem.eql(u8, tag_name, "see_also")) {
                    var list = std.ArrayList(SeeAlsoItem).init(allocator);
                    errdefer list.deinit();

                    while (true) {
                        const see_also_tok = tokenizer.next();
                        switch (see_also_tok.id) {
                            .tag_content => {
                                const content = tokenizer.buffer[see_also_tok.start..see_also_tok.end];
                                try list.append(SeeAlsoItem{
                                    .name = content,
                                    .token = see_also_tok,
                                });
                            },
                            .separator => {},
                            .bracket_close => {
                                try nodes.append(Node{ .SeeAlso = try list.toOwnedSlice() });
                                break;
                            },
                            else => return parseError(tokenizer, see_also_tok, "invalid see_also token", .{}),
                        }
                    }
                } else if (mem.eql(u8, tag_name, "link")) {
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    const name = tokenizer.buffer[name_tok.start..name_tok.end];

                    // Target defaults to the display name; an optional second
                    // param overrides it.
                    const url_name = blk: {
                        const tok = tokenizer.next();
                        switch (tok.id) {
                            .bracket_close => break :blk name,
                            .separator => {
                                const explicit_text = try eatToken(tokenizer, .tag_content);
                                _ = try eatToken(tokenizer, .bracket_close);
                                break :blk tokenizer.buffer[explicit_text.start..explicit_text.end];
                            },
                            else => return parseError(tokenizer, tok, "invalid link token", .{}),
                        }
                    };

                    try nodes.append(Node{
                        .Link = Link{
                            .url = try urlize(allocator, url_name),
                            .name = name,
                            .token = name_tok,
                        },
                    });
                } else if (mem.eql(u8, tag_name, "code")) {
                    _ = try eatToken(tokenizer, .separator);
                    const name_tok = try eatToken(tokenizer, .tag_content);
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(.{
                        .Code = .{
                            .name = tokenizer.buffer[name_tok.start..name_tok.end],
                            .token = name_tok,
                        },
                    });
                } else if (mem.eql(u8, tag_name, "syntax")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    const content_tok = try eatToken(tokenizer, .content);
                    _ = try eatToken(tokenizer, .bracket_open);
                    const end_syntax_tag = try eatToken(tokenizer, .tag_content);
                    const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end];
                    if (!mem.eql(u8, end_tag_name, "endsyntax")) {
                        return parseError(
                            tokenizer,
                            end_syntax_tag,
                            "invalid token inside syntax: {s}",
                            .{end_tag_name},
                        );
                    }
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .InlineSyntax = content_tok });
                } else if (mem.eql(u8, tag_name, "shell_samp")) {
                    _ = try eatToken(tokenizer, .bracket_close);
                    const content_tok = try eatToken(tokenizer, .content);
                    _ = try eatToken(tokenizer, .bracket_open);
                    const end_syntax_tag = try eatToken(tokenizer, .tag_content);
                    const end_tag_name = tokenizer.buffer[end_syntax_tag.start..end_syntax_tag.end];
                    if (!mem.eql(u8, end_tag_name, "end_shell_samp")) {
                        return parseError(
                            tokenizer,
                            end_syntax_tag,
                            "invalid token inside syntax: {s}",
                            .{end_tag_name},
                        );
                    }
                    _ = try eatToken(tokenizer, .bracket_close);
                    try nodes.append(Node{ .Shell = content_tok });
                } else if (mem.eql(u8, tag_name, "syntax_block")) {
                    _ = try eatToken(tokenizer, .separator);
                    const source_type_tok = try eatToken(tokenizer, .tag_content);
                    var name: []const u8 = "sample_code";
                    const maybe_sep = tokenizer.next();
                    switch (maybe_sep.id) {
                        .separator => {
                            const name_tok = try eatToken(tokenizer, .tag_content);
                            name = tokenizer.buffer[name_tok.start..name_tok.end];
                            _ = try eatToken(tokenizer, .bracket_close);
                        },
                        .bracket_close => {},
                        // Fix: report the offending token itself, not the
                        // outer bracket_open token.
                        else => return parseError(tokenizer, maybe_sep, "invalid token", .{}),
                    }
                    const source_type_str = tokenizer.buffer[source_type_tok.start..source_type_tok.end];
                    // The enum field names are exactly the accepted strings.
                    const source_type = std.meta.stringToEnum(SyntaxBlock.SourceType, source_type_str) orelse
                        return parseError(tokenizer, source_type_tok, "unrecognized code kind: {s}", .{source_type_str});
                    const source_token = while (true) {
                        const content_tok = try eatToken(tokenizer, .content);
                        _ = try eatToken(tokenizer, .bracket_open);
                        const end_code_tag = try eatToken(tokenizer, .tag_content);
                        const end_tag_name = tokenizer.buffer[end_code_tag.start..end_code_tag.end];
                        if (!mem.eql(u8, end_tag_name, "end_syntax_block")) {
                            return parseError(
                                tokenizer,
                                end_code_tag,
                                "invalid token inside code_begin: {s}",
                                .{end_tag_name},
                            );
                        }
                        // Fix: the old code had an unreachable duplicate
                        // eatToken after an if/else where both arms exited.
                        _ = try eatToken(tokenizer, .bracket_close);
                        break content_tok;
                    };
                    try nodes.append(Node{ .SyntaxBlock = SyntaxBlock{ .source_type = source_type, .name = name, .source_token = source_token } });
                } else {
                    return parseError(tokenizer, tag_token, "unrecognized tag name: {s}", .{tag_name});
                }
            },
            else => return parseError(tokenizer, token, "invalid token", .{}),
        }
    }

    return Toc{
        .nodes = try nodes.toOwnedSlice(),
        .toc = try toc_buf.toOwnedSlice(),
        .urls = urls,
    };
}
/// Converts arbitrary header text into a URL-safe anchor: alphanumerics,
/// '_' and '-' pass through, spaces become '-', everything else is dropped.
/// Caller owns the returned slice.
fn urlize(allocator: Allocator, input: []const u8) ![]u8 {
    var result = std.ArrayList(u8).init(allocator);
    defer result.deinit();

    for (input) |byte| {
        switch (byte) {
            'a'...'z', 'A'...'Z', '0'...'9', '_', '-' => try result.append(byte),
            ' ' => try result.append('-'),
            else => {},
        }
    }
    return result.toOwnedSlice();
}
/// Returns a newly-allocated copy of `input` with the HTML metacharacters
/// '&', '<', '>' and '"' replaced by their entities. Caller owns the result.
fn escapeHtml(allocator: Allocator, input: []const u8) ![]u8 {
    var buf = std.ArrayList(u8).init(allocator);
    defer buf.deinit();

    const w = buf.writer();
    for (input) |c| {
        switch (c) {
            '&' => try w.writeAll("&amp;"),
            '<' => try w.writeAll("&lt;"),
            '>' => try w.writeAll("&gt;"),
            '"' => try w.writeAll("&quot;"),
            else => try w.writeByte(c),
        }
    }
    return buf.toOwnedSlice();
}
/// Streams `input` to `out`, replacing '&', '<', '>' and '"' with their
/// HTML entities and passing every other byte through unchanged.
fn writeEscaped(out: anytype, input: []const u8) !void {
    for (input) |byte| {
        switch (byte) {
            '&' => try out.writeAll("&amp;"),
            '<' => try out.writeAll("&lt;"),
            '>' => try out.writeAll("&gt;"),
            '"' => try out.writeAll("&quot;"),
            else => try out.writeByte(byte),
        }
    }
}
/// Returns true if `number` occurs anywhere in `slice`.
fn in(slice: []const u8, number: u8) bool {
    return mem.indexOfScalar(u8, slice, number) != null;
}
/// Primitive type names recognized by `isType`; arbitrary-width iN/uN
/// integer names are detected separately by the highlighter.
const builtin_types = [_][]const u8{
    "f16",          "f32",     "f64",        "f80",          "f128",
    "c_longdouble", "c_short", "c_ushort",   "c_int",        "c_uint",
    "c_long",       "c_ulong", "c_longlong", "c_ulonglong",  "c_char",
    "anyopaque",    "void",    "bool",       "isize",        "usize",
    "noreturn",     "type",    "anyerror",   "comptime_int", "comptime_float",
};
/// Returns whether `name` is one of the primitive type names listed in
/// `builtin_types`.
fn isType(name: []const u8) bool {
    for (builtin_types) |type_name| {
        if (mem.eql(u8, name, type_name)) return true;
    }
    return false;
}
/// HTML-escapes `text` to `out`; lines need no special treatment here,
/// so this simply forwards to `writeEscaped`.
fn writeEscapedLines(out: anytype, text: []const u8) !void {
    return writeEscaped(out, text);
}
671 fn tokenizeAndPrintRaw(
672     allocator: Allocator,
673     docgen_tokenizer: *Tokenizer,
674     out: anytype,
675     source_token: Token,
676     raw_src: []const u8,
677 ) !void {
678     const src_non_terminated = mem.trim(u8, raw_src, " \r\n");
679     const src = try allocator.dupeZ(u8, src_non_terminated);
681     try out.writeAll("<code>");
682     var tokenizer = std.zig.Tokenizer.init(src);
683     var index: usize = 0;
684     var next_tok_is_fn = false;
685     while (true) {
686         const prev_tok_was_fn = next_tok_is_fn;
687         next_tok_is_fn = false;
689         const token = tokenizer.next();
690         if (mem.indexOf(u8, src[index..token.loc.start], "//")) |comment_start_off| {
691             // render one comment
692             const comment_start = index + comment_start_off;
693             const comment_end_off = mem.indexOf(u8, src[comment_start..token.loc.start], "\n");
694             const comment_end = if (comment_end_off) |o| comment_start + o else token.loc.start;
696             try writeEscapedLines(out, src[index..comment_start]);
697             try out.writeAll("<span class=\"tok-comment\">");
698             try writeEscaped(out, src[comment_start..comment_end]);
699             try out.writeAll("</span>");
700             index = comment_end;
701             tokenizer.index = index;
702             continue;
703         }
705         try writeEscapedLines(out, src[index..token.loc.start]);
706         switch (token.tag) {
707             .eof => break,
709             .keyword_addrspace,
710             .keyword_align,
711             .keyword_and,
712             .keyword_asm,
713             .keyword_async,
714             .keyword_await,
715             .keyword_break,
716             .keyword_catch,
717             .keyword_comptime,
718             .keyword_const,
719             .keyword_continue,
720             .keyword_defer,
721             .keyword_else,
722             .keyword_enum,
723             .keyword_errdefer,
724             .keyword_error,
725             .keyword_export,
726             .keyword_extern,
727             .keyword_for,
728             .keyword_if,
729             .keyword_inline,
730             .keyword_noalias,
731             .keyword_noinline,
732             .keyword_nosuspend,
733             .keyword_opaque,
734             .keyword_or,
735             .keyword_orelse,
736             .keyword_packed,
737             .keyword_anyframe,
738             .keyword_pub,
739             .keyword_resume,
740             .keyword_return,
741             .keyword_linksection,
742             .keyword_callconv,
743             .keyword_struct,
744             .keyword_suspend,
745             .keyword_switch,
746             .keyword_test,
747             .keyword_threadlocal,
748             .keyword_try,
749             .keyword_union,
750             .keyword_unreachable,
751             .keyword_usingnamespace,
752             .keyword_var,
753             .keyword_volatile,
754             .keyword_allowzero,
755             .keyword_while,
756             .keyword_anytype,
757             => {
758                 try out.writeAll("<span class=\"tok-kw\">");
759                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
760                 try out.writeAll("</span>");
761             },
763             .keyword_fn => {
764                 try out.writeAll("<span class=\"tok-kw\">");
765                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
766                 try out.writeAll("</span>");
767                 next_tok_is_fn = true;
768             },
770             .string_literal,
771             .multiline_string_literal_line,
772             .char_literal,
773             => {
774                 try out.writeAll("<span class=\"tok-str\">");
775                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
776                 try out.writeAll("</span>");
777             },
779             .builtin => {
780                 try out.writeAll("<span class=\"tok-builtin\">");
781                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
782                 try out.writeAll("</span>");
783             },
785             .doc_comment,
786             .container_doc_comment,
787             => {
788                 try out.writeAll("<span class=\"tok-comment\">");
789                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
790                 try out.writeAll("</span>");
791             },
793             .identifier => {
794                 const tok_bytes = src[token.loc.start..token.loc.end];
795                 if (mem.eql(u8, tok_bytes, "undefined") or
796                     mem.eql(u8, tok_bytes, "null") or
797                     mem.eql(u8, tok_bytes, "true") or
798                     mem.eql(u8, tok_bytes, "false"))
799                 {
800                     try out.writeAll("<span class=\"tok-null\">");
801                     try writeEscaped(out, tok_bytes);
802                     try out.writeAll("</span>");
803                 } else if (prev_tok_was_fn) {
804                     try out.writeAll("<span class=\"tok-fn\">");
805                     try writeEscaped(out, tok_bytes);
806                     try out.writeAll("</span>");
807                 } else {
808                     const is_int = blk: {
809                         if (src[token.loc.start] != 'i' and src[token.loc.start] != 'u')
810                             break :blk false;
811                         var i = token.loc.start + 1;
812                         if (i == token.loc.end)
813                             break :blk false;
814                         while (i != token.loc.end) : (i += 1) {
815                             if (src[i] < '0' or src[i] > '9')
816                                 break :blk false;
817                         }
818                         break :blk true;
819                     };
820                     if (is_int or isType(tok_bytes)) {
821                         try out.writeAll("<span class=\"tok-type\">");
822                         try writeEscaped(out, tok_bytes);
823                         try out.writeAll("</span>");
824                     } else {
825                         try writeEscaped(out, tok_bytes);
826                     }
827                 }
828             },
830             .number_literal => {
831                 try out.writeAll("<span class=\"tok-number\">");
832                 try writeEscaped(out, src[token.loc.start..token.loc.end]);
833                 try out.writeAll("</span>");
834             },
836             .bang,
837             .pipe,
838             .pipe_pipe,
839             .pipe_equal,
840             .equal,
841             .equal_equal,
842             .equal_angle_bracket_right,
843             .bang_equal,
844             .l_paren,
845             .r_paren,
846             .semicolon,
847             .percent,
848             .percent_equal,
849             .l_brace,
850             .r_brace,
851             .l_bracket,
852             .r_bracket,
853             .period,
854             .period_asterisk,
855             .ellipsis2,
856             .ellipsis3,
857             .caret,
858             .caret_equal,
859             .plus,
860             .plus_plus,
861             .plus_equal,
862             .plus_percent,
863             .plus_percent_equal,
864             .plus_pipe,
865             .plus_pipe_equal,
866             .minus,
867             .minus_equal,
868             .minus_percent,
869             .minus_percent_equal,
870             .minus_pipe,
871             .minus_pipe_equal,
872             .asterisk,
873             .asterisk_equal,
874             .asterisk_asterisk,
875             .asterisk_percent,
876             .asterisk_percent_equal,
877             .asterisk_pipe,
878             .asterisk_pipe_equal,
879             .arrow,
880             .colon,
881             .slash,
882             .slash_equal,
883             .comma,
884             .ampersand,
885             .ampersand_equal,
886             .question_mark,
887             .angle_bracket_left,
888             .angle_bracket_left_equal,
889             .angle_bracket_angle_bracket_left,
890             .angle_bracket_angle_bracket_left_equal,
891             .angle_bracket_angle_bracket_left_pipe,
892             .angle_bracket_angle_bracket_left_pipe_equal,
893             .angle_bracket_right,
894             .angle_bracket_right_equal,
895             .angle_bracket_angle_bracket_right,
896             .angle_bracket_angle_bracket_right_equal,
897             .tilde,
898             => try writeEscaped(out, src[token.loc.start..token.loc.end]),
900             .invalid, .invalid_periodasterisks => return parseError(
901                 docgen_tokenizer,
902                 source_token,
903                 "syntax error",
904                 .{},
905             ),
906         }
907         index = token.loc.end;
908     }
909     try out.writeAll("</code>");
/// Syntax-highlights the Zig source captured by `source_token` and writes it
/// to `out` as HTML. Thin convenience wrapper: it resolves the token's byte
/// range inside the docgen tokenizer's buffer and delegates to
/// `tokenizeAndPrintRaw`, which does the actual token-by-token rendering.
fn tokenizeAndPrint(
    allocator: Allocator,
    docgen_tokenizer: *Tokenizer,
    out: anytype,
    source_token: Token,
) !void {
    const source_bytes = docgen_tokenizer.buffer[source_token.start..source_token.end];
    return tokenizeAndPrintRaw(allocator, docgen_tokenizer, out, source_token, source_bytes);
}
/// Renders one source-listing block as an HTML `<figure>` with a filename
/// caption. Zig sources get full syntax highlighting via `tokenizeAndPrint`;
/// every other source type is emitted as a plain escaped `<code>` listing,
/// trimmed of surrounding spaces and newlines.
fn printSourceBlock(allocator: Allocator, docgen_tokenizer: *Tokenizer, out: anytype, syntax_block: SyntaxBlock) !void {
    // The caption's CSS class is derived from the source type (e.g. "zig-cap").
    const caption_class = @tagName(syntax_block.source_type);
    try out.print("<figure><figcaption class=\"{s}-cap\"><cite class=\"file\">{s}</cite></figcaption><pre>", .{ caption_class, syntax_block.name });
    if (syntax_block.source_type == .zig) {
        try tokenizeAndPrint(allocator, docgen_tokenizer, out, syntax_block.source_token);
    } else {
        const tok = syntax_block.source_token;
        const listing = mem.trim(u8, docgen_tokenizer.buffer[tok.start..tok.end], " \r\n");
        try out.writeAll("<code>");
        try writeEscapedLines(out, listing);
        try out.writeAll("</code>");
    }
    try out.writeAll("</pre></figure>");
}
/// Renders a shell transcript as an HTML `<figure>`/`<samp>` block.
///
/// Lines beginning with "$ " are commands: the prompt is kept outside a
/// `<kbd>` element that wraps the command text. A trailing backslash marks a
/// line continuation, so the `<kbd>` stays open until the first non-empty
/// continuation line that does not itself end in '\\'. All other lines are
/// treated as program output. When `escape` is set, line content is passed
/// through `writeEscaped`; otherwise it is written verbatim.
fn printShell(out: anytype, shell_content: []const u8, escape: bool) !void {
    const content = mem.trim(u8, shell_content, " \r\n");
    try out.writeAll("<figure><figcaption class=\"shell-cap\">Shell</figcaption><pre><samp>");
    // True while we are inside a multi-line (backslash-continued) command.
    var in_command: bool = false;
    var lines = std.mem.splitScalar(u8, content, '\n');
    while (lines.next()) |raw_line| {
        const line = mem.trimRight(u8, raw_line, " \r");
        const is_prompt = !in_command and line.len > 1 and mem.eql(u8, line[0..2], "$ ");
        const continues = line.len > 0 and line[line.len - 1] == '\\';
        if (is_prompt) {
            try out.writeAll("$ <kbd>");
            const command = std.mem.trimLeft(u8, line[1..], " ");
            if (escape) try writeEscaped(out, command) else try out.writeAll(command);
            if (continues) {
                // Command spills onto the next line; keep <kbd> open.
                try out.writeAll("\n");
                in_command = true;
            } else {
                try out.writeAll("</kbd>" ++ "\n");
            }
        } else if (in_command and line.len > 0 and !continues) {
            // Final line of a continued command: close the <kbd>.
            if (escape) try writeEscaped(out, line) else try out.writeAll(line);
            try out.writeAll("</kbd>" ++ "\n");
            in_command = false;
        } else {
            // Program output, an intermediate continuation line, or a blank line.
            if (escape) try writeEscaped(out, line) else try out.writeAll(line);
            try out.writeAll("\n");
        }
    }
    try out.writeAll("</samp></pre></figure>");
}
987 fn genHtml(
988     allocator: Allocator,
989     tokenizer: *Tokenizer,
990     toc: *Toc,
991     code_dir: std.fs.Dir,
992     out: anytype,
993 ) !void {
994     for (toc.nodes) |node| {
995         switch (node) {
996             .Content => |data| {
997                 try out.writeAll(data);
998             },
999             .Link => |info| {
1000                 if (!toc.urls.contains(info.url)) {
1001                     return parseError(tokenizer, info.token, "url not found: {s}", .{info.url});
1002                 }
1003                 try out.print("<a href=\"#{s}\">{s}</a>", .{ info.url, info.name });
1004             },
1005             .Nav => {
1006                 try out.writeAll(toc.toc);
1007             },
1008             .Builtin => |tok| {
1009                 try out.writeAll("<figure><figcaption class=\"zig-cap\"><cite>@import(\"builtin\")</cite></figcaption><pre>");
1010                 const builtin_code = @embedFile("builtin"); // ๐Ÿ˜Ž
1011                 try tokenizeAndPrintRaw(allocator, tokenizer, out, tok, builtin_code);
1012                 try out.writeAll("</pre></figure>");
1013             },
1014             .HeaderOpen => |info| {
1015                 try out.print(
1016                     "<h{d} id=\"{s}\"><a href=\"#toc-{s}\">{s}</a> <a class=\"hdr\" href=\"#{s}\">ยง</a></h{d}>\n",
1017                     .{ info.n, info.url, info.url, info.name, info.url, info.n },
1018                 );
1019             },
1020             .SeeAlso => |items| {
1021                 try out.writeAll("<p>See also:</p><ul>\n");
1022                 for (items) |item| {
1023                     const url = try urlize(allocator, item.name);
1024                     if (!toc.urls.contains(url)) {
1025                         return parseError(tokenizer, item.token, "url not found: {s}", .{url});
1026                     }
1027                     try out.print("<li><a href=\"#{s}\">{s}</a></li>\n", .{ url, item.name });
1028                 }
1029                 try out.writeAll("</ul>\n");
1030             },
1031             .InlineSyntax => |content_tok| {
1032                 try tokenizeAndPrint(allocator, tokenizer, out, content_tok);
1033             },
1034             .Shell => |content_tok| {
1035                 const raw_shell_content = tokenizer.buffer[content_tok.start..content_tok.end];
1036                 try printShell(out, raw_shell_content, true);
1037             },
1038             .SyntaxBlock => |syntax_block| {
1039                 try printSourceBlock(allocator, tokenizer, out, syntax_block);
1040             },
1041             .Code => |code| {
1042                 const out_basename = try std.fmt.allocPrint(allocator, "{s}.out", .{
1043                     fs.path.stem(code.name),
1044                 });
1045                 defer allocator.free(out_basename);
1047                 const contents = code_dir.readFileAlloc(allocator, out_basename, std.math.maxInt(u32)) catch |err| {
1048                     return parseError(tokenizer, code.token, "unable to open '{s}': {s}", .{
1049                         out_basename, @errorName(err),
1050                     });
1051                 };
1052                 defer allocator.free(contents);
1054                 try out.writeAll(contents);
1055             },
1056         }
1057     }