munge links to handle following links from an old state
[gemrepl.git] / main.c
blob822bf24466deeb0f859bd54bab592c40f4ec195e
1 /* Copyright 2021, Martin Bays <mbays@sdf.org>
2 * SPDX-License-Identifier: GPL-3.0-or-later */
3 #include <fcntl.h>
4 #include <getopt.h>
5 #include <poll.h>
6 #include <pthread.h>
7 #include <signal.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14 #include <sys/types.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
18 #include "gemscgi.h"
/* Size of the fixed session table. Every live child keeps three pipe
 * descriptors open in this process, so if you increase this too far you may
 * run into file descriptor limits. */
#define MAX_CHILDREN 256

/* Number of characters in a session id; ids are stored and written with an
 * explicit length, without a terminating NUL. */
#define SESSION_ID_LEN 8
25 typedef struct Child {
26 bool exists;
27 bool newborn;
28 pthread_mutex_t *mutex; // initialised if child->exists, maybe also if not
29 char sess_id[SESSION_ID_LEN];
30 char owner[64];
31 uint64_t last_active;
32 pid_t pid;
33 int in;
34 int out;
35 int flag;
36 bool reading;
37 int serial;
39 bool nolink;
40 bool plain;
41 } Child;
/* How the child's output is to be presented; selected with -f/--format. */
typedef enum output_format {
    gemtext,    /* text/gemini (default) */
    pre,        /* preformatted text */
    unwrapped,  /* plain text without hard wrapping */
    raw         /* gemini protocol output, including response headers */
} output_format;
50 typedef struct State {
51 const char *command;
52 char *const *args;
53 output_format format;
55 int max_children;
56 int read_timeout;
57 int pause_timeout;
58 bool nolink;
59 bool single_session;
61 int num_children;
62 Child children[MAX_CHILDREN];
63 } State;
65 static bool spawn(const char *command, char *const *args, const char *query,
66 Child *child, int socket)
68 int infds[2], outfds[2], flagfds[2];
69 if (pipe(infds) == -1 || pipe(outfds) == -1 || pipe(flagfds) == -1) {
70 perror("pipe");
71 return false;
74 const pid_t pid = fork();
75 if (pid == -1) {
76 perror("fork");
77 return false;
80 if (pid == 0) {
81 // child
82 close(socket);
83 close(infds[1]);
84 close(outfds[0]);
85 close(flagfds[0]);
86 dup2(infds[0], 0);
87 dup2(outfds[1], 1);
88 dup2(outfds[1], 2);
89 dup2(flagfds[1], 3);
90 setbuffer(stdin, NULL, 0);
91 setbuffer(stdout, NULL, 0);
92 setbuffer(fdopen(3, "w"), NULL, 0);
93 setsid();
95 char tlsenv[64+16+1];
96 snprintf(tlsenv, 64+16+1, "TLS_CLIENT_HASH=%s", child->owner);
97 putenv(tlsenv);
99 if (query != NULL) {
100 char qenv[1024+16+1];
101 snprintf(qenv, 1024+16+1, "SPAWN_PARAMETER=%s", query);
102 putenv(qenv);
105 execvp(command, args);
106 exit(1);
107 } else {
108 // parent
109 close(infds[0]);
110 close(outfds[1]);
111 close(flagfds[1]);
112 child->pid = pid;
113 child->in = infds[1];
114 child->out = outfds[0];
115 child->flag = flagfds[0];
116 fcntl(child->in, F_SETFD, FD_CLOEXEC);
117 fcntl(child->out, F_SETFD, FD_CLOEXEC);
118 fcntl(child->flag, F_SETFD, FD_CLOEXEC);
119 setbuffer(fdopen(infds[1], "w"), NULL, 0);
120 setbuffer(fdopen(outfds[0], "r"), NULL, 0);
121 setbuffer(fdopen(flagfds[0], "r"), NULL, 0);
124 return true;
/* Write all `n` bytes of `buf` to `fd`, retrying after short writes.
 * Returns true once everything has been written (trivially so for n <= 0),
 * false as soon as write() reports an error or makes no progress. */
static bool write_all(int fd, const char* buf, int n)
{
    while (n > 0) {
        const ssize_t w = write(fd, buf, (size_t)n);
        // w == 0 would leave n unchanged and loop forever; treat it as failure.
        if (w <= 0) return false;
        buf += w;
        n -= (int)w;
    }
    return true;
}
138 static void set_child_last_active(Child *child)
140 struct timespec clock_mono;
141 clock_gettime(CLOCK_MONOTONIC, &clock_mono);
142 child->last_active = clock_mono.tv_sec;
/* Write anything written in a timely fashion on `in` to `out`.
 *
 * Streaming will cease if there is nothing to read on `in` for `read_timeout`
 * ms, or after `pause_timeout` ms if something has been read, or 20 ms after
 * '<' is read from `flag` without a subsequent '>' (tracked in
 * *child_reading). A negative timeout makes poll() wait indefinitely.
 *
 * escape_pre / escape_all enable escaping of backtick runs (and, for
 * escape_all, of gemtext line markers) in the copied text. If
 * munge_links_serial > 0, a line starting with "=>", one blank or tab and
 * then '?' gets "?!l+<serial>" inserted in front of that '?'.
 *
 * Return -1 on read error, 0 on HUP, else 1. */
static int stream_text(int in, int flag, int out,
        bool escape_pre,
        bool escape_all,
        int munge_links_serial,
        bool *child_reading,
        int read_timeout, int pause_timeout) {
    char buf[256];
    struct pollfd pfd[2] = { { in, POLLIN | POLLHUP, 0 }, { flag, POLLIN, 0 } };
    int backticks = 0;
    char escape = 0;
    int munge = 0;
    bool read_something = false;
    bool eof = false;

    /* NOTE(review): in this function `backticks` stays at -1 after the first
     * non-backtick byte and `escape` is only ever assigned while it already
     * equals '\n'; nothing below re-arms either at a newline. Confirm whether
     * a per-line reset is missing. Behaviour is kept as found. */

    /* Note we set no total maximum time or output size limit; we leave it to
     * the user to e.g. set a ulimit to handle runaway processes. */
    while (true) {
        /* Before the first byte arrives wait read_timeout; once output has
         * started, a shorter pause_timeout of silence ends the response. */
        const int timeout = *child_reading ? 20 :
            read_something ? pause_timeout : read_timeout;
        /* On failure revents is not meaningful, so do not act on it. */
        if (poll(pfd, 2, timeout) < 0) return -1;

        if (pfd[0].revents & POLLIN) {
            read_something = true;
            const ssize_t r = read(in, buf, sizeof buf - 1);
            if (r < 0) return -1;
            if (r == 0) {
                /* End of file reported as readable: without this we would
                 * be woken again immediately, forever. */
                eof = true;
                break;
            }
            buf[r] = 0;

            /* The chunk is processed as a C string, so it ends at the first
             * NUL byte. */
            const char *b = buf;
            while (*b) {
                if ((escape_pre || escape_all) && backticks >= 0) {
                    if (*b == '`') {
                        escape = 0;
                        ++backticks;
                        if (backticks == 3) {
                            write(out, " ```", 4);
                            backticks = -1;
                        }
                        ++b;
                        continue;
                    } else while (--backticks >= 0) write(out, "`", 1);
                }

                if (munge_links_serial > 0) {
                    // Replace "=>[whitespace]?foo" at start of a line with
                    // "=>[whitespace]?!l+[serial]?foo"
                    if ((munge == 0 && *b == '=') ||
                            (munge == 1 && *b == '>') ||
                            (munge == 2 && (*b == ' ' || *b == '\t'))) {
                        munge++;
                    } else if (munge == 3 && (*b == '?')) {
                        char tag[32];
                        const int len = snprintf(tag, sizeof tag, "?!l+%d",
                                munge_links_serial);
                        if (len > 0) write(out, tag, (size_t)len);
                        munge = -1;
                    } else if (*b == '\n') {
                        munge = 0;
                    } else {
                        munge = -1;
                    }
                } else if (escape_all && escape > 0) {
                    if (escape == '\n') {
                        if (*b == '#' || *b == '>') {
                            write(out, " ", 1);
                        } else if (*b == '=' || *b == '*') {
                            // Hold this byte back until we see what follows.
                            escape = *b;
                            ++b;
                            continue;
                        }
                    } else {
                        if ((escape == '=' && *b == '>')
                                || (escape == '*' && *b == ' ')) {
                            write(out, " ", 1);
                        }
                        write(out, &escape, 1);
                    }
                    escape = 0;
                }

                write(out, b, 1);
                ++b;
            }
        } else if (pfd[1].revents & POLLIN) {
            const ssize_t r = read(flag, buf, sizeof buf);
            for (ssize_t i = 0; i < r; ++i) {
                if (buf[i] == '<') *child_reading = true;
                if (buf[i] == '>') *child_reading = false;
            }
        } else break;
    }

    /* Flush anything that was being held back. */
    while (--backticks >= 0) write(out, "`", 1);
    if (escape > 0 && escape != '\n') write(out, &escape, 1);
    return (eof || (pfd[0].revents & POLLHUP)) ? 0 : 1;
}
/* Response-writing shorthands. Both expand to write_all() on a variable named
 * `socket`, which must be in scope wherever they are used. put() sends a
 * NUL-terminated string, putn() sends exactly n bytes. */
#define put(s) write_all(socket, (s), strlen(s))
#define putn(s,n) write_all(socket, (s), (n))
244 static Child *get_session(State *state, const Request_Info *request_info, int socket)
247 if (request_info->tls_client_hash == NULL) {
248 put("60 Client certificate required\r\n");
249 return NULL;
252 if (request_info->path_info == NULL || strlen(request_info->path_info) <= 1) {
253 if (state->single_session) {
254 for (int i = 0; i < state->num_children; ++i) {
255 Child *const c = &state->children[i];
256 if (c->mutex != NULL && pthread_mutex_trylock(c->mutex) != 0) continue;
257 bool found = (c->exists &&
258 0 == strncmp(c->owner, request_info->tls_client_hash, 64));
259 if (c->mutex != NULL) pthread_mutex_unlock(c->mutex);
260 if (found) {
261 put("30 ");
262 put(request_info->script_path);
263 put("/");
264 putn(c->sess_id, SESSION_ID_LEN);
265 put("\r\n");
266 return NULL;
271 Child *slot = NULL;
272 uint64_t last_active = UINT64_MAX;
273 for (int i = 0; i < state->num_children; ++i) {
274 Child *const c = &state->children[i];
275 if (c->mutex != NULL && pthread_mutex_trylock(c->mutex) != 0) continue;
276 if (c->exists) {
277 if (last_active > c->last_active) {
278 slot = c;
279 last_active = c->last_active;
281 } else if (slot == NULL || last_active < UINT64_MAX) slot = c;
282 if (c->mutex != NULL) pthread_mutex_unlock(c->mutex);
285 if (slot == NULL || (last_active < UINT64_MAX && state->num_children < state->max_children)) {
286 slot = &state->children[state->num_children++];
289 Child *const child = slot;
290 if (child->mutex != NULL) pthread_mutex_lock(child->mutex);
292 if (child->exists) {
293 // TODO: would be nice to queue a regretful message for the owner
294 // of the child we're killing...
295 close(child->in);
296 close(child->out);
297 close(child->flag);
298 kill(child->pid, 9);
299 child->exists = false;
302 memset(child, 0, sizeof(Child));
304 strncpy(child->owner, request_info->tls_client_hash, 64);
305 for (int i = 0; i < SESSION_ID_LEN; ++i) {
306 child->sess_id[i] = 'A' + random()%26 + (random()%2 ? ('a'-'A') : 0);
309 if (!spawn(state->command, state->args, request_info->query_string_decoded, child, socket)) {
310 put("40 Spawn failure.\r\n");
311 if (child->mutex != NULL) pthread_mutex_unlock(child->mutex);
312 return NULL;
315 if (child->mutex == NULL) {
316 child->mutex = malloc(sizeof(pthread_mutex_t));
317 if (child->mutex == NULL) {
318 put("40 Spawn failure (malloc).\r\n");
319 return NULL;
322 if (pthread_mutex_init(child->mutex, NULL) != 0) {
323 put("40 Spawn failure (mutex_init).\r\n");
324 free(child->mutex);
325 child->mutex = NULL;
326 return NULL;
329 // Note: we never destroy the mutex, because we never know that it
330 // would be safe to do so.
332 pthread_mutex_lock(child->mutex);
335 child->exists = true;
336 child->newborn = true;
337 set_child_last_active(child);
339 child->nolink = state->nolink;
341 put("30 ");
342 put(request_info->script_path);
343 put("/");
344 putn(child->sess_id, SESSION_ID_LEN);
345 put("\r\n");
346 pthread_mutex_unlock(child->mutex);
347 return NULL;
350 if (0 == strncmp(request_info->path_info, "/list", strlen(request_info->path_info))) {
351 put("20 text/gemini\r\n");
352 bool found = false;
353 for (int i = 0; i < state->num_children; ++i) {
354 Child *const c = &state->children[i];
355 if (c->mutex != NULL && pthread_mutex_trylock(c->mutex) != 0) continue;
356 if (c->exists &&
357 0 == strncmp(c->owner, request_info->tls_client_hash, 64)) {
358 if (!found) {
359 found = true;
360 put("20 text/gemini\r\n");
362 put("=> ");
363 put(request_info->script_path);
364 put("/");
365 putn(c->sess_id, SESSION_ID_LEN);
366 put(" Resume session\n");
368 if (c->mutex != NULL) pthread_mutex_unlock(c->mutex);
370 if (!found) put("No sessions found.\n");
371 return NULL;
374 if (strlen(request_info->path_info) != 1+SESSION_ID_LEN) {
375 put("51 Bad session id.\r\n");
376 return NULL;
379 // drop initial '/'
380 const char *sess_id = request_info->path_info + 1;
382 /* Find child with this sess_id.
383 * For simplicity, in particular for the mutex handling, we use a static
384 * array of children rather than allocating dynamically, and don't sort.
385 * This could be optimised. */
386 Child *child = NULL;
387 for (int i = 0; child == NULL && i < state->num_children; ++i) {
388 Child *const c = &state->children[i];
389 if (c->mutex != NULL && pthread_mutex_trylock(c->mutex) != 0) continue;
390 if (c->exists &&
391 0 == strncmp(c->sess_id, sess_id, SESSION_ID_LEN)) {
392 child = c;
394 if (c->mutex != NULL) pthread_mutex_unlock(c->mutex);
397 if (child == NULL) {
398 put("20 text/gemini\r\nSession not found.\n=> ");
399 put(request_info->script_path);
400 put(" Start new session\n");
401 return NULL;
404 pthread_mutex_lock(child->mutex);
405 const char* owner = child->owner;
406 pthread_mutex_unlock(child->mutex);
408 if (0 != strncmp(owner, request_info->tls_client_hash, 64)) {
409 put("61 Wrong certificate for session.\r\n");
410 return NULL;
413 return child;
416 static void do_command(const State* state, Child *child, const char* q, int socket) {
417 if (*q == '!') {
418 ++q;
419 if (*q == '?') {
420 put("10\r\n");
421 return;
422 } else if (0 == strncmp(q, "help", strlen(q))) {
423 put("20 text/gemini\r\n");
424 put("An input line not beginning with '!' will be passed to the process.\n");
425 put("\n");
426 put("# gemrepl meta commands\n");
427 put("=> ?!help !help: This help\n");
428 put("=> ?!kill !kill: kill process\n");
429 if (state->format != raw) {
430 put("=> ?!nolink !nolink: suppress input link\n");
431 put("=> ?!showlink !showlink: show input link\n");
432 put("=> ?!plain !plain: use text/plain for responses\n");
433 put("=> ?!gemtext !gemtext: use text/gemini for responses (default)\n");
435 put("=> ?!C !C: pass ^C (SIGINT) to process\n");
436 put("=> ?!? !?: Prompt for input\n");
437 put("=> ?!! !!: Literal '!'\n");
438 return;
439 } else if (0 == strncmp(q, "kill", strlen(q))) {
440 kill(-child->pid, SIGKILL);
441 q += strlen(q);
442 } else if (0 == strncmp(q, "C", strlen(q))) {
443 kill(-child->pid, SIGINT);
444 q += strlen(q);
445 } else if (0 == strncmp(q, "nolink", strlen(q))) {
446 // TODO: might be better to have this be a permanent option
447 // attached to the cert rather than the child.
448 child->nolink = true;
449 put("20 text/gemini\r\n");
450 put("Input links disabled.\n");
451 put("=> ?!showlink Re-enable input links\n");
452 return;
453 } else if (0 == strncmp(q, "showlink", strlen(q))) {
454 child->nolink = false;
455 put("20 text/gemini\r\n");
456 put("Input links enabled.\n");
457 put("=> ?!? Input command\n");
458 return;
459 } else if (0 == strncmp(q, "plain", strlen(q))) {
460 child->plain = true;
461 put("20 text/gemini\r\n");
462 put("Plaintext mode enabled.\n");
463 put("=> ?!gemtext Re-enable gemtext\n");
464 return;
465 } else if (0 == strncmp(q, "gemtext", strlen(q))) {
466 child->plain = false;
467 put("20 text/gemini\r\n");
468 put("Gemtext mode enabled.\n");
469 put("=> ?!? Input command\n");
470 return;
471 } else if (0 == strncmp(q, "l+", strlen("l+"))) {
472 q += 2;
473 if (child->serial == atoi(q)) {
474 while (*q && *q != '?') {
475 q++;
477 } else {
478 put("20 text/gemini\r\n");
479 put("That link is stale.\n");
480 put("=> ?!? Input command\n");
481 return;
483 } else if (*q != '!') {
484 put("40 Unknown gemrepl meta-command (use '!!' for a literal '!')\r\n");
485 return;
487 } else if (strchr(q, '\n') != NULL) {
488 put("40 Input may not include embedded newlines\r\n");
489 return;
492 child->serial++;
494 if (state->format != raw) {
495 if (child->plain) put("20 text/plain\r\n");
496 else put("20 text/gemini\r\n");
498 if (child->newborn) {
499 put("[gemrepl: child spawned. Input \"!help\" for meta-commands]\n");
502 if (!(child->nolink || child->plain)) put("=> ?!? Input command\n");
505 if (!child->newborn) kill(-child->pid, SIGCONT);
507 int qlen = strlen(q);
508 if (!child->newborn) {
509 bool succ = (write(child->in, q, qlen) == qlen
510 && write(child->in, "\n", 1) == 1);
511 if (!succ) {
512 put("[gemrepl: error when writing to child]\n");
515 child->reading = false;
518 if (state->format == pre && !child->plain) put("```\n");
519 const int succ = stream_text(child->out, child->flag, socket,
520 state->format == pre && !child->plain,
521 state->format == unwrapped && !child->plain,
522 state->format == gemtext ? child->serial : 0,
523 &child->reading,
524 state->read_timeout,
525 state->pause_timeout);
526 if (state->format == pre && !child->plain) put("\n```\n");
528 if (succ < 0) put("[gemrepl: error when reading from child]\n");
529 else if (succ == 0) {
530 // got HUP; sleep briefly to give child a chance to exit
531 usleep(50000);
534 set_child_last_active(child);
535 child->newborn = false;
537 if (waitpid(child->pid, NULL, WNOHANG) == child->pid) {
538 if (state->format != raw) {
539 put("[gemrepl: child process terminated]\n");
540 put("=> . Start new session\n");
542 close(child->in);
543 close(child->out);
544 close(child->flag);
545 child->exists = false;
546 } else {
547 kill(-child->pid, SIGSTOP);
551 // Wrap arguments of do_command into a single struct, for use with
552 // pthread_create.
553 typedef struct Do_Command_Arg {
554 const State* state;
555 Child *child;
556 const char* q;
557 int socket;
558 } Do_Command_Arg;
560 static void *do_command_thread(void *object)
562 Do_Command_Arg *arg = (Do_Command_Arg *)object;
564 pthread_mutex_lock(arg->child->mutex);
565 do_command(arg->state, arg->child, arg->q, arg->socket);
566 pthread_mutex_unlock(arg->child->mutex);
568 close(arg->socket);
569 free(arg);
570 return NULL;
573 void respond(void *object, const Request_Info *request_info, int socket)
575 State *state = (State *)object;
577 Child *child = get_session(state, request_info, socket);
579 if (child == NULL) {
580 close(socket);
581 return;
584 Do_Command_Arg *arg = malloc(sizeof(Do_Command_Arg));
585 if (arg == NULL) {
586 close(socket);
587 return;
589 arg->state = state;
590 arg->child = child;
591 arg->q = request_info->query_string_decoded;
592 arg->socket = socket;
594 pthread_t tid;
595 pthread_create(&tid, NULL, do_command_thread, arg);
/* Default for -t/--read-timeout: how long, in ms, to wait for the child to
 * output something. */
#define DEF_READ_TIMEOUT 3000

/* Default for -T/--pause-timeout: how long, in ms, the child can pause
 * between writes before we consider it to have finished writing. */
#define DEF_PAUSE_TIMEOUT 300
605 static void usage()
607 printf("Usage: gemrepl [OPTION]... -s PATH COMMAND [ARG]...\n");
608 printf(" -h --help This help\n");
609 printf(" -s PATH --socket=PATH Path for socket file, which will be created\n");
610 printf(" -m NUM --max-children=NUM Max concurrent children to spawn (%d)\n", MAX_CHILDREN);
611 printf(" -t MS --read-timeout=MS Time to wait for child to start writing (%d)\n", DEF_READ_TIMEOUT);
612 printf(" -T MS --pause-timeout=MS Silence period after which child is paused (%d)\n", DEF_PAUSE_TIMEOUT);
613 printf(" -S --synchronous Disable timeouts. Use fd 3 instead (see docs).\n");
614 printf(" -L --no-link Don't write input links.\n");
615 printf(" -1 --single-session Allow only one session per user.\n");
616 printf(" -f FMT --format=FMT Format of output of command. Possible formats:\n");
617 printf(" gemtext: text/gemini (default)\n");
618 printf(" pre: preformatted text\n");
619 printf(" unwrapped: plain text without hard wrapping\n");
620 printf(" raw: gemini protocol output, including response headers\n");
624 /* state as global variable, so we can clean up on termination */
625 State state = {};
627 static void cleanup(int sig) {
628 for (int i = 0; i < state.num_children; ++i) {
629 Child *const c = &state.children[i];
630 if (c->exists) {
631 close(c->in);
632 close(c->out);
633 close(c->flag);
634 kill(-c->pid, SIGKILL);
635 waitpid(c->pid, NULL, 0);
638 exit(1);
641 int main(int argc, char **argv)
643 if (argc < 2) {
644 usage();
645 exit(1);
648 state.max_children = MAX_CHILDREN;
649 state.read_timeout = DEF_READ_TIMEOUT;
650 state.pause_timeout = DEF_PAUSE_TIMEOUT;
651 state.format = gemtext;
653 const struct option longoptions[] =
654 { { "help", 0, NULL, 'h' }
655 , { "socket", 1, NULL, 's' }
656 , { "format", 1, NULL, 'f' }
657 , { "no-link", 1, NULL, 'L' }
658 , { "max-children", 1, NULL, 'm' }
659 , { "read-timeout", 1, NULL, 't' }
660 , { "pause-timeout", 1, NULL, 'T' }
661 , { "synchronous", 0, NULL, 'S' }
662 , { "single-session", 0, NULL, '1' }
663 , { 0,0,0,0 }
665 int o;
666 const char *socketname = NULL;
667 while (-1 != (o = getopt_long(argc, argv, "+1hs:f:Lm:t:T:S", longoptions, NULL))) {
668 switch (o) {
669 case 'h':
670 case '?':
671 usage();
672 exit((o=='?'));
673 case 's':
674 socketname = optarg;
675 break;
676 case '1':
677 state.single_session = true;
678 break;
679 case 'f':
680 if (0 == strcmp(optarg, "gemtext")) state.format=gemtext;
681 else if (0 == strcmp(optarg, "pre")) state.format=pre;
682 else if (0 == strcmp(optarg, "unwrapped")) state.format=unwrapped;
683 else if (0 == strcmp(optarg, "raw")) state.format=raw;
684 else {
685 printf("Unknown format.\n");
686 exit(1);
688 break;
689 case 'L':
690 state.nolink = true;
691 break;
692 case 'm':
693 state.max_children = atoi(optarg);
694 if (state.max_children <= 0 || state.max_children > MAX_CHILDREN) {
695 printf("Bad value for max children.\n");
696 printf("You may need to increase MAX_CHILDREN in the source.\n");
697 exit(1);
699 break;
700 case 't':
701 state.read_timeout = atoi(optarg);
702 break;
703 case 'T':
704 state.pause_timeout = atoi(optarg);
705 break;
706 case 'S':
707 state.read_timeout = -1;
708 state.pause_timeout = -1;
709 break;
713 if (argv[optind] == NULL || socketname == NULL) {
714 usage();
715 exit(1);
718 state.command = argv[optind];
719 state.args = &argv[optind];
721 srandom(time(NULL));
723 struct sigaction act = {};
724 act.sa_handler = cleanup;
725 sigaction(SIGTERM, &act, NULL);
726 sigaction(SIGINT, &act, NULL);
727 act.sa_handler = SIG_IGN;
728 sigaction(SIGPIPE, &act, NULL);
730 runSCGI(socketname, respond, &state);