1 /* Gopher access protocol (RFC 1436) */
3 /* Based on version of HTGopher.c in the lynx tree.
10 * 26 Sep 90 Adapted from other accesses (News, HTTP) TBL
11 * 29 Nov 91 Downgraded to C, for portable implementation.
12 * 10 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). Added a
13 * form-based CSO/PH gateway. Can be invoked via a
14 * "cso://host[:port]/" or "gopher://host:105/2"
15 * URL. If a gopher URL is used with a query token
16 * ('?'), the old ISINDEX procedure will be used
17 * instead of the form-based gateway.
18 * 15 Mar 96 Foteos Macrides (macrides@sci.wfbr.edu). Pass
19 * port 79, gtype 0 gopher URLs to the finger
33 #include "cache/cache.h"
34 #include "intl/gettext/libintl.h"
35 #include "main/module.h"
36 #include "network/connection.h"
37 #include "network/socket.h"
38 #include "protocol/common.h"
39 #include "protocol/gopher/gopher.h"
40 #include "protocol/protocol.h"
41 #include "protocol/uri.h"
42 #include "util/conv.h"
43 #include "util/memory.h"
44 #include "util/string.h"
/* Module descriptor for the Gopher protocol, built with struct_module().
 * Only the name (N_("Gopher")) and the NULL submodules argument are
 * visible here; the remaining initializer arguments are outside this
 * view. */
46 struct module gopher_protocol_module
= struct_module(
47 /* name: */ N_("Gopher"),
50 /* submodules: */ NULL
,
/* Enumerators of enum gopher_entity. Apart from GOPHER_UNKNOWN, each
 * visible value is a character literal, and the parsing code assigns a
 * character taken from the URI data or from a menu line straight to a
 * variable of this type. GOPHER_UNKNOWN (0) marks the fall-back entry
 * of the entity table. */
57 /* Gopher entity types */
59 GOPHER_UNKNOWN
= 0 , /* Special fall-back entity */
61 GOPHER_DIRECTORY
= '1',
64 GOPHER_MACBINHEX
= '4',
65 GOPHER_PCBINARY
= '5',
66 GOPHER_UUENCODED
= '6',
71 GOPHER_HTML
= 'h', /* HTML */
72 GOPHER_CHTML
= 'H', /* HTML */
75 GOPHER_WWW
= 'w', /* W3 address */
78 GOPHER_INFO
= 'i', /* Information or separator line */
79 GOPHER_DUPLICATE
= '+',
80 GOPHER_PLUS_IMAGE
= ':', /* Addition from Gopher Plus */
81 GOPHER_PLUS_MOVIE
= ';',
82 GOPHER_PLUS_SOUND
= '<',
83 GOPHER_PLUS_PDF
= 'P',
/* Default Gopher Node type is directory listing */
#define DEFAULT_GOPHER_ENTITY	GOPHER_DIRECTORY

/* Non-zero unless @entity is GOPHER_TELNET, GOPHER_TN3270 or GOPHER_WWW.
 * The menu code turns the first two into telnet:// and tn3270:// links
 * and treats the third as a pointer to a W3 address, so none of them is
 * fetched through a gopher:// link.
 *
 * @entity is expanded three times; pass an expression without side
 * effects. */
#define entity_needs_gopher_access(entity) \
	((entity) != GOPHER_TELNET \
	 && (entity) != GOPHER_TN3270 \
	 && (entity) != GOPHER_WWW)
94 struct gopher_entity_info
{
95 enum gopher_entity type
;
96 unsigned char *description
;
97 unsigned char *content_type
;
100 /* This table provides some hard-coded associations between entity type
101 * and MIME type. A NULL MIME type in this table indicates
102 * that the MIME type should be deduced from the extension.
104 * - Lynx uses "text/plain" for GOPHER_FILE, but it can be anything.
105 * - Lynx uses "image/gif" for GOPHER_IMAGE and GOPHER_PLUS_IMAGE,
106 * but they really can be anything.
107 * - GOPHER_BINARY can be, for example, a tar ball, so using
108 * "application/octet-stream" is a bad idea.
110 static struct gopher_entity_info gopher_entity_info
[] = {
111 { GOPHER_BINARY
, " (BINARY)", NULL
},
112 { GOPHER_CHTML
, " (CHTML)", "text/html" },
113 { GOPHER_CSO
, " (CSO)", "text/html" },
114 { GOPHER_DIRECTORY
, " (DIRECTORY)", "text/html" },
115 { GOPHER_FILE
, " (FILE)", NULL
/* "text/plain" */ },
116 { GOPHER_GIF
, " (GIF IMAGE)", "image/gif" },
117 { GOPHER_HTML
, " (HTML)", "text/html" },
118 { GOPHER_IMAGE
, " (IMAGE)", NULL
/* "image/gif" */ },
119 { GOPHER_INDEX
, " (INDEX)", "text/html" },
120 { GOPHER_MACBINHEX
, "(BINARY HEX)", "application/octet-stream" },
121 { GOPHER_MIME
, " (MIME)", "application/octet-stream" },
122 { GOPHER_PCBINARY
, " (PCBINARY)", "application/octet-stream" },
123 { GOPHER_PLUS_IMAGE
, " (IMAGE+)", NULL
/* "image/gif" */ },
124 { GOPHER_PLUS_MOVIE
, " (MOVIE)", "video/mpeg" },
125 { GOPHER_PLUS_PDF
, " (PDF)", "application/pdf" },
126 { GOPHER_PLUS_SOUND
, " (SOUND+)", "audio/basic" },
127 { GOPHER_SOUND
, " (SOUND)", "audio/basic" },
128 { GOPHER_TELNET
, " (TELNET)", NULL
},
129 { GOPHER_TN3270
, " (TN3270)", NULL
},
130 { GOPHER_UUENCODED
, " (UUENCODED)", "application/octet-stream" },
131 { GOPHER_WWW
, "(W3 ADDRESS)", NULL
},
133 { GOPHER_INFO
, " ", NULL
},
134 { GOPHER_ERROR
, NULL
, NULL
},
135 /* XXX: Keep GOPHER_UNKNOWN last so it is easy to access. */
136 { GOPHER_UNKNOWN
, " ", "application/octet-stream" },
139 static struct gopher_entity_info
*
140 get_gopher_entity_info(enum gopher_entity type
)
144 for (entry
= 0; entry
< sizeof_array(gopher_entity_info
) - 1; entry
++)
145 if (gopher_entity_info
[entry
].type
== type
)
146 return &gopher_entity_info
[entry
];
148 assert(gopher_entity_info
[entry
].type
== GOPHER_UNKNOWN
);
150 return &gopher_entity_info
[entry
];
153 static unsigned char *
154 get_gopher_entity_description(enum gopher_entity type
)
156 struct gopher_entity_info
*info
= get_gopher_entity_info(type
);
158 return info
? info
->description
: NULL
;
/* Per-connection Gopher state, stored in conn->info.
 *
 * @entity points at the entity table entry for the request. @command
 * holds the request text: init_gopher_connection_info() allocates the
 * structure with command.length extra bytes and copies the request in,
 * so the array extends past its declared single element.
 * init_gopher_connection_info() also assigns a commandlen member whose
 * declaration is not visible in this view. */
162 struct gopher_connection_info
{
163 struct gopher_entity_info
*entity
;
166 unsigned char command
[1];
/* Append @length bytes of @string to @command and URI-decode the newly
 * appended part in place.
 *
 * Two append paths are visible: one that replaces '+' with ' ' while
 * copying and one that copies the bytes unchanged. The condition that
 * selects between them, and the fourth parameter (callers pass 0 or 1),
 * are outside the visible lines.
 *
 * After decoding, command->length is recomputed with strlen().
 * NOTE(review): that presumably truncates the command at the first NUL
 * byte if decoding can produce one (e.g. from "%00") -- confirm against
 * decode_uri(). */
169 /* De-escape a selector into a command. */
170 /* The % hex escapes are converted. Otherwise, the string is copied. */
172 add_uri_decoded(struct string
*command
, unsigned char *string
, int length
,
175 int oldlen
= command
->length
;
182 /* Remove plus signs 921006 */
183 if (!add_string_replace(command
, string
, length
, '+', ' '))
186 } else if (!add_bytes_to_string(command
, string
, length
)) {
190 assert(command
->length
> oldlen
);
191 /* FIXME: Decoding the whole command string should not be a problem,
192 * and I don't remember why I didn't do that in the first place.
194 decode_uri(command
->source
+ oldlen
);
196 /* Evil decode_uri_string() modifies the string */
197 command
->length
= strlen(command
->source
);
200 static struct connection_state
init_gopher_index_cache_entry(struct connection
*conn
);
/* Build the request text for @selector into @command.
 *
 * @command is initialised here. The selector is split at the first '?'
 * found within @selectorlen bytes; a query that starts with "search="
 * (compared case-insensitively) gets special handling in lines that are
 * not visible. Three command shapes are visible:
 *  - selector, a TAB, then the query;
 *  - selector, the literal "query ", then the query;
 *  - the selector alone.
 * In all of them the pieces are URI-decoded by add_uri_decoded(), and a
 * CRLF is appended at the end. Which entity type selects which shape is
 * decided by lines outside this view.
 *
 * Returns S_CONN when @command is ready to be sent; the caller then owns
 * @command. Returns S_OUT_OF_MEM when @command cannot be initialised.
 * On the visible "no search required" paths @command is released and
 * the state of the index / CSO cover page helper is returned instead.
 *
 * NOTE(review): `query[1]` is read right after the '?'. If the '?' is
 * the last of the @selectorlen bytes this looks one byte past the
 * counted selector -- presumably fine because the URI string continues
 * or is NUL-terminated there; confirm. */
202 static struct connection_state
203 add_gopher_command(struct connection
*conn
, struct string
*command
,
204 enum gopher_entity entity
,
205 unsigned char *selector
, int selectorlen
)
207 unsigned char *query
;
210 if (!init_string(command
))
211 return connection_state(S_OUT_OF_MEM
);
213 /* Look for search string */
214 query
= memchr(selector
, '?', selectorlen
);
216 /* Check if no search is required */
217 if (!query
|| !query
[1]) {
219 if (query
) selectorlen
-= 1;
224 querylen
= selector
+ selectorlen
- query
;
226 selectorlen
-= querylen
+ 1;
227 if (querylen
>= 7 && !c_strncasecmp(query
, "search=", 7)) {
235 /* No search required? */
237 done_string(command
);
238 return init_gopher_index_cache_entry(conn
);
241 add_uri_decoded(command
, selector
, selectorlen
, 0);
242 add_char_to_string(command
, '\t');
243 add_uri_decoded(command
, query
, querylen
, 1);
247 /* No search required */
249 done_string(command
);
250 /* Display "cover page" */
252 return init_gopher_cso_cache_entry(conn
);
254 return connection_state(S_GOPHER_CSO_ERROR
);
257 add_uri_decoded(command
, selector
, selectorlen
, 0);
258 add_to_string(command
, "query ");
259 add_uri_decoded(command
, query
, querylen
, 1);
264 add_uri_decoded(command
, selector
, selectorlen
, 0);
267 add_crlf_to_string(command
);
269 return connection_state(S_CONN
);
/* Prepare the per-connection Gopher state for @conn.
 *
 * The entity type defaults to DEFAULT_GOPHER_ENTITY. When the URI data
 * is longer than one byte and its second byte is '/', the first byte is
 * taken as the entity type. The entity table entry is looked up, the
 * request text is built with add_gopher_command(), and a
 * gopher_connection_info is allocated with room for that text, which is
 * then copied in together with its length and the entity entry.
 *
 * Returns S_CONN on success. Visible failure returns are S_INTERNAL and
 * S_OUT_OF_MEM, both after releasing the temporary command string; the
 * conditions guarding them, and what happens when add_gopher_command()
 * does not return S_CONN, are in lines outside this view. */
272 static struct connection_state
273 init_gopher_connection_info(struct connection
*conn
)
275 struct gopher_connection_info
*gopher
;
276 struct connection_state state
;
277 struct string command
;
278 enum gopher_entity entity
= DEFAULT_GOPHER_ENTITY
;
279 unsigned char *selector
= conn
->uri
->data
;
280 int selectorlen
= conn
->uri
->datalen
;
281 struct gopher_entity_info
*entity_info
;
284 /* Get entity type, and selector string. */
285 /* Pick up gopher_entity */
286 if (selectorlen
> 1 && selector
[1] == '/') {
287 entity
= *selector
++;
291 /* This is probably a hack. It serves as a work around when no entity is
292 * available in the Gopher URI. Instead of segfaulting later the content
293 * will be served as application/octet-stream. However, it could
294 * possibly break handling Gopher URIs with entities which are really
295 * unknown because parts of the real Gopher entity character is added to
296 * the selector. A possible work around is to always expect a '/'
297 * _after_ the Gopher entity. If the <entity-char> '/' combo is not
298 * found assume that the whole URI data part is the selector. */
299 entity_info
= get_gopher_entity_info(entity
);
300 if (entity_info
->type
== GOPHER_UNKNOWN
&& entity
!= GOPHER_UNKNOWN
) {
305 state
= add_gopher_command(conn
, &command
, entity
, selector
, selectorlen
);
306 if (!is_in_state(state
, S_CONN
))
309 /* Atleast the command should contain \r\n to ask the server
311 assert(command
.length
>= 2);
313 assert(conn
->info
== NULL
);
314 assert(conn
->done
== NULL
);
316 done_string(&command
);
317 return connection_state(S_INTERNAL
);
320 size
= sizeof(*gopher
) + command
.length
;
321 gopher
= mem_calloc(1, size
);
323 done_string(&command
);
324 return connection_state(S_OUT_OF_MEM
);
327 gopher
->entity
= entity_info
;
328 gopher
->commandlen
= command
.length
;
330 memcpy(gopher
->command
, command
.source
, command
.length
);
331 done_string(&command
);
335 return connection_state(S_CONN
);
/* Append an HTML anchor built from the format string
 * "<a href=\"%s\">%s</a>" to @buffer. The arguments handed to the format
 * are outside the visible lines.
 *
 * NOTE(review): no HTML escaping of @text or @addr is visible here;
 * callers pass text taken from server menu lines -- confirm whether it
 * is escaped elsewhere. */
339 /* Add a link. The title of the destination is set, as there is no way of
340 * knowing what the title is when we arrive.
342 * text points to the text to be put into the file, 0 terminated.
343 * addr points to the hypertext reference address 0 terminated.
347 add_gopher_link(struct string
*buffer
, const unsigned char *text
,
348 const unsigned char *addr
)
350 add_format_to_string(buffer
, "<a href=\"%s\">%s</a>",
/* Append an HTML search form to @buffer. The visible parts of the format
 * string open a <form> whose action is a "%s" placeholder and add a text
 * input named "search" (maxlength 256) plus a submit button labelled
 * "Search". The rest of the format string and the arguments supplied for
 * it are outside the visible lines. */
355 add_gopher_search_field(struct string
*buffer
, const unsigned char *text
,
356 const unsigned char *addr
)
358 add_format_to_string(buffer
,
359 "<form action=\"%s\">"
363 "<td><input maxlength=\"256\" name=\"search\" value=\"\"></td>"
364 "<td><input type=submit value=\"Search\"></td>"
/* Append the description label of @entity to @buffer, wrapped as
 * "<b>" label "</b> ".
 *
 * NOTE(review): the entity table contains an entry with a NULL
 * description. The lines between the lookup and the first
 * add_to_string() are not visible here; confirm that a NULL description
 * is filtered out before it reaches add_to_string(). */
371 add_gopher_description(struct string
*buffer
, enum gopher_entity entity
)
373 unsigned char *description
= get_gopher_entity_description(entity
);
378 add_to_string(buffer
, "<b>");
379 add_to_string(buffer
, description
);
380 add_to_string(buffer
, "</b> ");
/* Append @selector to @buffer through encode_uri_string(), treating each
 * "//" separately: for every double slash found, the text starting at
 * @selector is encoded with a final argument of 0, the "//" itself is
 * encoded with a final argument of 1, and scanning resumes right after
 * it. Whatever remains after the last "//" is encoded after the loop.
 *
 * NOTE(review): inside the loop @selector is passed with length -1, so
 * nothing visible stops that call at @slashes. Presumably the string is
 * temporarily terminated at @slashes in lines not shown here -- confirm,
 * since @selector is a writable buffer. */
384 encode_selector_string(struct string
*buffer
, unsigned char *selector
)
386 unsigned char *slashes
;
388 /* Rather hackishly only convert slashes if there are
389 * two successive ones. */
390 while ((slashes
= strstr(selector
, "//"))) {
392 encode_uri_string(buffer
, selector
, -1, 0);
393 encode_uri_string(buffer
, "//", 2, 1);
395 selector
= slashes
+ 2;
398 encode_uri_string(buffer
, selector
, -1, 0);
/* Convert one line of a Gopher menu into HTML appended to @buffer.
 *
 * The first character of @line is the entity type; the rest is searched
 * for TAB-separated fields in the order name, selector, host, port. The
 * port is parsed with strtol() and checked with uri_port_is_valid().
 * Depending on the entity type the visible code emits:
 *  - the plain name (information lines and unparsable items),
 *  - a link whose target is the selector itself ("Gopher pointer to W3"),
 *  - a telnet:// or tn3270:// link built from selector and host,
 *  - a gopher:// link with the URI-encoded selector, or
 *  - a search form for GOPHER_INDEX items.
 * For entities other than GOPHER_INDEX the description label is emitted
 * first. Links that compare equal to "gopher://error.host:1/" in the
 * strlcmp() test are written as plain text. A newline is appended before
 * and after the item.
 *
 * Many lines of this function are outside the visible view, including
 * the statements that terminate the individual fields, so the exact
 * branch structure cannot be confirmed from here. */
402 add_gopher_menu_line(struct string
*buffer
, unsigned char *line
)
404 /* Gopher menu fields */
405 unsigned char *name
= line
;
406 unsigned char *selector
= NULL
;
407 unsigned char *host
= NULL
;
408 unsigned char *port
= NULL
;
409 enum gopher_entity entity
= *name
++;
412 add_char_to_string(buffer
, '\n');
417 selector
= strchr(name
, ASCII_TAB
);
422 /* Gopher+ Type=0+ objects can be binary, and will have
423 * 9 or 5 beginning their selector. Make sure we don't
424 * trash the terminal by treating them as text. - FM */
425 if (entity
== GOPHER_FILE
426 && (*selector
== GOPHER_BINARY
||
427 *selector
== GOPHER_PCBINARY
))
431 host
= selector
? strchr(selector
, ASCII_TAB
) : NULL
;
433 /* Terminate selector */
437 port
= host
? strchr(host
, ASCII_TAB
) : NULL
;
443 portno
= strtol(port
+ 1, (char **) &end
, 10);
444 if (errno
|| !uri_port_is_valid(portno
)) {
448 /* Try to wipe out the default gopher port
449 * number from being appended to links. */
451 && entity_needs_gopher_access(entity
))
454 /* If the port number is 0 it means no port
455 * number is needed in which case it can be
456 * wiped out completely. Else append it to the
457 * host string a la W3. */
463 /* Chop port if there is junk after the
471 /* Nameless files are separator lines */
472 if (entity
== GOPHER_FILE
) {
473 int i
= strlen(name
) - 1;
/* NOTE(review): name[i] is evaluated before the i >= 0 test, so an
 * empty name (i == -1) reads name[-1]. Testing i >= 0 first would
 * avoid that read. */
475 while (name
[i
] == ' ' && i
>= 0)
479 entity
= GOPHER_INFO
;
482 if (entity
!= GOPHER_INDEX
) {
483 add_gopher_description(buffer
, entity
);
488 /* Gopher pointer to W3 */
490 add_gopher_link(buffer
, name
, selector
);
494 /* Fall through if no selector is defined so the
495 * text is just displayed. */
498 /* Information or separator line */
499 add_to_string(buffer
, name
);
504 struct string address
;
505 unsigned char *format
= selector
&& *selector
506 ? "%s://%s@%s/" : "%s://%s%s/";
508 /* If port is defined it means that both @selector and @host
509 * was correctly parsed. */
510 if (!port
|| !init_string(&address
)) {
511 /* Parse error: Bad menu item */
512 add_to_string(buffer
, name
);
516 assert(selector
&& host
);
518 if (entity
== GOPHER_TELNET
) {
519 add_format_to_string(&address
, format
,
520 "telnet", selector
, host
);
522 } else if (entity
== GOPHER_TN3270
) {
523 add_format_to_string(&address
, format
,
524 "tn3270", selector
, host
);
527 add_format_to_string(&address
, "gopher://%s/%c",
530 /* Encode selector string */
531 encode_selector_string(&address
, selector
);
534 /* Error response from Gopher doesn't deserve to
536 if (entity
== GOPHER_INDEX
) {
537 add_gopher_search_field(buffer
, name
, address
.source
);
539 } else if (address
.length
> 0
540 && strlcmp(address
.source
, address
.length
- 1,
541 "gopher://error.host:1/", -1)) {
542 add_gopher_link(buffer
, name
, address
.source
);
545 add_to_string(buffer
, name
);
548 done_string(&address
);
552 add_char_to_string(buffer
, '\n');
/* Scan the first @datalen bytes of @data for a CR LF pair. The loop only
 * runs while at least two bytes remain, so data[1] is never read beyond
 * @datalen.
 *
 * The return statements are outside the visible lines. The caller loops
 * while the result is non-NULL and treats it as the end of the line
 * just found. */
556 /* Search for line ending \r\n pair */
557 static unsigned char *
558 get_gopher_line_end(unsigned char *data
, int datalen
)
560 for (; datalen
> 1; data
++, datalen
--)
561 if (data
[0] == ASCII_CR
&& data
[1] == ASCII_LF
)
/* Decide whether @line is the terminating line of a Gopher listing.
 *
 * Returns NULL when @line consists of a single '.', otherwise @line
 * itself. @end is the value the caller got from get_gopher_line_end();
 * the statement that NUL-terminates the line, announced by the comment
 * below, is outside the visible lines. */
567 static inline unsigned char *
568 check_gopher_last_line(unsigned char *line
, unsigned char *end
)
572 /* Just to be safe NUL terminate the line */
575 return line
[0] == '.' && !line
[1] ? NULL
: line
;
/* Convert the complete menu lines currently held in @rb to HTML and add
 * the result to the cache entry of @conn as a new fragment.
 *
 * When nothing has been stored yet (conn->from == 0) the output buffer
 * is set up by init_directory_listing(); otherwise a plain string is
 * initialised, and S_OUT_OF_MEM is returned if that fails. Each line
 * that ends in CR LF is passed through check_gopher_last_line() and
 * add_gopher_menu_line(), counted in conn->received and removed from
 * @rb. A line holding only a dot sets the state to S_OK. When the state
 * is no longer S_TRANS or the socket is closed, extra text is appended
 * (its content is outside the visible lines). Finally the buffer is
 * added at offset conn->from, conn->from is advanced by its length and
 * the buffer is released.
 *
 * The final return statement is not visible. */
578 /* Parse a Gopher Menu document */
579 static struct connection_state
580 read_gopher_directory_data(struct connection
*conn
, struct read_buffer
*rb
)
582 struct connection_state state
= connection_state(S_TRANS
);
583 struct string buffer
;
586 if (conn
->from
== 0) {
/* NOTE(review): this declaration shadows the function-level `state`
 * for the rest of this branch. */
587 struct connection_state state
;
589 state
= init_directory_listing(&buffer
, conn
->uri
);
590 if (!is_in_state(state
, S_OK
))
593 } else if (!init_string(&buffer
)) {
594 return connection_state(S_OUT_OF_MEM
);
597 while ((end
= get_gopher_line_end(rb
->data
, rb
->length
))) {
598 unsigned char *line
= check_gopher_last_line(rb
->data
, end
);
600 /* Break on line with a dot by itself */
602 state
= connection_state(S_OK
);
606 add_gopher_menu_line(&buffer
, line
);
607 conn
->received
+= end
- rb
->data
;
608 kill_buffer_data(rb
, end
- rb
->data
);
611 if (!is_in_state(state
, S_TRANS
)
612 || conn
->socket
->state
== SOCKET_CLOSED
)
613 add_to_string(&buffer
,
618 add_fragment(conn
->cached
, conn
->from
, buffer
.source
, buffer
.length
);
619 conn
->from
+= buffer
.length
;
621 done_string(&buffer
);
/* Obtain the cache entry for conn->uri and store it in conn->cached.
 *
 * When the entry has no content type yet and the entity info of the
 * connection supplies one, a copy made with stracpy() is installed.
 * Part of that condition is outside the visible lines.
 *
 * Returns NULL when the cache entry cannot be obtained or the content
 * type cannot be copied. The success return is not visible; callers
 * treat a non-NULL result as success. */
627 static struct cache_entry
*
628 init_gopher_cache_entry(struct connection
*conn
)
630 struct gopher_connection_info
*gopher
= conn
->info
;
631 struct cache_entry
*cached
= get_cache_entry(conn
->uri
);
633 if (!cached
) return NULL
;
635 conn
->cached
= cached
;
637 if (!cached
->content_type
640 && gopher
->entity
->content_type
) {
641 cached
->content_type
= stracpy(gopher
->entity
->content_type
);
642 if (!cached
->content_type
) return NULL
;
/* Generate the HTML page shown for a searchable Gopher index when no
 * query was given, and store it in the cache entry of @conn.
 *
 * The page title and heading contain the public form of the URI, decoded
 * for display, or an empty string when that could not be obtained. A
 * search form is added with add_gopher_search_field(). The page is added
 * as a fragment at conn->from, conn->from is advanced, and the content
 * type of the cache entry is set to "text/html".
 *
 * Returns S_OK on success and S_OUT_OF_MEM when the cache entry, the
 * buffer or the content type string cannot be allocated. */
648 /* Display a Gopher Index document. */
649 static struct connection_state
650 init_gopher_index_cache_entry(struct connection
*conn
)
652 unsigned char *where
;
653 struct string buffer
;
655 if (!init_gopher_cache_entry(conn
)
656 || !init_string(&buffer
))
657 return connection_state(S_OUT_OF_MEM
);
659 where
= get_uri_string(conn
->uri
, URI_PUBLIC
);
661 /* TODO: Use different function when using UTF-8
662 * in terminal (decode_uri_for_display replaces
663 * bytes of UTF-8 characters with '*'). */
664 if (where
) decode_uri_for_display(where
);
666 add_format_to_string(&buffer
,
669 "<title>Searchable gopher index at %s</title>\n"
672 "<h1>Searchable gopher index at %s</h1>\n",
673 empty_string_or_(where
), empty_string_or_(where
));
676 add_gopher_search_field(&buffer
, "Please enter search keywords",
682 /* FIXME: I think this needs a form or something */
684 add_fragment(conn
->cached
, conn
->from
, buffer
.source
, buffer
.length
);
685 conn
->from
+= buffer
.length
;
686 done_string(&buffer
);
/* NOTE(review): the previous content_type pointer is overwritten
 * without being released. If the cache entry can already carry a
 * content type at this point this presumably leaks it -- confirm. */
688 conn
->cached
->content_type
= stracpy("text/html");
690 return conn
->cached
->content_type
691 ? connection_state(S_OK
)
692 : connection_state(S_OUT_OF_MEM
);
/* Socket read callback handling response data for a Gopher request.
 *
 * Makes sure the connection has a cache entry (aborting with
 * S_OUT_OF_MEM otherwise) and then dispatches on the entity type stored
 * in conn->info:
 *  - GOPHER_DIRECTORY data goes through read_gopher_directory_data();
 *  - a CSO branch calls read_gopher_cso_data() or yields
 *    S_GOPHER_CSO_ERROR (the selecting lines are not visible);
 *  - for the listed binary and media types the buffered data is added
 *    to the cache entry unchanged, accounted for in conn->received and
 *    conn->from, and removed from @rb.
 * A closed socket sets the state to S_OK. Any state other than S_TRANS
 * ends the connection through abort_connection(); otherwise another
 * read is requested with this function as the callback.
 *
 * Several case labels and the statements separating the branches are
 * outside the visible lines. */
697 read_gopher_response_data(struct socket
*socket
, struct read_buffer
*rb
)
699 struct connection
*conn
= socket
->conn
;
700 struct gopher_connection_info
*gopher
= conn
->info
;
701 struct connection_state state
= connection_state(S_TRANS
);
703 assert(gopher
&& gopher
->entity
);
705 if (!conn
->cached
&& !init_gopher_cache_entry(conn
)) {
706 abort_connection(conn
, connection_state(S_OUT_OF_MEM
));
710 /* Now read the data from the socket */
711 switch (gopher
->entity
->type
) {
712 case GOPHER_DIRECTORY
:
714 state
= read_gopher_directory_data(conn
, rb
);
719 /* FIXME: Merge CSO support */
720 state
= read_gopher_cso_data(conn
, rb
);
722 state
= connection_state(S_GOPHER_CSO_ERROR
);
726 case GOPHER_PLUS_SOUND
:
727 case GOPHER_PLUS_MOVIE
:
728 case GOPHER_PLUS_PDF
:
729 case GOPHER_MACBINHEX
:
730 case GOPHER_PCBINARY
:
731 case GOPHER_UUENCODED
:
738 case GOPHER_PLUS_IMAGE
:
740 /* Add the received data as a new cache entry fragment and do
741 * the connection data accounting. */
742 add_fragment(conn
->cached
, conn
->from
, rb
->data
, rb
->length
);
744 conn
->received
+= rb
->length
;
745 conn
->from
+= rb
->length
;
747 kill_buffer_data(rb
, rb
->length
);
750 /* Has the transport layer forced a shut down? */
751 if (socket
->state
== SOCKET_CLOSED
) {
752 state
= connection_state(S_OK
);
755 if (!is_in_state(state
, S_TRANS
)) {
756 abort_connection(conn
, state
);
760 read_from_socket(conn
->socket
, rb
, connection_state(S_TRANS
),
761 read_gopher_response_data
);
766 send_gopher_command(struct socket
*socket
)
768 struct connection
*conn
= socket
->conn
;
769 struct gopher_connection_info
*gopher
= conn
->info
;
771 request_from_socket(socket
, gopher
->command
, gopher
->commandlen
,
772 connection_state(S_SENT
), SOCKET_END_ONCLOSE
,
773 read_gopher_response_data
);
777 /* FIXME: No decoding of strange data types as yet. */
779 gopher_protocol_handler(struct connection
*conn
)
781 struct uri
*uri
= conn
->uri
;
782 struct connection_state state
= connection_state(S_CONN
);
784 switch (get_uri_port(uri
)) {
786 /* If it's a port 105 GOPHER_CSO gopher_entity with no ISINDEX
787 * token ('?'), use the form-based CSO gateway (otherwise,
788 * return an ISINDEX cover page or do the ISINDEX search).
790 if (uri
->datalen
== 1 && *uri
->data
== GOPHER_CSO
) {
791 /* FIXME: redirect_cache() */
792 abort_connection(conn
,
793 connection_state(S_GOPHER_CSO_ERROR
));
799 /* This is commented out because it apparently means that the
800 * finger protocol handler needs to be extended for handling
801 * this the way Lynx does. --jonas */
802 /* If it's a port 79/0[/...] URL, use the finger gateway.
804 if (uri
->datalen
>= 1 && *uri
->data
== GOPHER_FILE
) {
805 /* FIXME: redirect_cache() */
806 abort_connection(conn
, connection_state(S_OK
));
812 state
= init_gopher_connection_info(conn
);
813 if (!is_in_state(state
, S_CONN
)) {
814 /* FIXME: Handle bad selector ... */
815 abort_connection(conn
, state
);
819 /* Set up a socket to the server for the data */
821 make_connection(conn
->socket
, conn
->uri
, send_gopher_command
,
822 conn
->cache_mode
>= CACHE_MODE_FORCE_RELOAD
);