2 * Copyright 2013-2014 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
6 * François Revol, revol@free.fr
15 #include <Directory.h>
16 #include <DynamicBuffer.h>
18 #include <GopherRequest.h>
22 #include <StackOrHeapArray.h>
24 #include <StringList.h>
27 * TODO: fix '+' in selectors, cf. gopher://gophernicus.org/1/doc/gopher/
28 * TODO: add proper favicon
29 * TODO: add proper dir and document icons
30 * TODO: correctly eat the extraneous .\r\n at end of text files
31 * TODO: move parsing stuff to a translator?
34 * gopher://gopher.floodgap.com/1/gopher/tech
35 * gopher://gopher.floodgap.com/0/overbite/dbrowse?pluginm%201
38 * gopher://sdf.org/1/sdf/historical images
39 * gopher://gopher.r-36.net/1/ large photos
40 * gopher://sdf.org/1/sdf/classes binaries
41 * gopher://sdf.org/1/users/ long page
42 * gopher://jgw.mdns.org/1/ search items
43 * gopher://jgw.mdns.org/1/MISC/ 's' item (sound)
44 * gopher://gopher.floodgap.com/1/gopher broken link
45 * gopher://sdf.org/1/maps/m missing lines
46 * gopher://sdf.org/1/foo gophernicus reports errors incorrectly
47 * gopher://gopher.floodgap.com/1/foo correct error report
/**
 * Type of Gopher items.
 *
 * Every enumerator (except GOPHER_TYPE_NONE) has the value of the single
 * character that identifies the item type, so a raw type character can be
 * compared against these constants directly.
 */
typedef enum {
	GOPHER_TYPE_NONE	= 0,	/**< none set */
	GOPHER_TYPE_ENDOFPAGE	= '.',	/**< a dot alone on a line */
	/* these come from http://tools.ietf.org/html/rfc1436 */
	GOPHER_TYPE_TEXTPLAIN	= '0',	/**< text/plain */
	GOPHER_TYPE_DIRECTORY	= '1',	/**< gopher directory */
	GOPHER_TYPE_CSO_SEARCH	= '2',	/**< CSO search */
	GOPHER_TYPE_ERROR	= '3',	/**< error message */
	GOPHER_TYPE_BINHEX	= '4',	/**< binhex encoded text */
	GOPHER_TYPE_BINARCHIVE	= '5',	/**< binary archive file */
	GOPHER_TYPE_UUENCODED	= '6',	/**< uuencoded text */
	GOPHER_TYPE_QUERY	= '7',	/**< gopher search query */
	GOPHER_TYPE_TELNET	= '8',	/**< telnet link */
	GOPHER_TYPE_BINARY	= '9',	/**< generic binary */
	GOPHER_TYPE_DUPSERV	= '+',	/**< duplicated server */
	GOPHER_TYPE_GIF		= 'g',	/**< GIF image */
	GOPHER_TYPE_IMAGE	= 'I',	/**< image (depends, usually jpeg) */
	GOPHER_TYPE_TN3270	= 'T',	/**< tn3270 session */
	/* not standardized but widely used,
	 * cf. http://en.wikipedia.org/wiki/Gopher_%28protocol%29#Gopher_item_types
	 */
	GOPHER_TYPE_HTML	= 'h',	/**< HTML file or URL */
	GOPHER_TYPE_INFO	= 'i',	/**< information text */
	GOPHER_TYPE_AUDIO	= 's',	/**< audio (wav?) */
	/* not standardized, some servers use them */
	GOPHER_TYPE_DOC		= 'd',	/**< gophernicus uses it for PS and PDF */
	GOPHER_TYPE_PNG		= 'p',	/**< PNG image */
		/* cf. gopher://namcub.accelera-labs.com/1/pics */
	GOPHER_TYPE_MIME	= 'M',	/**< multipart/mixed MIME data */
		/* cf. http://www.pms.ifi.lmu.de/mitarbeiter/ohlbach/multimedia/IT/IBMtutorial/3376c61.html */
		/* cf. http://nofixedpoint.motd.org/2011/02/22/an-introduction-to-the-gopher-protocol/ */
	GOPHER_TYPE_PDF		= 'P',	/**< PDF file */
	GOPHER_TYPE_BITMAP	= ':',	/**< Bitmap image (Gopher+) */
	GOPHER_TYPE_MOVIE	= ';',	/**< Movie (Gopher+) */
	GOPHER_TYPE_SOUND	= '<',	/**< Sound (Gopher+) */
	GOPHER_TYPE_CALENDAR	= 'c',	/**< Calendar */
	GOPHER_TYPE_EVENT	= 'e',	/**< Event */
	GOPHER_TYPE_MBOX	= 'm',	/**< mbox file */
} gopher_item_type;
91 /** Types of fields in a line */
// NOTE(review): only the final enumerator is visible in this excerpt. The
// parser also references FIELD_NAME, FIELD_SELECTOR, FIELD_HOST, FIELD_PORT,
// FIELD_GPFLAG and FIELD_EOL; presumably they are declared in this same
// enumeration, in that order — confirm against the full file.
// FIELD_COUNT is an alias for FIELD_EOL.
99 FIELD_COUNT
= FIELD_EOL
102 /** Map of gopher types to MIME types */
104 gopher_item_type type
;
106 } gopher_type_map
[] = {
107 /* these come from http://tools.ietf.org/html/rfc1436 */
108 { GOPHER_TYPE_TEXTPLAIN
, "text/plain" },
109 { GOPHER_TYPE_DIRECTORY
, "text/html;charset=UTF-8" },
110 { GOPHER_TYPE_QUERY
, "text/html;charset=UTF-8" },
111 { GOPHER_TYPE_GIF
, "image/gif" },
112 { GOPHER_TYPE_HTML
, "text/html" },
113 /* those are not standardized */
114 { GOPHER_TYPE_PDF
, "application/pdf" },
115 { GOPHER_TYPE_PNG
, "image/png"},
116 { GOPHER_TYPE_NONE
, NULL
}
// CSS text for the generated gopher listing pages. _ParseInput() streams it
// between the <style> tags of the page header it emits.
// NOTE(review): several rule lines appear to be missing from this excerpt
// (selectors without bodies, a body without a selector), so the literal below
// should not be read as the complete stylesheet.
119 static const char *kStyleSheet
= "\n"
121 " * gopher listing style\n"
125 " /* margin: 10px;*/\n"
126 " background-color: Window;\n"
127 " color: WindowText;\n"
128 " font-size: 100%;\n"
129 " padding-bottom: 2em; }\n"
131 "body#gopher div.uplink {\n"
134 " position: fixed;\n"
141 " border-bottom: 2px solid #777; }\n"
143 "body#gopher span {\n"
144 " margin-left: 1em;\n"
145 " padding-left: 2em;\n"
146 " font-family: 'DejaVu Sans Mono', Courier, monospace;\n"
147 " word-wrap: break-word;\n"
148 " white-space: pre-wrap; }\n"
150 "body#gopher span.error {\n"
153 "body#gopher span.unknown {\n"
156 "body#gopher span.dir {\n"
157 " background-image: url('resource:icons/directory.png');\n"
158 " background-repeat: no-repeat;\n"
159 " background-position: bottom left; }\n"
161 "body#gopher span.text {\n"
162 " background-image: url('resource:icons/content.png');\n"
163 " background-repeat: no-repeat;\n"
164 " background-position: bottom left; }\n"
166 "body#gopher span.query {\n"
167 " background-image: url('resource:icons/search.png');\n"
168 " background-repeat: no-repeat;\n"
169 " background-position: bottom left; }\n"
171 "body#gopher span.img img {\n"
173 " margin-left:auto;\n"
174 " margin-right:auto; }\n";
176 static const int32 kGopherBufferSize
= 4096;
178 static const bool kInlineImages
= true;
// Constructs a gopher request.
// url, listener and context are forwarded to BNetworkRequest together with
// the strings "BUrlProtocol.Gopher" and "gopher"; the item type starts out as
// GOPHER_TYPE_NONE and the socket is allocated with new(std::nothrow).
// The item type is then derived from the URL path: a missing, empty or "/"
// path selects GOPHER_TYPE_DIRECTORY, otherwise a path that starts with '/'
// and is longer than one character supplies its second character as the type.
// NOTE(review): further initializers and path adjustments may sit on lines
// that are not visible in this excerpt.
181 BGopherRequest::BGopherRequest(const BUrl
& url
, BUrlProtocolListener
* listener
,
182 BUrlContext
* context
)
184 BNetworkRequest(url
, listener
, context
, "BUrlProtocol.Gopher", "gopher"),
185 fItemType(GOPHER_TYPE_NONE
),
// NOTE(review): new(std::nothrow) can yield NULL; no check is visible here.
189 fSocket
= new(std::nothrow
) BSocket();
192 // the first part of the path is actually the document type
194 fPath
= Url().Path();
195 if (!Url().HasPath() || fPath
.Length() == 0 || fPath
== "/") {
197 fItemType
= GOPHER_TYPE_DIRECTORY
;
199 } else if (fPath
.Length() > 1 && fPath
[0] == '/') {
200 fItemType
= fPath
[1];
// Destructor.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases fSocket, which the constructor allocates — confirm.
206 BGopherRequest::~BGopherRequest()
// Stops the request. When a socket exists it is disconnected first, which
// releases any connect, read or write call currently blocked on it; the call
// is then delegated to BNetworkRequest::Stop() and its result is returned.
215 BGopherRequest::Stop()
217 if (fSocket
!= NULL
) {
218 fSocket
->Disconnect();
219 // Unlock any pending connect, read or write operation.
221 return BNetworkRequest::Stop();
// Const accessor for the result of this request.
// NOTE(review): the body is not visible in this excerpt; presumably it
// returns fResult — confirm.
226 BGopherRequest::Result() const
// Runs one complete gopher transaction.
// Steps visible here: resolve the host (port 70 unless the URL names one),
// connect the socket, notify the listener that the connection is open, then
// read the reply in chunks of kGopherBufferSize bytes until fQuit is set or
// receiveEnd becomes true.
// The first data received is checked for a server-side error report (a
// leading '3' item followed by a plausible menu line, or the literal
// Gophernicus error text); in either case the item type is forced to
// GOPHER_TYPE_DIRECTORY and readError becomes B_RESOURCE_NOT_FOUND while the
// text is still processed. The listener then gets ResponseStarted and
// HeadersReceived, and the content type is looked up in gopher_type_map,
// defaulting to "application/octet-stream".
// Received data is either handed to _ParseInput() or, in the else branch,
// forwarded unmodified to the listener.
// Returns B_SERVER_NOT_FOUND when the host cannot be resolved; the final
// return yields B_INTERRUPTED when fQuit is set and B_OK otherwise.
// NOTE(review): the call that sends the request and the returns taken on
// connect/read errors are on lines not visible in this excerpt.
233 BGopherRequest::_ProtocolLoop()
// Resolve the host; fall back to port 70 when the URL carries no port.
238 if (!_ResolveHostName(fUrl
.Host(), fUrl
.HasPort() ? fUrl
.Port() : 70)) {
239 _EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR
,
240 "Unable to resolve hostname (%s), aborting.",
241 fUrl
.Host().String());
242 return B_SERVER_NOT_FOUND
;
245 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
, "Connection to %s on port %d.",
246 fUrl
.Authority().String(), fRemoteAddr
.Port());
247 status_t connectError
= fSocket
->Connect(fRemoteAddr
);
249 if (connectError
!= B_OK
) {
250 _EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR
, "Socket connection error %s",
251 strerror(connectError
));
255 //! ProtocolHook:ConnectionOpened
256 if (fListener
!= NULL
)
257 fListener
->ConnectionOpened(this);
259 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
260 "Connection opened, sending request.");
263 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
, "Request sent.");
266 bool receiveEnd
= false;
267 status_t readError
= B_OK
;
268 ssize_t bytesRead
= 0;
269 //ssize_t bytesReceived = 0;
270 //ssize_t bytesTotal = 0;
271 bool dataValidated
= false;
272 BStackOrHeapArray
<char, 4096> chunk(kGopherBufferSize
);
// Receive loop: wait for data, read one chunk, append it to fInputBuffer.
274 while (!fQuit
&& !receiveEnd
) {
275 fSocket
->WaitForReadable();
276 bytesRead
= fSocket
->Read(chunk
, kGopherBufferSize
);
279 readError
= bytesRead
;
281 } else if (bytesRead
== 0)
284 fInputBuffer
.AppendData(chunk
, bytesRead
);
// One-time validation of the reply, done before anything is reported to the
// listener.
286 if (!dataValidated
) {
288 // on error (file doesn't exist, ...) the server sends
289 // a faked directory entry with an error message
290 if (fInputBuffer
.Size() && fInputBuffer
.Data()[0] == '3') {
294 // make sure the buffer only contains printable characters
295 // and has at least 3 tabs before a CRLF
296 for (i
= 0; i
< fInputBuffer
.Size(); i
++) {
297 char c
= fInputBuffer
.Data()[i
];
301 } else if (c
== '\r' || c
== '\n') {
305 } else if (!isprint(fInputBuffer
.Data()[i
])) {
310 if (crlf
&& tabs
> 2 && tabs
< 5) {
314 fItemType
= GOPHER_TYPE_DIRECTORY
;
315 readError
= B_RESOURCE_NOT_FOUND
;
316 // continue parsing the error text anyway
319 // special case for buggy(?) Gophernicus/1.5
320 static const char *buggy
= "Error: File or directory not found!";
321 if (fInputBuffer
.Size() > strlen(buggy
)
322 && !memcmp(fInputBuffer
.Data(), buggy
, strlen(buggy
))) {
323 fItemType
= GOPHER_TYPE_DIRECTORY
;
324 readError
= B_RESOURCE_NOT_FOUND
;
325 // continue parsing the error text anyway
326 // but it won't look good
330 // now we probably have correct data
331 dataValidated
= true;
333 //! ProtocolHook:ResponseStarted
334 if (fListener
!= NULL
)
335 fListener
->ResponseStarted(this);
337 // we don't really have headers but well...
338 //! ProtocolHook:HeadersReceived
339 if (fListener
!= NULL
)
340 fListener
->HeadersReceived(this);
342 // now we can assign MIME type if we know it
343 const char *mime
= "application/octet-stream";
344 for (i
= 0; gopher_type_map
[i
].type
!= GOPHER_TYPE_NONE
; i
++) {
345 if (gopher_type_map
[i
].type
== fItemType
) {
346 mime
= gopher_type_map
[i
].mime
;
350 fResult
.SetContentType(mime
);
354 _ParseInput(receiveEnd
);
355 else if (fInputBuffer
.Size()) {
356 // send input directly
357 if (fListener
!= NULL
) {
358 fListener
->DataReceived(this, (const char *)fInputBuffer
.Data(),
359 fPosition
, fInputBuffer
.Size());
362 fPosition
+= fInputBuffer
.Size();
// NOTE(review): the scratch buffer below is sized from bytesRead while
// RemoveData() is asked to copy fInputBuffer.Size() bytes into it; the code
// relies on the two being equal at this point — confirm.
364 // XXX: this is plain stupid, we already copied the data
365 // and just want to drop it...
366 char *inputTempBuffer
= new(std::nothrow
) char[bytesRead
];
367 if (inputTempBuffer
== NULL
) {
368 readError
= B_NO_MEMORY
;
371 fInputBuffer
.RemoveData(inputTempBuffer
, fInputBuffer
.Size());
372 delete[] inputTempBuffer
;
// Record the number of bytes delivered and report the final progress.
377 fResult
.SetLength(fPosition
);
378 if (fListener
!= NULL
)
379 fListener
->DownloadProgress(this, fPosition
, fPosition
);
382 fSocket
->Disconnect();
384 if (readError
!= B_OK
)
387 return fQuit
? B_INTERRUPTED
: B_OK
;
// Writes the request string to the socket. When the URL has a request part,
// it is appended to the request string after a tab character.
// NOTE(review): the lines that build the start and the terminator of the
// request string are not visible in this excerpt, and the visible code does
// not check the value returned by Write().
392 BGopherRequest::_SendRequest()
398 if (Url().HasRequest())
399 request
<< '\t' << Url().Request();
403 fSocket
->Write(request
.String(), request
.Length());
// Tests whether the current item type is a directory or a search query, the
// two item types that gopher_type_map reports as "text/html;charset=UTF-8".
// NOTE(review): the return statements are not visible in this excerpt.
408 BGopherRequest::_NeedsParsing()
410 if (fItemType
== GOPHER_TYPE_DIRECTORY
411 || fItemType
== GOPHER_TYPE_QUERY
)
// Tests whether the current item type is a directory, a search query or
// plain text.
// NOTE(review): the return statements are not visible in this excerpt;
// judging by the name, these are presumably the types whose trailing
// ".\r\n" terminator is to be removed — confirm.
418 BGopherRequest::_NeedsLastDotStrip()
420 if (fItemType
== GOPHER_TYPE_DIRECTORY
421 || fItemType
== GOPHER_TYPE_QUERY
422 || fItemType
== GOPHER_TYPE_TEXTPLAIN
)
// Converts buffered gopher menu lines into HTML and streams the result to
// the listener.
// For every line that _GetLine() returns, the first character is split off as
// the item type and the rest is split on tabs into fields. The name field
// becomes the title; when more than three fields are present, the host, the
// port (if not empty) and the selector are appended to "gopher://" to form
// the link. Title and link are HTML-escaped before use. The case labels below
// then choose the markup for the item type; unknown types are logged and
// rendered as a plain link.
// While fPosition is still 0 a page header (meta tag, stylesheet, title,
// [up]/[top] links and heading) is sent first. fPosition advances by the
// length of every piece handed to the listener.
//
// Parameter last: supplied by _ProtocolLoop() from its receiveEnd flag.
// NOTE(review): the code that consumes `last` is not visible in this excerpt;
// presumably it controls when the footer is sent — confirm.
429 BGopherRequest::_ParseInput(bool last
)
433 while (_GetLine(line
) == B_OK
) {
434 char type
= GOPHER_TYPE_NONE
;
// Split the leading type character off the line; what remains holds the
// tab-separated fields.
437 line
.MoveInto(&type
, 0, 1);
439 line
.Split("\t", false, fields
);
441 if (type
!= GOPHER_TYPE_ENDOFPAGE
442 && fields
.CountStrings() < FIELD_GPFLAG
)
443 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
444 "Unterminated gopher item (type '%c')", type
);
// Build the display title and the target URL of this item.
448 BString title
= fields
.StringAt(FIELD_NAME
);
449 BString
link("gopher://");
451 if (fields
.CountStrings() > 3) {
452 link
<< fields
.StringAt(FIELD_HOST
);
453 if (fields
.StringAt(FIELD_PORT
).Length())
454 link
<< ":" << fields
.StringAt(FIELD_PORT
);
456 //if (fields.StringAt(FIELD_SELECTOR).ByteAt(0) != '/')
458 link
<< fields
.StringAt(FIELD_SELECTOR
);
460 _HTMLEscapeString(title
);
461 _HTMLEscapeString(link
);
464 case GOPHER_TYPE_ENDOFPAGE
:
465 /* end of the page */
467 case GOPHER_TYPE_TEXTPLAIN
:
468 item
<< "<a href=\"" << link
<< "\">"
469 "<span class=\"text\">" << title
<< "</span></a>"
472 case GOPHER_TYPE_BINARY
:
473 case GOPHER_TYPE_BINHEX
:
474 case GOPHER_TYPE_BINARCHIVE
:
475 case GOPHER_TYPE_UUENCODED
:
476 item
<< "<a href=\"" << link
<< "\">"
477 "<span class=\"binary\">" << title
<< "</span></a>"
480 case GOPHER_TYPE_DIRECTORY
:
484 item
<< "<a href=\"" << link
<< "\">"
485 "<span class=\"dir\">" << title
<< "</span></a>"
488 case GOPHER_TYPE_ERROR
:
489 item
<< "<span class=\"error\">" << title
<< "</span>"
491 if (fPosition
== 0 && pageTitle
.Length() == 0)
492 pageTitle
<< "Error: " << title
;
494 case GOPHER_TYPE_QUERY
:
495 /* TODO: handle search better.
496 * For now we use an unnamed input field and accept sending ?=foo
497 * as it seems at least Veronica-2 ignores the = but it's unclean.
499 item
<< "<form method=\"get\" action=\"" << link
<< "\" "
500 "onsubmit=\"window.location = this.action + '?' + "
501 "this.elements['q'].value; return false;\">"
502 "<span class=\"query\">"
503 "<label>" << title
<< " "
504 "<input id=\"q\" name=\"\" type=\"text\" align=\"right\" />"
509 case GOPHER_TYPE_TELNET
:
511 * cf. gopher://78.80.30.202/1/ps3
512 * -> gopher://78.80.30.202:23/8/ps3/new -> new@78.80.30.202
515 user
= fields
.StringAt(FIELD_SELECTOR
);
516 if (user
.FindLast('/') > -1) {
517 user
.Remove(0, user
.FindLast('/'));
520 link
<< fields
.StringAt(FIELD_HOST
);
521 if (fields
.StringAt(FIELD_PORT
) != "23")
522 link
<< ":" << fields
.StringAt(FIELD_PORT
);
524 item
<< "<a href=\"" << link
<< "\">"
525 "<span class=\"telnet\">" << title
<< "</span></a>"
528 case GOPHER_TYPE_TN3270
:
529 /* tn3270: URI scheme, cf. http://tools.ietf.org/html/rfc6270 */
531 user
= fields
.StringAt(FIELD_SELECTOR
);
532 if (user
.FindLast('/') > -1) {
533 user
.Remove(0, user
.FindLast('/'));
536 link
<< fields
.StringAt(FIELD_HOST
);
537 if (fields
.StringAt(FIELD_PORT
) != "23")
538 link
<< ":" << fields
.StringAt(FIELD_PORT
);
540 item
<< "<a href=\"" << link
<< "\">"
541 "<span class=\"telnet\">" << title
<< "</span></a>"
544 case GOPHER_TYPE_CSO_SEARCH
:
546 * At least Lynx supports a cso:// URI scheme:
547 * http://lynx.isc.org/lynx2.8.5/lynx2-8-5/lynx_help/lynx_url_support.html
550 user
= fields
.StringAt(FIELD_SELECTOR
);
551 if (user
.FindLast('/') > -1) {
552 user
.Remove(0, user
.FindLast('/'));
555 link
<< fields
.StringAt(FIELD_HOST
);
556 if (fields
.StringAt(FIELD_PORT
) != "105")
557 link
<< ":" << fields
.StringAt(FIELD_PORT
);
559 item
<< "<a href=\"" << link
<< "\">"
560 "<span class=\"cso\">" << title
<< "</span></a>"
563 case GOPHER_TYPE_GIF
:
564 case GOPHER_TYPE_IMAGE
:
565 case GOPHER_TYPE_PNG
:
566 case GOPHER_TYPE_BITMAP
:
567 /* quite dangerous, cf. gopher://namcub.accela-labs.com/1/pics */
569 item
<< "<a href=\"" << link
<< "\">"
570 "<span class=\"img\">" << title
<< " "
571 "<img src=\"" << link
<< "\" "
572 "alt=\"" << title
<< "\"/>"
577 /* fallback to default, link them */
578 item
<< "<a href=\"" << link
<< "\">"
579 "<span class=\"img\">" << title
<< "</span></a>"
582 case GOPHER_TYPE_HTML
:
583 /* cf. gopher://pineapple.vg/1 */
584 if (fields
.StringAt(FIELD_SELECTOR
).StartsWith("URL:")) {
585 link
= fields
.StringAt(FIELD_SELECTOR
);
588 /* cf. gopher://sdf.org/1/sdf/classes/ */
590 item
<< "<a href=\"" << link
<< "\">"
591 "<span class=\"html\">" << title
<< "</span></a>"
594 case GOPHER_TYPE_INFO
:
595 // TITLE resource, cf.
596 // gopher://gophernicus.org/0/doc/gopher/gopher-title-resource.txt
597 if (fPosition
== 0 && pageTitle
.Length() == 0
598 && fields
.StringAt(FIELD_SELECTOR
) == "TITLE") {
602 item
<< "<span class=\"info\">" << title
<< "</span>"
605 case GOPHER_TYPE_AUDIO
:
606 case GOPHER_TYPE_SOUND
:
607 item
<< "<a href=\"" << link
<< "\">"
608 "<span class=\"audio\">" << title
<< "</span></a>"
609 "<audio src=\"" << link
<< "\" "
610 //TODO:Fix crash in WebPositive with these
611 //"controls=\"controls\" "
612 //"width=\"300\" height=\"50\" "
613 "alt=\"" << title
<< "\"/>"
614 "<span>[player]</span></audio>"
617 case GOPHER_TYPE_PDF
:
618 case GOPHER_TYPE_DOC
:
619 /* generic case for known-to-work items */
620 item
<< "<a href=\"" << link
<< "\">"
621 "<span class=\"document\">" << title
<< "</span></a>"
624 case GOPHER_TYPE_MOVIE
:
625 item
<< "<a href=\"" << link
<< "\">"
626 "<span class=\"video\">" << title
<< "</span></a>"
627 "<video src=\"" << link
<< "\" "
628 //TODO:Fix crash in WebPositive with these
629 //"controls=\"controls\" "
630 //"width=\"300\" height=\"300\" "
631 "alt=\"" << title
<< "\"/>"
// The end tag has to match the <video> element opened above; closing it as
// </audio> left the video element open for the rest of the page.
632 "<span>[player]</span></video>"
636 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
637 "Unknown gopher item (type 0x%02x '%c')", type
, type
);
638 item
<< "<a href=\"" << link
<< "\">"
639 "<span class=\"unknown\">" << title
<< "</span></a>"
// Nothing has been sent yet: emit the page header before the first item.
644 if (fPosition
== 0) {
645 if (pageTitle
.Length() == 0)
646 pageTitle
<< "Index of " << Url();
648 const char *uplink
= ".";
649 if (fPath
.EndsWith("/"))
657 "<meta http-equiv=\"Content-Type\""
658 " content=\"text/html; charset=UTF-8\" />\n"
660 //"<link rel=\"icon\" type=\"image/png\""
661 // " href=\"resource:icons/directory.png\">\n"
662 "<style type=\"text/css\">\n" << kStyleSheet
<< "</style>\n"
663 "<title>" << pageTitle
<< "</title>\n"
665 "<body id=\"gopher\">\n"
666 "<div class=\"uplink dontprint\">\n"
667 "<a href=" << uplink
<< ">[up]</a>\n"
668 "<a href=\"/\">[top]</a>\n"
670 "<h1>" << pageTitle
<< "</h1>\n";
672 fListener
->DataReceived(this, header
.String(), fPosition
,
675 fPosition
+= header
.Length();
679 fListener
->DataReceived(this, item
.String(), fPosition
,
682 fPosition
+= item
.Length();
693 fListener
->DataReceived(this, footer
.String(), fPosition
,
696 fPosition
+= footer
.Length();
// Escapes the HTML special characters '&', '<' and '>' in str, in place, by
// replacing them with the entities &amp;, &lt; and &gt;.
// '&' is replaced first so that the ampersands introduced by the other two
// replacements are not escaped a second time.
// Quote characters are left untouched.
702 BGopherRequest::_HTMLEscapeString(BString
&str
)
704 str
.ReplaceAll("&", "&amp;");
705 str
.ReplaceAll("<", "&lt;");
706 str
.ReplaceAll(">", "&gt;");