2 * Copyright 2013-2014 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
6 * François Revol, revol@free.fr
15 #include <Directory.h>
16 #include <DynamicBuffer.h>
18 #include <GopherRequest.h>
22 #include <StackOrHeapArray.h>
24 #include <StringList.h>
27 * TODO: fix '+' in selectors, cf. gopher://gophernicus.org/1/doc/gopher/
28 * TODO: add proper favicon
29 * TODO: add proper dir and document icons
30 * TODO: correctly eat the extraneous .\r\n at end of text files
31 * TODO: move parsing stuff to a translator?
34 * gopher://gopher.floodgap.com/1/gopher/tech
35 * gopher://gopher.floodgap.com/0/overbite/dbrowse?pluginm%201
38 * gopher://sdf.org/1/sdf/historical images
39 * gopher://gopher.r-36.net/1/ large photos
40 * gopher://sdf.org/1/sdf/classes binaries
41 * gopher://sdf.org/1/users/ long page
42 * gopher://jgw.mdns.org/1/ search items
43 * gopher://jgw.mdns.org/1/MISC/ 's' item (sound)
44 * gopher://gopher.floodgap.com/1/gopher broken link
45 * gopher://sdf.org/1/maps/m missing lines
46 * gopher://sdf.org/1/foo gophernicus reports errors incorrectly
47 * gopher://gopher.floodgap.com/1/foo correct error report
/**
 * Type of Gopher items.
 *
 * Each enumerator (except GOPHER_TYPE_NONE) is the ASCII character a server
 * puts in the first column of a menu line; the same character is found right
 * after the leading '/' in a gopher URL path.
 */
typedef enum {
	GOPHER_TYPE_NONE		= 0,	/**< none set */
	GOPHER_TYPE_ENDOFPAGE	= '.',	/**< a dot alone on a line */

	/* these come from http://tools.ietf.org/html/rfc1436 */
	GOPHER_TYPE_TEXTPLAIN	= '0',	/**< text/plain */
	GOPHER_TYPE_DIRECTORY	= '1',	/**< gopher directory */
	GOPHER_TYPE_CSO_SEARCH	= '2',	/**< CSO search */
	GOPHER_TYPE_ERROR		= '3',	/**< error message */
	GOPHER_TYPE_BINHEX		= '4',	/**< binhex encoded text */
	GOPHER_TYPE_BINARCHIVE	= '5',	/**< binary archive file */
	GOPHER_TYPE_UUENCODED	= '6',	/**< uuencoded text */
	GOPHER_TYPE_QUERY		= '7',	/**< gopher search query */
	GOPHER_TYPE_TELNET		= '8',	/**< telnet link */
	GOPHER_TYPE_BINARY		= '9',	/**< generic binary */
	GOPHER_TYPE_DUPSERV		= '+',	/**< duplicated server */
	GOPHER_TYPE_GIF			= 'g',	/**< GIF image */
	GOPHER_TYPE_IMAGE		= 'I',	/**< image (depends, usually jpeg) */
	GOPHER_TYPE_TN3270		= 'T',	/**< tn3270 session */

	/* not standardized but widely used,
	 * cf. http://en.wikipedia.org/wiki/Gopher_%28protocol%29#Gopher_item_types
	 */
	GOPHER_TYPE_HTML		= 'h',	/**< HTML file or URL */
	GOPHER_TYPE_INFO		= 'i',	/**< information text */
	GOPHER_TYPE_AUDIO		= 's',	/**< audio (wav?) */

	/* not standardized, some servers use them */
	GOPHER_TYPE_DOC			= 'd',	/**< gophernicus uses it for PS and PDF */
	GOPHER_TYPE_PNG			= 'p',	/**< PNG image */
	/* cf. gopher://namcub.accelera-labs.com/1/pics */
	GOPHER_TYPE_MIME		= 'M',	/**< multipart/mixed MIME data */
	/* cf. http://www.pms.ifi.lmu.de/mitarbeiter/ohlbach/multimedia/IT/IBMtutorial/3376c61.html */
	/* cf. http://nofixedpoint.motd.org/2011/02/22/an-introduction-to-the-gopher-protocol/ */
	GOPHER_TYPE_PDF			= 'P',	/**< PDF file */
	GOPHER_TYPE_BITMAP		= ':',	/**< Bitmap image (Gopher+) */
	GOPHER_TYPE_MOVIE		= ';',	/**< Movie (Gopher+) */
	GOPHER_TYPE_SOUND		= '<',	/**< Sound (Gopher+) */
	GOPHER_TYPE_CALENDAR	= 'c',	/**< Calendar */
	GOPHER_TYPE_EVENT		= 'e',	/**< Event */
	GOPHER_TYPE_MBOX		= 'm'	/**< mbox file */
} gopher_item_type;
/**
 * Types of fields in a line.
 *
 * Index of each tab-separated field of a menu line once the leading
 * item-type character has been removed. The order follows RFC 1436:
 * display string, selector, host, port, then the optional Gopher+ flag.
 */
typedef enum {
	FIELD_NAME,			/**< display string shown to the user */
	FIELD_SELECTOR,		/**< selector to send to the server */
	FIELD_HOST,			/**< host serving the item */
	FIELD_PORT,			/**< port on that host */
	FIELD_GPFLAG,		/**< Gopher+ flag, if any */
	FIELD_EOL,			/**< one past the last possible field */
	FIELD_COUNT = FIELD_EOL
} gopher_field;
102 /** Map of gopher types to MIME types */
104 gopher_item_type type
;
106 } gopher_type_map
[] = {
107 /* these come from http://tools.ietf.org/html/rfc1436 */
108 { GOPHER_TYPE_TEXTPLAIN
, "text/plain" },
109 { GOPHER_TYPE_DIRECTORY
, "text/html;charset=UTF-8" },
110 { GOPHER_TYPE_QUERY
, "text/html;charset=UTF-8" },
111 { GOPHER_TYPE_GIF
, "image/gif" },
112 { GOPHER_TYPE_HTML
, "text/html" },
113 /* those are not standardized */
114 { GOPHER_TYPE_PDF
, "application/pdf" },
115 { GOPHER_TYPE_PNG
, "image/png"},
116 { GOPHER_TYPE_NONE
, NULL
}
// CSS text that _ParseInput() places inside the <style> element of the HTML
// header it generates for gopher menus. All visible rules are scoped with
// "body#gopher", the id given to the generated <body> element.
// NOTE(review): several lines of this literal are not visible in this
// excerpt, so the rule set documented here is incomplete.
119 static const char *kStyleSheet
= "\n"
121 " * gopher listing style\n"
125 " /* margin: 10px;*/\n"
126 " background-color: Window;\n"
127 " color: WindowText;\n"
128 " font-size: 100%;\n"
129 " padding-bottom: 2em; }\n"
// Bar holding the [up] and [top] links.
131 "body#gopher div.uplink {\n"
134 " position: fixed;\n"
141 " border-bottom: 2px solid #777; }\n"
// Every menu entry is rendered as a monospaced, pre-wrapped <span>.
143 "body#gopher span {\n"
144 " margin-left: 1em;\n"
145 " padding-left: 2em;\n"
146 " font-family: 'Noto Mono', Courier, monospace;\n"
147 " word-wrap: break-word;\n"
148 " white-space: pre-wrap; }\n"
150 "body#gopher span.error {\n"
153 "body#gopher span.unknown {\n"
// Per-type icons, drawn as a background image at the bottom left of the span.
156 "body#gopher span.dir {\n"
157 " background-image: url('resource:icons/directory.png');\n"
158 " background-repeat: no-repeat;\n"
159 " background-position: bottom left; }\n"
161 "body#gopher span.text {\n"
162 " background-image: url('resource:icons/content.png');\n"
163 " background-repeat: no-repeat;\n"
164 " background-position: bottom left; }\n"
166 "body#gopher span.query {\n"
167 " background-image: url('resource:icons/search.png');\n"
168 " background-repeat: no-repeat;\n"
169 " background-position: bottom left; }\n"
// Images embedded in an entry are centred with automatic side margins.
171 "body#gopher span.img img {\n"
173 " margin-left:auto;\n"
174 " margin-right:auto; }\n";
176 static const int32 kGopherBufferSize
= 4096;
178 static const bool kInlineImages
= true;
// Creates a gopher request for `url`.
//
// url, listener and context are forwarded unchanged to BNetworkRequest
// together with the protocol name "BUrlProtocol.Gopher" and the "gopher"
// scheme. The constructor allocates the socket and derives the gopher item
// type from the URL path.
// NOTE(review): part of the initialiser list and some statements of the body
// are not visible in this excerpt.
181 BGopherRequest::BGopherRequest(const BUrl
& url
, BUrlProtocolListener
* listener
,
182 BUrlContext
* context
)
184 BNetworkRequest(url
, listener
, context
, "BUrlProtocol.Gopher", "gopher"),
185 fItemType(GOPHER_TYPE_NONE
),
// nothrow allocation: fSocket is NULL when out of memory, which is why
// Stop() checks the pointer before using it.
189 fSocket
= new(std::nothrow
) BSocket();
192 // the first part of the path is actually the document type
194 fPath
= Url().Path();
// No path, an empty path or a bare "/" means the server's root menu.
195 if (!Url().HasPath() || fPath
.Length() == 0 || fPath
== "/") {
197 fItemType
= GOPHER_TYPE_DIRECTORY
;
// Otherwise the character following the leading '/' is the item type.
199 } else if (fPath
.Length() > 1 && fPath
[0] == '/') {
200 fItemType
= fPath
[1];
206 BGopherRequest::~BGopherRequest()
// Cancels the request: disconnects the socket when one was allocated, then
// hands over to BNetworkRequest::Stop() and returns its result.
215 BGopherRequest::Stop()
// fSocket comes from a nothrow new in the constructor and may be NULL.
217 if (fSocket
!= NULL
) {
218 fSocket
->Disconnect();
219 // Unlock any pending connect, read or write operation.
221 return BNetworkRequest::Stop();
226 BGopherRequest::Result() const
// Runs the whole gopher transaction for this request.
//
// Steps visible in this excerpt: resolve the host, connect fSocket, notify
// the listener that the connection is open, read the reply in
// kGopherBufferSize chunks into fInputBuffer, sniff the first data for a
// server-side error, publish the MIME type, pass the data on (parsed or raw),
// report the final length and progress, and disconnect.
//
// Returns B_SERVER_NOT_FOUND when the host cannot be resolved; at the end
// returns B_INTERRUPTED when fQuit is set and B_OK otherwise.
// NOTE(review): many lines of this function (including the call that sends
// the request and the other return statements) are not visible in this
// excerpt.
233 BGopherRequest::_ProtocolLoop()
// 70 is the gopher port used when the URL does not carry one.
238 if (!_ResolveHostName(fUrl
.Host(), fUrl
.HasPort() ? fUrl
.Port() : 70)) {
239 _EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR
,
240 "Unable to resolve hostname (%s), aborting.",
241 fUrl
.Host().String());
242 return B_SERVER_NOT_FOUND
;
245 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
, "Connection to %s on port %d.",
246 fUrl
.Authority().String(), fRemoteAddr
.Port());
247 status_t connectError
= fSocket
->Connect(fRemoteAddr
);
// NOTE(review): what is returned after a failed connect is not visible here.
249 if (connectError
!= B_OK
) {
250 _EmitDebug(B_URL_PROTOCOL_DEBUG_ERROR
, "Socket connection error %s",
251 strerror(connectError
));
255 //! ProtocolHook:ConnectionOpened
256 if (fListener
!= NULL
)
257 fListener
->ConnectionOpened(this);
259 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
260 "Connection opened, sending request.");
263 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
, "Request sent.");
// State of the receive loop below.
266 bool receiveEnd
= false;
267 status_t readError
= B_OK
;
268 ssize_t bytesRead
= 0;
269 //ssize_t bytesReceived = 0;
270 //ssize_t bytesTotal = 0;
271 bool dataValidated
= false;
// NOTE(review): the literal 4096 template argument duplicates
// kGopherBufferSize; the two have to be kept in sync by hand.
272 BStackOrHeapArray
<char, 4096> chunk(kGopherBufferSize
);
// Keep reading until the request is cancelled or the end of the reply is seen.
274 while (!fQuit
&& !receiveEnd
) {
275 fSocket
->WaitForReadable();
276 bytesRead
= fSocket
->Read(chunk
, kGopherBufferSize
);
// A read result is stored as the error code; the guarding condition is not
// visible in this excerpt.
279 readError
= bytesRead
;
281 } else if (bytesRead
== 0)
284 fInputBuffer
.AppendData(chunk
, bytesRead
);
// Done once per request, before anything is announced to the listener.
286 if (!dataValidated
) {
288 // on error (file doesn't exist, ...) the server sends
289 // a faked directory entry with an error message
290 if (fInputBuffer
.Size() && fInputBuffer
.Data()[0] == '3') {
294 // make sure the buffer only contains printable characters
295 // and has at least 3 tabs before a CRLF
296 for (i
= 0; i
< fInputBuffer
.Size(); i
++) {
297 char c
= fInputBuffer
.Data()[i
];
301 } else if (c
== '\r' || c
== '\n') {
305 } else if (!isprint(fInputBuffer
.Data()[i
])) {
// A line break seen with 3 or 4 tabs is accepted as an error menu line:
// render it as a directory and remember the failure.
310 if (crlf
&& tabs
> 2 && tabs
< 5) {
314 fItemType
= GOPHER_TYPE_DIRECTORY
;
315 readError
= B_RESOURCE_NOT_FOUND
;
316 // continue parsing the error text anyway
319 // special case for buggy(?) Gophernicus/1.5
320 static const char *buggy
= "Error: File or directory not found!";
321 if (fInputBuffer
.Size() > strlen(buggy
)
322 && !memcmp(fInputBuffer
.Data(), buggy
, strlen(buggy
))) {
323 fItemType
= GOPHER_TYPE_DIRECTORY
;
324 readError
= B_RESOURCE_NOT_FOUND
;
325 // continue parsing the error text anyway
326 // but it won't look good
329 // now we probably have correct data
330 dataValidated
= true;
332 //! ProtocolHook:ResponseStarted
333 if (fListener
!= NULL
)
334 fListener
->ResponseStarted(this);
336 // now we can assign MIME type if we know it
337 const char *mime
= "application/octet-stream";
// The table ends with a GOPHER_TYPE_NONE sentinel; item types without an
// entry keep the octet-stream default set above.
338 for (i
= 0; gopher_type_map
[i
].type
!= GOPHER_TYPE_NONE
; i
++) {
339 if (gopher_type_map
[i
].type
== fItemType
) {
340 mime
= gopher_type_map
[i
].mime
;
344 fResult
.SetContentType(mime
);
346 // we don't really have headers but well...
347 //! ProtocolHook:HeadersReceived
348 if (fListener
!= NULL
)
349 fListener
->HeadersReceived(this, fResult
);
// Either convert the buffered menu to HTML or pass the bytes through.
// NOTE(review): the condition choosing between the two is not visible here.
353 _ParseInput(receiveEnd
);
354 else if (fInputBuffer
.Size()) {
355 // send input directly
356 if (fListener
!= NULL
) {
357 fListener
->DataReceived(this, (const char *)fInputBuffer
.Data(),
358 fPosition
, fInputBuffer
.Size());
361 fPosition
+= fInputBuffer
.Size();
363 // XXX: this is plain stupid, we already copied the data
364 // and just want to drop it...
// NOTE(review): the scratch buffer is sized with bytesRead but drained with
// fInputBuffer.Size(); the two only agree while the buffer holds exactly the
// last chunk -- confirm.
365 char *inputTempBuffer
= new(std::nothrow
) char[bytesRead
];
366 if (inputTempBuffer
== NULL
) {
367 readError
= B_NO_MEMORY
;
370 fInputBuffer
.RemoveData(inputTempBuffer
, fInputBuffer
.Size());
371 delete[] inputTempBuffer
;
// The total size is only known now, so it is reported both as the current
// position and as the total.
376 fResult
.SetLength(fPosition
);
377 if (fListener
!= NULL
)
378 fListener
->DownloadProgress(this, fPosition
, fPosition
);
381 fSocket
->Disconnect();
// NOTE(review): the statement guarded by this test is not visible here.
383 if (readError
!= B_OK
)
386 return fQuit
? B_INTERRUPTED
: B_OK
;
// Writes the request line to fSocket.
// NOTE(review): the declaration of `request` and whatever is put into it
// before and after the optional search part are not visible in this excerpt.
391 BGopherRequest::_SendRequest()
// When the URL has a request part it is appended after a TAB separator.
397 if (Url().HasRequest())
398 request
<< '\t' << Url().Request();
// The value returned by Write() is not checked.
402 fSocket
->Write(request
.String(), request
.Length());
407 BGopherRequest::_NeedsParsing()
409 if (fItemType
== GOPHER_TYPE_DIRECTORY
410 || fItemType
== GOPHER_TYPE_QUERY
)
417 BGopherRequest::_NeedsLastDotStrip()
419 if (fItemType
== GOPHER_TYPE_DIRECTORY
420 || fItemType
== GOPHER_TYPE_QUERY
421 || fItemType
== GOPHER_TYPE_TEXTPLAIN
)
// Turns the gopher menu lines buffered so far into HTML and hands the result
// to the listener.
//
// Every line obtained from _GetLine() is split into its item-type character
// and its tab-separated fields, then rendered as one HTML fragment depending
// on the type. Before the first output (fPosition == 0) an HTML header with
// the style sheet, the page title and the [up]/[top] links is sent. fPosition
// is advanced by the length of everything passed to DataReceived().
//
// last: value of receiveEnd passed in by _ProtocolLoop().
// NOTE(review): many lines of this function are not visible in this excerpt
// (declarations, break statements, the footer text and the use of `last`).
428 BGopherRequest::_ParseInput(bool last
)
432 while (_GetLine(line
) == B_OK
) {
433 char type
= GOPHER_TYPE_NONE
;
// Move the leading item-type character out of the line; what remains is the
// tab-separated field list.
436 line
.MoveInto(&type
, 0, 1);
438 line
.Split("\t", false, fields
);
// Any line but the end-of-page marker is expected to carry at least
// FIELD_GPFLAG fields; shorter ones are only reported, not rejected.
440 if (type
!= GOPHER_TYPE_ENDOFPAGE
441 && fields
.CountStrings() < FIELD_GPFLAG
)
442 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
443 "Unterminated gopher item (type '%c')", type
);
// Display text and default gopher:// target of the entry.
447 BString title
= fields
.StringAt(FIELD_NAME
);
448 BString
link("gopher://");
// Host, port and selector are only used when more than three fields exist.
450 if (fields
.CountStrings() > 3) {
451 link
<< fields
.StringAt(FIELD_HOST
);
452 if (fields
.StringAt(FIELD_PORT
).Length())
453 link
<< ":" << fields
.StringAt(FIELD_PORT
);
455 //if (fields.StringAt(FIELD_SELECTOR).ByteAt(0) != '/')
457 link
<< fields
.StringAt(FIELD_SELECTOR
);
// Both strings are embedded in markup below, so they go through the escaper.
459 _HTMLEscapeString(title
);
460 _HTMLEscapeString(link
);
463 case GOPHER_TYPE_ENDOFPAGE
:
464 /* end of the page */
// Plain text document: simple link.
466 case GOPHER_TYPE_TEXTPLAIN
:
467 item
<< "<a href=\"" << link
<< "\">"
468 "<span class=\"text\">" << title
<< "</span></a>"
// All binary flavours share one presentation.
471 case GOPHER_TYPE_BINARY
:
472 case GOPHER_TYPE_BINHEX
:
473 case GOPHER_TYPE_BINARCHIVE
:
474 case GOPHER_TYPE_UUENCODED
:
475 item
<< "<a href=\"" << link
<< "\">"
476 "<span class=\"binary\">" << title
<< "</span></a>"
479 case GOPHER_TYPE_DIRECTORY
:
483 item
<< "<a href=\"" << link
<< "\">"
484 "<span class=\"dir\">" << title
<< "</span></a>"
// Error entries are shown as text; the first one seen before any output also
// becomes the page title.
487 case GOPHER_TYPE_ERROR
:
488 item
<< "<span class=\"error\">" << title
<< "</span>"
490 if (fPosition
== 0 && pageTitle
.Length() == 0)
491 pageTitle
<< "Error: " << title
;
// Search entries become a small form whose submit handler appends the typed
// text to the entry's link after a '?'.
493 case GOPHER_TYPE_QUERY
:
494 /* TODO: handle search better.
495 * For now we use an unnamed input field and accept sending ?=foo
496 * as it seems at least Veronica-2 ignores the = but it's unclean.
498 item
<< "<form method=\"get\" action=\"" << link
<< "\" "
499 "onsubmit=\"window.location = this.action + '?' + "
500 "this.elements['q'].value; return false;\">"
501 "<span class=\"query\">"
502 "<label>" << title
<< " "
503 "<input id=\"q\" name=\"\" type=\"text\" align=\"right\" />"
// Telnet entries: the link is rebuilt from the selector's last component
// (taken as the user) and the host; the port is left out when it is 23.
// NOTE(review): Remove(0, FindLast('/')) keeps the '/' itself at the start of
// `user` -- confirm that is intended.
508 case GOPHER_TYPE_TELNET
:
510 * cf. gopher://78.80.30.202/1/ps3
511 * -> gopher://78.80.30.202:23/8/ps3/new -> new@78.80.30.202
514 user
= fields
.StringAt(FIELD_SELECTOR
);
515 if (user
.FindLast('/') > -1) {
516 user
.Remove(0, user
.FindLast('/'));
519 link
<< fields
.StringAt(FIELD_HOST
);
520 if (fields
.StringAt(FIELD_PORT
) != "23")
521 link
<< ":" << fields
.StringAt(FIELD_PORT
);
523 item
<< "<a href=\"" << link
<< "\">"
524 "<span class=\"telnet\">" << title
<< "</span></a>"
// tn3270 entries are handled like telnet ones, including the "telnet" class.
527 case GOPHER_TYPE_TN3270
:
528 /* tn3270: URI scheme, cf. http://tools.ietf.org/html/rfc6270 */
530 user
= fields
.StringAt(FIELD_SELECTOR
);
531 if (user
.FindLast('/') > -1) {
532 user
.Remove(0, user
.FindLast('/'));
535 link
<< fields
.StringAt(FIELD_HOST
);
536 if (fields
.StringAt(FIELD_PORT
) != "23")
537 link
<< ":" << fields
.StringAt(FIELD_PORT
);
539 item
<< "<a href=\"" << link
<< "\">"
540 "<span class=\"telnet\">" << title
<< "</span></a>"
// CSO phone-book entries: same scheme, with 105 as the port that is omitted.
543 case GOPHER_TYPE_CSO_SEARCH
:
545 * At least Lynx supports a cso:// URI scheme:
546 * http://lynx.isc.org/lynx2.8.5/lynx2-8-5/lynx_help/lynx_url_support.html
549 user
= fields
.StringAt(FIELD_SELECTOR
);
550 if (user
.FindLast('/') > -1) {
551 user
.Remove(0, user
.FindLast('/'));
554 link
<< fields
.StringAt(FIELD_HOST
);
555 if (fields
.StringAt(FIELD_PORT
) != "105")
556 link
<< ":" << fields
.StringAt(FIELD_PORT
);
558 item
<< "<a href=\"" << link
<< "\">"
559 "<span class=\"cso\">" << title
<< "</span></a>"
// Image entries: one form embeds the picture, the other only links to it.
// NOTE(review): the test choosing between them is not visible here.
562 case GOPHER_TYPE_GIF
:
563 case GOPHER_TYPE_IMAGE
:
564 case GOPHER_TYPE_PNG
:
565 case GOPHER_TYPE_BITMAP
:
566 /* quite dangerous, cf. gopher://namcub.accela-labs.com/1/pics */
568 item
<< "<a href=\"" << link
<< "\">"
569 "<span class=\"img\">" << title
<< " "
570 "<img src=\"" << link
<< "\" "
571 "alt=\"" << title
<< "\"/>"
576 /* fallback to default, link them */
577 item
<< "<a href=\"" << link
<< "\">"
578 "<span class=\"img\">" << title
<< "</span></a>"
// HTML entries whose selector starts with "URL:" take the link from the
// selector itself instead of the gopher:// link built above.
581 case GOPHER_TYPE_HTML
:
582 /* cf. gopher://pineapple.vg/1 */
583 if (fields
.StringAt(FIELD_SELECTOR
).StartsWith("URL:")) {
584 link
= fields
.StringAt(FIELD_SELECTOR
);
587 /* cf. gopher://sdf.org/1/sdf/classes/ */
589 item
<< "<a href=\"" << link
<< "\">"
590 "<span class=\"html\">" << title
<< "</span></a>"
593 case GOPHER_TYPE_INFO
:
594 // TITLE resource, cf.
595 // gopher://gophernicus.org/0/doc/gopher/gopher-title-resource.txt
596 if (fPosition
== 0 && pageTitle
.Length() == 0
597 && fields
.StringAt(FIELD_SELECTOR
) == "TITLE") {
601 item
<< "<span class=\"info\">" << title
<< "</span>"
// Sound entries: a link followed by an <audio> element.
604 case GOPHER_TYPE_AUDIO
:
605 case GOPHER_TYPE_SOUND
:
606 item
<< "<a href=\"" << link
<< "\">"
607 "<span class=\"audio\">" << title
<< "</span></a>"
608 "<audio src=\"" << link
<< "\" "
609 //TODO:Fix crash in WebPositive with these
610 //"controls=\"controls\" "
611 //"width=\"300\" height=\"50\" "
612 "alt=\"" << title
<< "\"/>"
613 "<span>[player]</span></audio>"
616 case GOPHER_TYPE_PDF
:
617 case GOPHER_TYPE_DOC
:
618 /* generic case for known-to-work items */
619 item
<< "<a href=\"" << link
<< "\">"
620 "<span class=\"document\">" << title
<< "</span></a>"
// Movie entries: a link followed by a <video> element.
// NOTE(review): the <video> element opened here is closed with an audio end
// tag in the literal below; looks like a copy/paste slip -- confirm and fix.
623 case GOPHER_TYPE_MOVIE
:
624 item
<< "<a href=\"" << link
<< "\">"
625 "<span class=\"video\">" << title
<< "</span></a>"
626 "<video src=\"" << link
<< "\" "
627 //TODO:Fix crash in WebPositive with these
628 //"controls=\"controls\" "
629 //"width=\"300\" height=\"300\" "
630 "alt=\"" << title
<< "\"/>"
631 "<span>[player]</span></audio>"
// Any other type is logged and still rendered as a link.
635 _EmitDebug(B_URL_PROTOCOL_DEBUG_TEXT
,
636 "Unknown gopher item (type 0x%02x '%c')", type
, type
);
637 item
<< "<a href=\"" << link
<< "\">"
638 "<span class=\"unknown\">" << title
<< "</span></a>"
// Nothing has been sent yet: emit the HTML header before the first item.
643 if (fPosition
== 0) {
644 if (pageTitle
.Length() == 0)
645 pageTitle
<< "Index of " << Url();
// Target of the [up] link; the value chosen when the path ends with '/' is
// not visible in this excerpt.
647 const char *uplink
= ".";
648 if (fPath
.EndsWith("/"))
656 "<meta http-equiv=\"Content-Type\""
657 " content=\"text/html; charset=UTF-8\" />\n"
659 //"<link rel=\"icon\" type=\"image/png\""
660 // " href=\"resource:icons/directory.png\">\n"
661 "<style type=\"text/css\">\n" << kStyleSheet
<< "</style>\n"
662 "<title>" << pageTitle
<< "</title>\n"
664 "<body id=\"gopher\">\n"
665 "<div class=\"uplink dontprint\">\n"
666 "<a href=" << uplink
<< ">[up]</a>\n"
667 "<a href=\"/\">[top]</a>\n"
669 "<h1>" << pageTitle
<< "</h1>\n";
// Header, item and footer are each handed to the listener at the current
// offset, which then advances by the length of the text sent.
671 fListener
->DataReceived(this, header
.String(), fPosition
,
674 fPosition
+= header
.Length();
678 fListener
->DataReceived(this, item
.String(), fPosition
,
681 fPosition
+= item
.Length();
692 fListener
->DataReceived(this, footer
.String(), fPosition
,
695 fPosition
+= footer
.Length();
701 BGopherRequest::_HTMLEscapeString(BString
&str
)
703 str
.ReplaceAll("&", "&");
704 str
.ReplaceAll("<", "<");
705 str
.ReplaceAll(">", ">");