[gaim-migrate @ 5234]
[pidgin-git.git] / src / html.c
blobed428c0d4de3b1fb05a531fb25737b4b5b3bbbe2
1 /*
2 * gaim
4 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 #include <stdio.h>
27 #include <stdlib.h>
29 #ifndef _WIN32
30 #include <sys/time.h>
31 #include <unistd.h>
32 #include <sys/socket.h>
33 #include <netdb.h>
34 #include <netinet/in.h>
35 #endif
37 #include <sys/types.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include "gaim.h"
41 #include "proxy.h"
43 #ifdef _WIN32
44 #include "win32dep.h"
45 #endif
47 gchar *strip_html(const gchar *text)
49 int i, j, k;
50 int visible = 1;
51 gchar *text2 = g_strdup(text);
53 if(!text)
54 return NULL;
56 for (i = 0, j = 0; text2[i]; i++) {
57 if (text2[i] == '<') {
58 k = i + 1;
59 if(g_ascii_isspace(text2[k])) {
60 visible = 1;
61 } else {
62 while (text2[k]) {
63 if (text2[k] == '<') {
64 visible = 1;
65 break;
67 if (text2[k] == '>') {
68 visible = 0;
69 break;
71 k++;
74 } else if (text2[i] == '>' && !visible) {
75 visible = 1;
76 continue;
78 if (text2[i] == '&' && strncasecmp(text2+i,"&quot;",6) == 0) {
79 text2[j++] = '\"';
80 i = i+5;
81 continue;
83 if (visible) {
84 text2[j++] = text2[i];
87 text2[j] = '\0';
88 return text2;
91 struct g_url *parse_url(char *url)
93 struct g_url *test = g_new0(struct g_url, 1);
94 char scan_info[255];
95 char port[5];
96 int f;
98 if (strstr(url, "http://"))
99 g_snprintf(scan_info, sizeof(scan_info),
100 "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+]");
101 else
102 g_snprintf(scan_info, sizeof(scan_info),
103 "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+^]");
104 f = sscanf(url, scan_info, test->address, port, test->page);
105 if (f == 1) {
106 if (strstr(url, "http://"))
107 g_snprintf(scan_info, sizeof(scan_info),
108 "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
109 else
110 g_snprintf(scan_info, sizeof(scan_info),
111 "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
112 f = sscanf(url, scan_info, test->address, test->page);
113 g_snprintf(port, sizeof(test->port), "80");
114 port[2] = 0;
116 if (f == 1) {
117 if (strstr(url, "http://"))
118 g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]");
119 else
120 g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]");
121 f = sscanf(url, scan_info, test->address);
122 g_snprintf(test->page, sizeof(test->page), "%c", '\0');
125 sscanf(port, "%d", &test->port);
126 return test;
129 struct grab_url_data {
130 void (* callback)(gpointer, char *, unsigned long);
131 gpointer data;
132 struct g_url *website;
133 char *url;
134 gboolean full;
136 int inpa;
138 gboolean sentreq;
139 gboolean newline;
140 gboolean startsaving;
141 char *webdata;
142 unsigned long len;
143 unsigned long data_len;
146 static gboolean
147 parse_redirect(const char *data, size_t data_len, gint sock,
148 struct grab_url_data *gunk)
150 gchar *s;
152 if ((s = g_strstr_len(data, data_len, "Location: ")) != NULL) {
153 gchar *new_url, *end;
154 int len;
156 s += strlen("Location: ");
157 end = strchr(s, '\r');
159 /* Just in case :) */
160 if (end == NULL)
161 end = strchr(s, '\n');
163 len = end - s;
165 new_url = g_malloc(len + 1);
166 strncpy(new_url, s, len);
167 new_url[len] = '\0';
169 /* Close the existing stuff. */
170 gaim_input_remove(gunk->inpa);
171 close(sock);
173 /* Try again, with this new location. */
174 grab_url(new_url, gunk->full, gunk->callback,
175 gunk->data);
177 /* Free up. */
178 g_free(new_url);
179 g_free(gunk->webdata);
180 g_free(gunk->website);
181 g_free(gunk->url);
182 g_free(gunk);
184 return TRUE;
187 return FALSE;
/*
 * Extract the value of the "Content-Length:" header from a block of
 * HTTP response headers.
 *
 * data is NOT assumed to be NUL-terminated; only the first data_len
 * bytes are examined.  The header name must start a line (offset 0 or
 * directly after a '\n') and is matched case-sensitively.  Spaces or
 * tabs between the colon and the digits are skipped.
 *
 * Returns the parsed length, or 0 when the header is absent, has no
 * digits, or holds a value too large for size_t.  The value comes from
 * the remote server, so callers should treat it as a hint only.
 */
static size_t
parse_content_len(const char *data, size_t data_len)
{
	static const char header[] = "Content-Length:";
	const size_t header_len = sizeof(header) - 1;
	const size_t size_max = (size_t)-1;
	size_t content_len = 0;
	size_t pos;

	if (data == NULL || data_len < header_len)
		return 0;

	for (pos = 0; pos + header_len <= data_len; pos++) {
		/* Only accept the name at the beginning of a header line. */
		if (pos > 0 && data[pos - 1] != '\n')
			continue;
		if (memcmp(data + pos, header, header_len) != 0)
			continue;

		pos += header_len;
		while (pos < data_len && (data[pos] == ' ' || data[pos] == '\t'))
			pos++;

		for (; pos < data_len && data[pos] >= '0' && data[pos] <= '9'; pos++) {
			const size_t digit = (size_t)(data[pos] - '0');

			/* Refuse values that would wrap around. */
			if (content_len > (size_max - digit) / 10)
				return 0;
			content_len = content_len * 10 + digit;
		}
		return content_len;
	}

	return 0;
}
200 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond)
202 struct grab_url_data *gunk = dat;
203 char data;
205 if (sock == -1) {
206 gunk->callback(gunk->data, NULL, 0);
207 g_free(gunk->website);
208 g_free(gunk->url);
209 g_free(gunk);
210 return;
213 if (!gunk->sentreq) {
214 char buf[256];
216 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/",
217 gunk->full ? gunk->url : gunk->website->page);
218 debug_printf("Request: %s\n", buf);
220 write(sock, buf, strlen(buf));
221 fcntl(sock, F_SETFL, O_NONBLOCK);
222 gunk->sentreq = TRUE;
223 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat);
224 gunk->data_len = 4096;
225 gunk->webdata = g_malloc(gunk->data_len);
226 return;
229 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) {
230 if (errno == EWOULDBLOCK) {
231 errno = 0;
232 return;
235 gunk->len++;
237 if (gunk->len == gunk->data_len + 1) {
238 gunk->data_len += (gunk->data_len) / 2;
240 gunk->webdata = g_realloc(gunk->webdata, gunk->data_len);
243 gunk->webdata[gunk->len - 1] = data;
245 if (!gunk->startsaving) {
246 if (data == '\r')
247 return;
248 if (data == '\n') {
249 if (gunk->newline) {
250 size_t content_len;
251 gunk->startsaving = TRUE;
253 /* See if we can find a redirect. */
254 if (parse_redirect(gunk->webdata, gunk->len, sock, gunk))
255 return;
257 /* No redirect. See if we can find a content length. */
258 content_len = parse_content_len(gunk->webdata, gunk->len);
260 if (content_len == 0) {
261 /* We'll stick with an initial 8192 */
262 content_len = 8192;
265 /* Out with the old... */
266 gunk->len = 0;
267 g_free(gunk->webdata);
268 gunk->webdata = NULL;
270 /* In with the new. */
271 gunk->data_len = content_len;
272 gunk->webdata = g_malloc(gunk->data_len);
274 else
275 gunk->newline = TRUE;
276 return;
278 gunk->newline = FALSE;
280 } else if (errno != ETIMEDOUT) {
281 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1);
282 gunk->webdata[gunk->len] = 0;
284 debug_printf(_("Received: '%s'\n"), gunk->webdata);
286 gaim_input_remove(gunk->inpa);
287 close(sock);
288 gunk->callback(gunk->data, gunk->webdata, gunk->len);
289 if (gunk->webdata)
290 g_free(gunk->webdata);
291 g_free(gunk->website);
292 g_free(gunk->url);
293 g_free(gunk);
294 } else {
295 gaim_input_remove(gunk->inpa);
296 close(sock);
297 gunk->callback(gunk->data, NULL, 0);
298 if (gunk->webdata)
299 g_free(gunk->webdata);
300 g_free(gunk->website);
301 g_free(gunk->url);
302 g_free(gunk);
306 void grab_url(char *url, gboolean full, void callback(gpointer, char *, unsigned long), gpointer data)
308 int sock;
309 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1);
311 gunk->callback = callback;
312 gunk->data = data;
313 gunk->url = g_strdup(url);
314 gunk->website = parse_url(url);
315 gunk->full = full;
317 if ((sock = proxy_connect(NULL, gunk->website->address, gunk->website->port,
318 grab_url_callback, gunk)) < 0) {
319 g_free(gunk->website);
320 g_free(gunk->url);
321 g_free(gunk);
322 callback(data, g_strdup(_("g003: Error opening connection.\n")), 0);