[gaim-migrate @ 4156]
[pidgin-git.git] / src / html.c
blob56fcc8fd18d83d6e740100150226b05e5d74ca67
1 /*
2 * gaim
4 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 #include <stdio.h>
27 #include <stdlib.h>
29 #ifndef _WIN32
30 #include <sys/time.h>
31 #include <unistd.h>
32 #include <sys/socket.h>
33 #include <netdb.h>
34 #include <netinet/in.h>
35 #endif
37 #include <sys/types.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include "gaim.h"
41 #include "proxy.h"
43 #ifdef _WIN32
44 #include "win32dep.h"
45 #endif
47 gchar *strip_html(gchar *text)
49 int i, j, k;
50 int visible = 1;
51 gchar *text2 = g_strdup(text);
53 for (i = 0, j = 0; text2[i]; i++) {
54 if (text2[i] == '<') {
55 k = i + 1;
56 while (text2[k]) {
57 if (text2[k] == '<') {
58 visible = 1;
59 break;
61 if (text2[k] == '>') {
62 visible = 0;
63 break;
65 k++;
67 } else if (text2[i] == '>' && !visible) {
68 visible = 1;
69 continue;
71 if (visible) {
72 text2[j++] = text2[i];
75 text2[j] = '\0';
76 return text2;
79 struct g_url *parse_url(char *url)
81 struct g_url *test = g_new0(struct g_url, 1);
82 char scan_info[255];
83 char port[5];
84 int f;
86 if (strstr(url, "http://"))
87 g_snprintf(scan_info, sizeof(scan_info),
88 "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+]");
89 else
90 g_snprintf(scan_info, sizeof(scan_info),
91 "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+^]");
92 f = sscanf(url, scan_info, test->address, port, test->page);
93 if (f == 1) {
94 if (strstr(url, "http://"))
95 g_snprintf(scan_info, sizeof(scan_info),
96 "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
97 else
98 g_snprintf(scan_info, sizeof(scan_info),
99 "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
100 f = sscanf(url, scan_info, test->address, test->page);
101 g_snprintf(port, sizeof(test->port), "80");
102 port[2] = 0;
104 if (f == 1) {
105 if (strstr(url, "http://"))
106 g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]");
107 else
108 g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]");
109 f = sscanf(url, scan_info, test->address);
110 g_snprintf(test->page, sizeof(test->page), "%c", '\0');
113 sscanf(port, "%d", &test->port);
114 return test;
117 struct grab_url_data {
118 void (* callback)(gpointer, char *);
119 gpointer data;
120 struct g_url *website;
121 char *url;
122 gboolean full;
124 int inpa;
126 gboolean sentreq;
127 gboolean newline;
128 gboolean startsaving;
129 char *webdata;
130 int len;
133 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond)
135 struct grab_url_data *gunk = dat;
136 char data;
138 if (sock == -1) {
139 gunk->callback(gunk->data, NULL);
140 g_free(gunk->website);
141 g_free(gunk->url);
142 g_free(gunk);
143 return;
146 if (!gunk->sentreq) {
147 char buf[256];
149 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/",
150 gunk->full ? gunk->url : gunk->website->page);
151 debug_printf("Request: %s\n", buf);
153 write(sock, buf, strlen(buf));
154 fcntl(sock, F_SETFL, O_NONBLOCK);
155 gunk->sentreq = TRUE;
156 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat);
157 return;
160 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) {
161 if (errno == EWOULDBLOCK) {
162 errno = 0;
163 return;
165 if (!gunk->startsaving) {
166 if (data == '\r')
167 return;
168 if (data == '\n') {
169 if (gunk->newline)
170 gunk->startsaving = TRUE;
171 else
172 gunk->newline = TRUE;
173 return;
175 gunk->newline = FALSE;
176 } else {
177 gunk->len++;
178 gunk->webdata = g_realloc(gunk->webdata, gunk->len);
179 gunk->webdata[gunk->len - 1] = data;
181 } else if (errno != ETIMEDOUT) {
182 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1);
183 gunk->webdata[gunk->len] = 0;
185 debug_printf(_("Received: '%s'\n"), gunk->webdata);
187 gaim_input_remove(gunk->inpa);
188 close(sock);
189 gunk->callback(gunk->data, gunk->webdata);
190 if (gunk->webdata)
191 g_free(gunk->webdata);
192 g_free(gunk->website);
193 g_free(gunk->url);
194 g_free(gunk);
195 } else {
196 gaim_input_remove(gunk->inpa);
197 close(sock);
198 gunk->callback(gunk->data, NULL);
199 if (gunk->webdata)
200 g_free(gunk->webdata);
201 g_free(gunk->website);
202 g_free(gunk->url);
203 g_free(gunk);
207 void grab_url(char *url, gboolean full, void callback(gpointer, char *), gpointer data)
209 int sock;
210 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1);
212 gunk->callback = callback;
213 gunk->data = data;
214 gunk->url = g_strdup(url);
215 gunk->website = parse_url(url);
216 gunk->full = full;
218 if ((sock = proxy_connect(gunk->website->address, gunk->website->port,
219 grab_url_callback, gunk)) < 0) {
220 g_free(gunk->website);
221 g_free(gunk->url);
222 g_free(gunk);
223 callback(data, g_strdup(_("g003: Error opening connection.\n")));