[gaim-migrate @ 2985]
[pidgin-git.git] / src / html.c
blob16bf33f7097bf9dc0324c9ad61b1607bbe95fe72
/*
 * gaim
 *
 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 #include <string.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <sys/time.h>
29 #include <unistd.h>
30 #include "gaim.h"
31 #include <sys/types.h>
32 #include <sys/socket.h>
33 #include <netdb.h>
34 #include <netinet/in.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include "proxy.h"
/*
 * Components of a URL as split up by parse_url().
 * All strings are NUL-terminated and stored inline, so a single g_free()
 * of the structure releases everything.
 */
struct g_url {
	char address[255];	/* host name, without scheme or port */
	int port;		/* TCP port to connect to */
	char page[255];		/* path after the first '/', without that '/' */
};
45 gchar *strip_html(gchar *text)
47 int i, j, k;
48 int visible = 1;
49 gchar *text2 = g_strdup(text);
51 for (i = 0, j = 0; text2[i]; i++) {
52 if (text2[i] == '<') {
53 k = i + 1;
54 while (text2[k]) {
55 if (text2[k] == '<') {
56 visible = 1;
57 break;
59 if (text2[k] == '>') {
60 visible = 0;
61 break;
63 k++;
65 } else if (text2[i] == '>' && !visible) {
66 visible = 1;
67 continue;
69 if (visible) {
70 text2[j++] = text2[i];
73 text2[j] = '\0';
74 return text2;
77 static struct g_url *parse_url(char *url)
79 struct g_url *test = g_new0(struct g_url, 1);
80 char scan_info[255];
81 char port[5];
82 int f;
84 if (strstr(url, "http://"))
85 g_snprintf(scan_info, sizeof(scan_info),
86 "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+]");
87 else
88 g_snprintf(scan_info, sizeof(scan_info),
89 "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+^]");
90 f = sscanf(url, scan_info, test->address, port, test->page);
91 if (f == 1) {
92 if (strstr(url, "http://"))
93 g_snprintf(scan_info, sizeof(scan_info),
94 "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
95 else
96 g_snprintf(scan_info, sizeof(scan_info),
97 "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
98 f = sscanf(url, scan_info, test->address, test->page);
99 g_snprintf(port, sizeof(test->port), "80");
100 port[2] = 0;
102 if (f == 1) {
103 if (strstr(url, "http://"))
104 g_snprintf(scan_info, sizeof(scan_info), "http://%%[A-Za-z0-9.]");
105 else
106 g_snprintf(scan_info, sizeof(scan_info), "%%[A-Za-z0-9.]");
107 f = sscanf(url, scan_info, test->address);
108 g_snprintf(test->page, sizeof(test->page), "%c", '\0');
111 sscanf(port, "%d", &test->port);
112 return test;
115 struct grab_url_data {
116 void (* callback)(gpointer, char *);
117 gpointer data;
118 struct g_url *website;
119 char *url;
120 gboolean full;
122 int inpa;
124 gboolean sentreq;
125 gboolean newline;
126 gboolean startsaving;
127 char *webdata;
128 int len;
131 static void grab_url_callback(gpointer dat, gint sock, GaimInputCondition cond)
133 struct grab_url_data *gunk = dat;
134 char data;
136 if (sock == -1) {
137 gunk->callback(gunk->data, NULL);
138 g_free(gunk->website);
139 g_free(gunk->url);
140 g_free(gunk);
141 return;
144 if (!gunk->sentreq) {
145 char buf[256];
146 g_snprintf(buf, sizeof(buf), "GET %s%s HTTP/1.0\r\n\r\n", gunk->full ? "" : "/",
147 gunk->full ? gunk->url : gunk->website->page);
148 debug_printf("Request: %s\n", buf);
149 write(sock, buf, strlen(buf));
150 fcntl(sock, F_SETFL, O_NONBLOCK);
151 gunk->sentreq = TRUE;
152 gunk->inpa = gaim_input_add(sock, GAIM_INPUT_READ, grab_url_callback, dat);
153 return;
156 if (read(sock, &data, 1) > 0 || errno == EWOULDBLOCK) {
157 if (errno == EWOULDBLOCK) {
158 errno = 0;
159 return;
162 if (!gunk->startsaving) {
163 if (data == '\r')
164 return;
165 if (data == '\n') {
166 if (gunk->newline)
167 gunk->startsaving = TRUE;
168 else
169 gunk->newline = TRUE;
170 return;
172 gunk->newline = FALSE;
173 } else {
174 gunk->len++;
175 gunk->webdata = g_realloc(gunk->webdata, gunk->len);
176 gunk->webdata[gunk->len - 1] = data;
178 } else if (errno != ETIMEDOUT) {
180 gunk->webdata = g_realloc(gunk->webdata, gunk->len + 1);
181 gunk->webdata[gunk->len] = 0;
183 debug_printf(_("Receieved: '%s'\n"), gunk->webdata);
185 gaim_input_remove(gunk->inpa);
186 close(sock);
187 gunk->callback(gunk->data, gunk->webdata);
188 if (gunk->webdata)
189 g_free(gunk->webdata);
190 g_free(gunk->website);
191 g_free(gunk->url);
192 g_free(gunk);
193 } else {
194 gaim_input_remove(gunk->inpa);
195 close(sock);
196 gunk->callback(gunk->data, NULL);
197 if (gunk->webdata)
198 g_free(gunk->webdata);
199 g_free(gunk->website);
200 g_free(gunk->url);
201 g_free(gunk);
205 void grab_url(char *url, gboolean full, void callback(gpointer, char *), gpointer data)
207 int sock;
208 struct grab_url_data *gunk = g_new0(struct grab_url_data, 1);
210 gunk->callback = callback;
211 gunk->data = data;
212 gunk->url = g_strdup(url);
213 gunk->website = parse_url(url);
214 gunk->full = full;
216 if ((sock = proxy_connect(gunk->website->address, gunk->website->port,
217 grab_url_callback, gunk)) < 0) {
218 g_free(gunk->website);
219 g_free(gunk->url);
220 g_free(gunk);
221 callback(data, g_strdup(_("g003: Error opening connection.\n")));