/*
 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
32 #include <sys/socket.h>
34 #include <netinet/in.h>
37 #include <sys/types.h>
47 gchar
*strip_html(const gchar
*text
)
51 gchar
*text2
= g_strdup(text
);
56 for (i
= 0, j
= 0; text2
[i
]; i
++) {
57 if (text2
[i
] == '<') {
59 if(g_ascii_isspace(text2
[k
])) {
63 if (text2
[k
] == '<') {
67 if (text2
[k
] == '>') {
74 } else if (text2
[i
] == '>' && !visible
) {
78 if (text2
[i
] == '&' && strncasecmp(text2
+i
,""",6) == 0) {
84 text2
[j
++] = text2
[i
];
91 struct g_url
*parse_url(char *url
)
93 struct g_url
*test
= g_new0(struct g_url
, 1);
98 if (strstr(url
, "http://"))
99 g_snprintf(scan_info
, sizeof(scan_info
),
100 "http://%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+]");
102 g_snprintf(scan_info
, sizeof(scan_info
),
103 "%%[A-Za-z0-9.]:%%[0-9]/%%[A-Za-z0-9.~_-/&%%?=+^]");
104 f
= sscanf(url
, scan_info
, test
->address
, port
, test
->page
);
106 if (strstr(url
, "http://"))
107 g_snprintf(scan_info
, sizeof(scan_info
),
108 "http://%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
110 g_snprintf(scan_info
, sizeof(scan_info
),
111 "%%[A-Za-z0-9.]/%%[A-Za-z0-9.~_-/&%%?=+^]");
112 f
= sscanf(url
, scan_info
, test
->address
, test
->page
);
113 g_snprintf(port
, sizeof(test
->port
), "80");
117 if (strstr(url
, "http://"))
118 g_snprintf(scan_info
, sizeof(scan_info
), "http://%%[A-Za-z0-9.]");
120 g_snprintf(scan_info
, sizeof(scan_info
), "%%[A-Za-z0-9.]");
121 f
= sscanf(url
, scan_info
, test
->address
);
122 g_snprintf(test
->page
, sizeof(test
->page
), "%c", '\0');
125 sscanf(port
, "%d", &test
->port
);
129 struct grab_url_data
{
130 void (* callback
)(gpointer
, char *, unsigned long);
132 struct g_url
*website
;
140 gboolean startsaving
;
143 unsigned long data_len
;
147 parse_redirect(const char *data
, size_t data_len
, gint sock
,
148 struct grab_url_data
*gunk
)
152 if ((s
= g_strstr_len(data
, data_len
, "Location: ")) != NULL
) {
153 gchar
*new_url
, *end
;
156 s
+= strlen("Location: ");
157 end
= strchr(s
, '\r');
159 /* Just in case :) */
161 end
= strchr(s
, '\n');
165 new_url
= g_malloc(len
+ 1);
166 strncpy(new_url
, s
, len
);
169 /* Close the existing stuff. */
170 gaim_input_remove(gunk
->inpa
);
173 /* Try again, with this new location. */
174 grab_url(new_url
, gunk
->full
, gunk
->callback
,
179 g_free(gunk
->webdata
);
180 g_free(gunk
->website
);
/*
 * Extracts the value of the Content-Length header from a block of HTTP
 * response headers.
 *
 * data is a raw byte buffer of data_len bytes; it does not have to be
 * NUL-terminated and nothing beyond data + data_len is read.  The header
 * name is matched case-insensitively and only at the start of a line, so
 * it is found after the status line and is not confused with names that
 * merely end in "Content-Length".
 *
 * Returns the decimal value, or 0 when the header is absent, has no
 * digits (including a signed value), or does not fit in size_t.  The
 * number comes from the remote server: treat it as a sizing hint only and
 * never as a guarantee of how many body bytes will follow.
 */
static size_t
parse_content_len(const char *data, size_t data_len)
{
	static const char header[] = "content-length:";
	const size_t header_len = sizeof(header) - 1;
	size_t line = 0;

	if (data == NULL)
		return 0;

	/* Invariant: line is the offset of the first byte of a header line. */
	while (line < data_len) {
		size_t matched = 0;

		if (data_len - line >= header_len) {
			while (matched < header_len) {
				char c = data[line + matched];

				if (c >= 'A' && c <= 'Z')
					c = (char)(c - 'A' + 'a');
				if (c != header[matched])
					break;
				matched++;
			}
		}

		if (matched == header_len) {
			size_t pos = line + header_len;
			size_t value = 0;
			int have_digit = 0;

			while (pos < data_len && (data[pos] == ' ' || data[pos] == '\t'))
				pos++;

			while (pos < data_len && data[pos] >= '0' && data[pos] <= '9') {
				size_t digit = (size_t)(data[pos] - '0');

				/* Would value * 10 + digit wrap around? */
				if (value > ((size_t)-1 - digit) / 10)
					return 0;

				value = value * 10 + digit;
				have_digit = 1;
				pos++;
			}

			return have_digit ? value : 0;
		}

		/* Not this line; skip to the byte after the next line feed. */
		while (line < data_len && data[line] != '\n')
			line++;
		if (line < data_len)
			line++;
	}

	return 0;
}
200 static void grab_url_callback(gpointer dat
, gint sock
, GaimInputCondition cond
)
202 struct grab_url_data
*gunk
= dat
;
206 gunk
->callback(gunk
->data
, NULL
, 0);
207 g_free(gunk
->website
);
213 if (!gunk
->sentreq
) {
216 g_snprintf(buf
, sizeof(buf
), "GET %s%s HTTP/1.0\r\n\r\n", gunk
->full
? "" : "/",
217 gunk
->full
? gunk
->url
: gunk
->website
->page
);
218 debug_printf("Request: %s\n", buf
);
220 write(sock
, buf
, strlen(buf
));
221 fcntl(sock
, F_SETFL
, O_NONBLOCK
);
222 gunk
->sentreq
= TRUE
;
223 gunk
->inpa
= gaim_input_add(sock
, GAIM_INPUT_READ
, grab_url_callback
, dat
);
224 gunk
->data_len
= 4096;
225 gunk
->webdata
= g_malloc(gunk
->data_len
);
229 if (read(sock
, &data
, 1) > 0 || errno
== EWOULDBLOCK
) {
230 if (errno
== EWOULDBLOCK
) {
237 if (gunk
->len
== gunk
->data_len
+ 1) {
238 gunk
->data_len
+= (gunk
->data_len
) / 2;
240 gunk
->webdata
= g_realloc(gunk
->webdata
, gunk
->data_len
);
243 gunk
->webdata
[gunk
->len
- 1] = data
;
245 if (!gunk
->startsaving
) {
251 gunk
->startsaving
= TRUE
;
253 /* See if we can find a redirect. */
254 if (parse_redirect(gunk
->webdata
, gunk
->len
, sock
, gunk
))
257 /* No redirect. See if we can find a content length. */
258 content_len
= parse_content_len(gunk
->webdata
, gunk
->len
);
260 if (content_len
== 0) {
261 /* We'll stick with an initial 8192 */
265 /* Out with the old... */
267 g_free(gunk
->webdata
);
268 gunk
->webdata
= NULL
;
270 /* In with the new. */
271 gunk
->data_len
= content_len
;
272 gunk
->webdata
= g_malloc(gunk
->data_len
);
275 gunk
->newline
= TRUE
;
278 gunk
->newline
= FALSE
;
280 } else if (errno
!= ETIMEDOUT
) {
281 gunk
->webdata
= g_realloc(gunk
->webdata
, gunk
->len
+ 1);
282 gunk
->webdata
[gunk
->len
] = 0;
284 debug_printf(_("Received: '%s'\n"), gunk
->webdata
);
286 gaim_input_remove(gunk
->inpa
);
288 gunk
->callback(gunk
->data
, gunk
->webdata
, gunk
->len
);
290 g_free(gunk
->webdata
);
291 g_free(gunk
->website
);
295 gaim_input_remove(gunk
->inpa
);
297 gunk
->callback(gunk
->data
, NULL
, 0);
299 g_free(gunk
->webdata
);
300 g_free(gunk
->website
);
306 void grab_url(char *url
, gboolean full
, void callback(gpointer
, char *, unsigned long), gpointer data
)
309 struct grab_url_data
*gunk
= g_new0(struct grab_url_data
, 1);
311 gunk
->callback
= callback
;
313 gunk
->url
= g_strdup(url
);
314 gunk
->website
= parse_url(url
);
317 if ((sock
= proxy_connect(NULL
, gunk
->website
->address
, gunk
->website
->port
,
318 grab_url_callback
, gunk
)) < 0) {
319 g_free(gunk
->website
);
322 callback(data
, g_strdup(_("g003: Error opening connection.\n")), 0);