1 /***************************************************************************
3 * Project ___| | | | _ \| |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
8 * Copyright (C) 1998 - 2007, Daniel Stenberg, <daniel@haxx.se>, et al.
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at http://curl.haxx.se/docs/copyright.html.
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
21 * $Id: urlglob.c,v 1.1.1.1 2008-09-23 16:32:06 hoffman Exp $
22 ***************************************************************************/
24 /* client-local setup.h */
31 #include <curl/curl.h>
33 #define _MPRINTF_REPLACE /* we want curl-functions instead of native ones */
34 #include <curl/mprintf.h>
38 #if defined(CURLDEBUG) && defined(CURLTOOLDEBUG)
50 * Input a full globbed string, set the forth argument to the amount of
51 * strings we get out of this. Return GlobCode.
53 static GlobCode
glob_word(URLGlob
*, /* object anchor */
54 char *, /* globbed string */
55 size_t, /* position */
56 int *); /* returned number of strings */
58 static GlobCode
glob_set(URLGlob
*glob
, char *pattern
,
59 size_t pos
, int *amount
)
61 /* processes a set expression with the point behind the opening '{'
62 ','-separated elements are collected until the next closing '}'
65 char* buf
= glob
->glob_buffer
;
68 pat
= (URLPattern
*)&glob
->pattern
[glob
->size
/ 2];
69 /* patterns 0,1,2,... correspond to size=1,3,5,... */
71 pat
->content
.Set
.size
= 0;
72 pat
->content
.Set
.ptr_s
= 0;
73 pat
->content
.Set
.elements
= (char**)malloc(0);
80 case '\0': /* URL ended while set was still open */
81 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
82 "unmatched brace at pos %d\n", (int)pos
);
86 case '[': /* no nested expressions at this time */
87 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
88 "nested braces not supported at pos %d\n", (int)pos
);
92 case '}': /* set element completed */
94 pat
->content
.Set
.elements
=
95 realloc(pat
->content
.Set
.elements
,
96 (pat
->content
.Set
.size
+ 1) * sizeof(char*));
97 if (!pat
->content
.Set
.elements
) {
98 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
), "out of memory");
101 pat
->content
.Set
.elements
[pat
->content
.Set
.size
] =
102 strdup(glob
->glob_buffer
);
103 ++pat
->content
.Set
.size
;
105 if (*pattern
== '}') {
106 /* entire set pattern completed */
109 /* always check for a literal (may be "") between patterns */
110 if(GLOB_ERROR
== glob_word(glob
, ++pattern
, ++pos
, &wordamount
))
112 *amount
= pat
->content
.Set
.size
* wordamount
;
118 buf
= glob
->glob_buffer
;
123 case ']': /* illegal closing bracket */
124 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
125 "illegal pattern at pos %d\n", (int)pos
);
128 case '\\': /* escaped character, skip '\' */
142 if (*(buf
+1) == '\0') { /* but no escaping of '\0'! */
143 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
144 "illegal pattern at pos %d\n", (int)pos
);
150 /* intentional fallthrough */
152 *buf
++ = *pattern
++; /* copy character to set element */
159 static GlobCode
glob_range(URLGlob
*glob
, char *pattern
,
160 size_t pos
, int *amount
)
162 /* processes a range expression with the point behind the opening '['
163 - char range: e.g. "a-z]", "B-Q]"
164 - num range: e.g. "0-9]", "17-2000]"
165 - num range with leading zeros: e.g. "001-999]"
166 expression is checked for well-formedness and collected until the next ']'
176 pat
= (URLPattern
*)&glob
->pattern
[glob
->size
/ 2];
177 /* patterns 0,1,2,... correspond to size=1,3,5,... */
180 if (ISALPHA(*pattern
)) { /* character range detected */
184 pat
->type
= UPTCharRange
;
185 rc
= sscanf(pattern
, "%c-%c%c%d%c", &min_c
, &max_c
, &sep
, &step
, &sep2
);
186 if ((rc
< 3) || (min_c
>= max_c
) || ((max_c
- min_c
) > ('z' - 'a'))) {
187 /* the pattern is not well-formed */
188 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
189 "error: bad range specification after pos %d\n", pos
);
193 /* check the (first) separating character */
194 if((sep
!= ']') && (sep
!= ':')) {
195 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
196 "error: unsupported character (%c) after range at pos %d\n",
201 /* if there was a ":[num]" thing, use that as step or else use 1 */
202 pat
->content
.CharRange
.step
=
203 ((sep
== ':') && (rc
== 5) && (sep2
== ']'))?step
:1;
205 pat
->content
.CharRange
.ptr_c
= pat
->content
.CharRange
.min_c
= min_c
;
206 pat
->content
.CharRange
.max_c
= max_c
;
208 else if (ISDIGIT(*pattern
)) { /* numeric range detected */
212 pat
->type
= UPTNumRange
;
213 pat
->content
.NumRange
.padlength
= 0;
215 rc
= sscanf(pattern
, "%d-%d%c%d%c", &min_n
, &max_n
, &sep
, &step
, &sep2
);
217 if ((rc
< 2) || (min_n
> max_n
)) {
218 /* the pattern is not well-formed */
219 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
220 "error: bad range specification after pos %d\n", pos
);
223 pat
->content
.NumRange
.ptr_n
= pat
->content
.NumRange
.min_n
= min_n
;
224 pat
->content
.NumRange
.max_n
= max_n
;
226 /* if there was a ":[num]" thing, use that as step or else use 1 */
227 pat
->content
.NumRange
.step
=
228 ((sep
== ':') && (rc
== 5) && (sep2
== ']'))?step
:1;
230 if (*pattern
== '0') { /* leading zero specified */
232 while (ISDIGIT(*c
)) {
234 ++pat
->content
.NumRange
.padlength
; /* padding length is set for all
235 instances of this pattern */
241 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
),
242 "illegal character in range specification at pos %d\n", pos
);
246 c
= (char*)strchr(pattern
, ']'); /* continue after next ']' */
250 snprintf(glob
->errormsg
, sizeof(glob
->errormsg
), "missing ']'");
251 return GLOB_ERROR
; /* missing ']' */
254 /* always check for a literal (may be "") between patterns */
256 if(GLOB_ERROR
== glob_word(glob
, c
, pos
+ (c
- pattern
), &wordamount
))
259 if(pat
->type
== UPTCharRange
)
260 *amount
= (pat
->content
.CharRange
.max_c
-
261 pat
->content
.CharRange
.min_c
+ 1) *
264 *amount
= (pat
->content
.NumRange
.max_n
-
265 pat
->content
.NumRange
.min_n
+ 1) * wordamount
;
270 static GlobCode
glob_word(URLGlob
*glob
, char *pattern
,
271 size_t pos
, int *amount
)
273 /* processes a literal string component of a URL
274 special characters '{' and '[' branch to set/range processing functions
276 char* buf
= glob
->glob_buffer
;
278 GlobCode res
= GLOB_OK
;
280 *amount
= 1; /* default is one single string */
282 while (*pattern
!= '\0' && *pattern
!= '{' && *pattern
!= '[') {
283 if (*pattern
== '}' || *pattern
== ']')
286 /* only allow \ to escape known "special letters" */
287 if (*pattern
== '\\' &&
288 (*(pattern
+1) == '{' || *(pattern
+1) == '[' ||
289 *(pattern
+1) == '}' || *(pattern
+1) == ']') ) {
291 /* escape character, skip '\' */
294 if (*pattern
== '\0') /* but no escaping of '\0'! */
297 *buf
++ = *pattern
++; /* copy character to literal */
301 litindex
= glob
->size
/ 2;
302 /* literals 0,1,2,... correspond to size=0,2,4,... */
303 glob
->literal
[litindex
] = strdup(glob
->glob_buffer
);
304 if(!glob
->literal
[litindex
])
310 break; /* singular URL processed */
313 /* process set pattern */
314 res
= glob_set(glob
, ++pattern
, ++pos
, amount
);
318 /* process range pattern */
319 res
= glob_range(glob
, ++pattern
, ++pos
, amount
);
324 /* free that strdup'ed string again */
325 free(glob
->literal
[litindex
]);
327 return res
; /* something got wrong */
330 int glob_url(URLGlob
** glob
, char* url
, int *urlnum
, FILE *error
)
333 * We can deal with any-size, just make a buffer with the same length
334 * as the specified URL!
336 URLGlob
*glob_expand
;
338 char *glob_buffer
=(char *)malloc(strlen(url
)+1);
341 if(NULL
== glob_buffer
)
342 return CURLE_OUT_OF_MEMORY
;
344 glob_expand
= (URLGlob
*)calloc(sizeof(URLGlob
), 1);
345 if(NULL
== glob_expand
) {
347 return CURLE_OUT_OF_MEMORY
;
349 glob_expand
->size
= 0;
350 glob_expand
->urllen
= strlen(url
);
351 glob_expand
->glob_buffer
= glob_buffer
;
352 glob_expand
->beenhere
=0;
353 if(GLOB_OK
== glob_word(glob_expand
, url
, 1, &amount
))
356 if(error
&& glob_expand
->errormsg
[0]) {
357 /* send error description to the error-stream */
358 fprintf(error
, "curl: (%d) [globbing] %s\n",
359 CURLE_URL_MALFORMAT
, glob_expand
->errormsg
);
361 /* it failed, we cleanup */
366 return CURLE_URL_MALFORMAT
;
373 void glob_cleanup(URLGlob
* glob
)
378 for (i
= glob
->size
- 1; i
< glob
->size
; --i
) {
379 if (!(i
& 1)) { /* even indexes contain literals */
380 free(glob
->literal
[i
/2]);
382 else { /* odd indexes contain sets or ranges */
383 if (glob
->pattern
[i
/2].type
== UPTSet
) {
384 for (elem
= glob
->pattern
[i
/2].content
.Set
.size
- 1;
387 free(glob
->pattern
[i
/2].content
.Set
.elements
[elem
]);
389 free(glob
->pattern
[i
/2].content
.Set
.elements
);
393 free(glob
->glob_buffer
);
397 char *glob_next_url(URLGlob
*glob
)
399 char *buf
= glob
->glob_buffer
;
404 size_t buflen
= glob
->urllen
+1;
412 /* implement a counter over the index ranges of all patterns,
413 starting with the rightmost pattern */
414 for (i
= glob
->size
/ 2 - 1; carry
&& i
< glob
->size
; --i
) {
416 pat
= &glob
->pattern
[i
];
419 if (++pat
->content
.Set
.ptr_s
== pat
->content
.Set
.size
) {
420 pat
->content
.Set
.ptr_s
= 0;
425 pat
->content
.CharRange
.ptr_c
= (char)(pat
->content
.CharRange
.step
+
426 (int)((unsigned char)pat
->content
.CharRange
.ptr_c
));
427 if (pat
->content
.CharRange
.ptr_c
> pat
->content
.CharRange
.max_c
) {
428 pat
->content
.CharRange
.ptr_c
= pat
->content
.CharRange
.min_c
;
433 pat
->content
.NumRange
.ptr_n
+= pat
->content
.NumRange
.step
;
434 if (pat
->content
.NumRange
.ptr_n
> pat
->content
.NumRange
.max_n
) {
435 pat
->content
.NumRange
.ptr_n
= pat
->content
.NumRange
.min_n
;
440 printf("internal error: invalid pattern type (%d)\n", (int)pat
->type
);
441 exit (CURLE_FAILED_INIT
);
444 if (carry
) /* first pattern ptr has run into overflow, done! */
448 for (j
= 0; j
< glob
->size
; ++j
) {
449 if (!(j
&1)) { /* every other term (j even) is a literal */
450 lit
= glob
->literal
[j
/2];
451 len
= snprintf(buf
, buflen
, "%s", lit
);
455 else { /* the rest (i odd) are patterns */
456 pat
= &glob
->pattern
[j
/2];
459 len
= strlen(pat
->content
.Set
.elements
[pat
->content
.Set
.ptr_s
]);
460 snprintf(buf
, buflen
, "%s",
461 pat
->content
.Set
.elements
[pat
->content
.Set
.ptr_s
]);
466 *buf
++ = pat
->content
.CharRange
.ptr_c
;
469 len
= snprintf(buf
, buflen
, "%0*d",
470 pat
->content
.NumRange
.padlength
,
471 pat
->content
.NumRange
.ptr_n
);
476 printf("internal error: invalid pattern type (%d)\n", (int)pat
->type
);
477 exit (CURLE_FAILED_INIT
);
482 return strdup(glob
->glob_buffer
);
485 char *glob_match_url(char *filename
, URLGlob
*glob
)
491 char *appendthis
= NULL
;
492 size_t appendlen
= 0;
494 /* We cannot use the glob_buffer for storage here since the filename may
495 * be longer than the URL we use. We allocate a good start size, then
496 * we need to realloc in case of need.
498 allocsize
=strlen(filename
);
499 target
= malloc(allocsize
);
501 return NULL
; /* major failure */
504 if (*filename
== '#' && ISDIGIT(filename
[1])) {
506 char *ptr
= filename
;
507 unsigned long num
= strtoul(&filename
[1], &filename
, 10);
510 if (num
&& (i
<= glob
->size
/ 2)) {
511 URLPattern pat
= glob
->pattern
[i
];
514 appendthis
= pat
.content
.Set
.elements
[pat
.content
.Set
.ptr_s
];
515 appendlen
= strlen(pat
.content
.Set
.elements
[pat
.content
.Set
.ptr_s
]);
518 numbuf
[0]=pat
.content
.CharRange
.ptr_c
;
524 snprintf(numbuf
, sizeof(numbuf
), "%0*d",
525 pat
.content
.NumRange
.padlength
,
526 pat
.content
.NumRange
.ptr_n
);
528 appendlen
= strlen(numbuf
);
531 printf("internal error: invalid pattern type (%d)\n",
538 /* #[num] out of range, use the #[num] in the output */
540 appendthis
=filename
++;
545 appendthis
=filename
++;
548 if(appendlen
+ stringlen
>= allocsize
) {
550 allocsize
= (appendlen
+ stringlen
)*2;
551 newstr
=realloc(target
, allocsize
);
558 memcpy(&target
[stringlen
], appendthis
, appendlen
);
559 stringlen
+= appendlen
;
561 target
[stringlen
]= '\0';