/*
 * Copyright (C) 2003 David Roundy
 * Most of the UTF code is Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of darcs.
 *
 * Darcs is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Library General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
/* A locale-independent isspace(3) so patches are interpreted the same
   way regardless of the current locale. */
// #define ISSPACE(c) \
//     ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')

// int first_white(const char *s, int len)
//     for (start = s, end = s + len; s < end && !ISSPACE(*s); s++);

// int first_nonwhite(const char *s, int len)
//     for (start = s, end = s + len; s < end && ISSPACE(*s); s++);
/*
 * Report whether the len bytes starting at s contain a NUL byte (0) or a
 * Ctrl-Z byte (26).
 *
 * Returns 1 if either byte is present and 0 otherwise.  A len of zero or
 * less is treated as an empty buffer and yields 0.
 */
int has_funky_char(const char *s, int len)
{
  /* memchr takes a size_t; a negative int would convert to a huge count. */
  if (len <= 0) return 0;

  // We check first for the more likely \0 so we can break out of
  // memchr that much sooner.
  return !!(memchr(s, 0, (size_t)len) || memchr(s, 26, (size_t)len));
}
#ifdef _WIN32

/* I have no idea if this works or not, and it is very tied to the usage
 * of mmap in FastPackedString. Most arguments are ignored...
 */

/*
 * Windows stand-in for my_mmap.  Mapping is not implemented on this
 * platform, so the call terminates the process with exit status 1 and
 * never returns to the caller.
 */
char *my_mmap(size_t length, int fd)
{
  (void)length;
  (void)fd;
  exit(1); /* mmap is not implemented on Windows */
}

/*
 * Windows stand-in for munmap(2): releases the view that starts at start.
 * The length argument is ignored.
 *
 * Returns 0 on success and -1 when UnmapViewOfFile reports failure.
 * NOTE(review): the original return statement was on a line missing from
 * the damaged listing -- confirm the expected return value with callers.
 */
int munmap(void *start, size_t length)
{
  (void)length;
  return UnmapViewOfFile(start) ? 0 : -1;
}

#else

/*
 * Map the first len bytes of the file open on descriptor fd, read-only and
 * shared, starting at offset 0.
 *
 * Returns a pointer to the mapping, or NULL when mmap fails (instead of
 * MAP_FAILED, so callers can test the result like any other pointer).
 */
char *my_mmap(size_t len, int fd) {
  void *maybeok = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
  if (maybeok == MAP_FAILED) return NULL;
  else return (char *)maybeok;
}

#endif
// ForeignPtr debugging stuff...

/*
 * NOTE(review): this group was restored from a damaged listing.  The
 * counter updates and the free() call sat on lines the listing dropped;
 * they are reconstructed from the log messages and the function names --
 * confirm against the upstream file.
 */

/* Allocations announced through debug_alloc and not yet passed to
   debug_free. */
static int num_alloced = 0;

/*
 * Log the release of p to stderr, together with the number of tracked
 * allocations still outstanding, then free p.
 */
void debug_free(void *p) {
  num_alloced--;
  fprintf(stderr, "Freeing %p (%d left)\n", p, num_alloced);
  free(p);
}

/*
 * Log to stderr that the block at p, labelled name, has been allocated,
 * together with the number of tracked allocations now outstanding.
 * Does not allocate anything itself.
 */
void debug_alloc(void *p, const char *name) {
  num_alloced++;
  fprintf(stderr, "Allocating %p named %s (%d left)\n",
          p, name, num_alloced);
}
105 /* Specification: RFC 2279 */
107 int utf8_to_ints(HsInt
*pwc
, const unsigned char *s
, int n
) {
108 /* returns number of unicode chars in the output. The output array is
109 assumed to have the same number of elements as the input array, which
112 HsInt
*pwc_original
= pwc
;
114 unsigned char c
= s
[0];
120 } else if (c
< 0xc2) {
122 } else if (c
< 0xe0) {
123 if (n
< 2) return -1;
124 if (!((s
[1] ^ 0x80) < 0x40)) return -1;
125 *pwc
++ = ((unsigned) (c
& 0x1f) << 6)
126 | (unsigned) (s
[1] ^ 0x80);
129 } else if (c
< 0xf0) {
130 if (n
< 3) return -1;
131 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40
132 && (c
>= 0xe1 || s
[1] >= 0xa0)))
134 *pwc
++ = ((unsigned) (c
& 0x0f) << 12)
135 | ((unsigned) (s
[1] ^ 0x80) << 6)
136 | (unsigned) (s
[2] ^ 0x80);
139 } else if (c
< 0xf8 && sizeof(unsigned)*8 >= 32) {
140 if (n
< 4) return -1;
141 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40
142 && (s
[3] ^ 0x80) < 0x40
143 && (c
>= 0xf1 || s
[1] >= 0x90)))
145 *pwc
++ = ((unsigned) (c
& 0x07) << 18)
146 | ((unsigned) (s
[1] ^ 0x80) << 12)
147 | ((unsigned) (s
[2] ^ 0x80) << 6)
148 | (unsigned) (s
[3] ^ 0x80);
151 } else if (c
< 0xfc && sizeof(unsigned)*8 >= 32) {
152 if (n
< 5) return -1;
153 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40
154 && (s
[3] ^ 0x80) < 0x40 && (s
[4] ^ 0x80) < 0x40
155 && (c
>= 0xf9 || s
[1] >= 0x88)))
157 *pwc
++ = ((unsigned) (c
& 0x03) << 24)
158 | ((unsigned) (s
[1] ^ 0x80) << 18)
159 | ((unsigned) (s
[2] ^ 0x80) << 12)
160 | ((unsigned) (s
[3] ^ 0x80) << 6)
161 | (unsigned) (s
[4] ^ 0x80);
164 } else if (c
< 0xfe && sizeof(unsigned)*8 >= 32) {
165 if (n
< 6) return -1;
166 if (!((s
[1] ^ 0x80) < 0x40 && (s
[2] ^ 0x80) < 0x40
167 && (s
[3] ^ 0x80) < 0x40 && (s
[4] ^ 0x80) < 0x40
168 && (s
[5] ^ 0x80) < 0x40
169 && (c
>= 0xfd || s
[1] >= 0x84)))
171 *pwc
++ = ((unsigned) (c
& 0x01) << 30)
172 | ((unsigned) (s
[1] ^ 0x80) << 24)
173 | ((unsigned) (s
[2] ^ 0x80) << 18)
174 | ((unsigned) (s
[3] ^ 0x80) << 12)
175 | ((unsigned) (s
[4] ^ 0x80) << 6)
176 | (unsigned) (s
[5] ^ 0x80);
182 return pwc
- pwc_original
;
/* Conversion to and from hex */

/*
 * Write the lower-case hexadecimal form of the num_chars bytes at from
 * into dest, two characters per input byte (high nybble first).
 *
 * dest must have room for 2 * num_chars characters.  No terminating NUL
 * is written.
 */
void conv_to_hex(unsigned char *dest, unsigned char *from, int num_chars)
{
  static const char hex[] = "0123456789abcdef";
  unsigned char *end;

  for (end = from + num_chars; from < end; from++) {
    *dest++ = hex[*from >> 4];
    *dest++ = hex[*from & 0xf];
  }
}
/* Value of one hexadecimal digit character.  Accepts '0'-'9', 'a'-'f' and
   'A'-'F'; any other character gives an unspecified value.  The argument
   is evaluated more than once, so it must have no side effects. */
#define NYBBLE_TO_INT(c) \
  ((c) >= 'a' ? (c) - 'a' + 10 : (c) >= 'A' ? (c) - 'A' + 10 : (c) - '0')

/*
 * Decode hexadecimal text at from into num_chars bytes at dest.
 *
 * num_chars counts OUTPUT bytes, so 2 * num_chars characters are read from
 * from (high nybble first).  The input is not validated.
 *
 * NOTE(review): restored from a damaged listing; the local declarations
 * and the loop header were missing and are reconstructed -- confirm
 * against the upstream file.
 */
void conv_from_hex(unsigned char *dest, unsigned char *from, int num_chars)
{
  unsigned char *end;
  unsigned char c;

  end = dest + num_chars;
  while (dest < end) {
    c = NYBBLE_TO_INT(*from) << 4;
    from++;
    *dest++ = c | NYBBLE_TO_INT(*from);
    from++;
  }
}