Follow upstream changes -- Bytestring updates
[git-darcs-import.git] / src / fpstring.c
blob6436d623eac046009aecf8aadf6da6c41ce36337
1 /*
2 * Copyright (C) 2003 David Roundy
3 * Most of the UTF code is Copyright (C) 1999-2001 Free Software Foundation, Inc.
4 * This file is part of darcs.
6 * Darcs is free software; you can redistribute it and/or modify it under
7 * the terms of the GNU Library General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version.
11 * You should have received a copy of the GNU Library General Public
12 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
13 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
14 * Fifth Floor, Boston, MA 02110-1301, USA.
17 #include "fpstring.h"
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdio.h>
23 #ifdef _WIN32
24 #include <windows.h>
25 #else
26 #include <sys/mman.h>
27 #endif
29 /* A locale-independent isspace(3) so patches are interpreted the same
30 * everywhere. */
31 // #define ISSPACE(c) \
32 // ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
34 // int first_white(const char *s, int len)
35 // {
36 // const char *start;
37 // const char *end;
38 //
39 // for (start = s, end = s + len; s < end && !ISSPACE(*s); s++);
40 //
41 // return s - start;
42 // }
44 // int first_nonwhite(const char *s, int len)
45 // {
46 // const char *start;
47 // const char *end;
48 //
49 // for (start = s, end = s + len; s < end && ISSPACE(*s); s++);
50 //
51 // return s - start;
52 // }
/*
 * Report whether the buffer s[0..len) contains a NUL byte (0) or a
 * byte with value 26 (ASCII SUB, i.e. Ctrl-Z).
 *
 * s   - buffer to scan; need not be NUL-terminated.
 * len - number of bytes to scan.
 *
 * Returns 1 if either byte occurs in the buffer, 0 otherwise.  A zero or
 * negative len returns 0 without reading s, so a negative length is never
 * converted into a huge size_t for memchr.
 */
int has_funky_char(const char *s, int len)
{
  size_t count;

  if (len <= 0)
    return 0;
  count = (size_t)len;

  // We check first for the more likely \0 so we can break out of
  // memchr that much sooner.
  return memchr(s, 0, count) != NULL || memchr(s, 26, count) != NULL;
}
62 // mmapping...
64 #ifdef _WIN32
/* I have no idea if this works or not, and it is very tied to the usage
 * of mmap in FastPackedString.  Most arguments are ignored...
 */

/*
 * Windows build of my_mmap.  Memory mapping is not implemented on this
 * platform, so the function terminates the process with exit status 1
 * and never returns to the caller.  Both parameters are unused.
 */
char *my_mmap(size_t length, int fd)
{
  (void)length;
  (void)fd;
  exit(1); /* mmap is not implemented on Windows */
}
/*
 * Windows build of munmap: releases a mapped view via UnmapViewOfFile.
 *
 * start  - base address of the view to unmap.
 * length - unused; UnmapViewOfFile takes only the base address.
 *
 * Returns 0 on success and -1 on failure.  The previous version fell off
 * the end of a non-void function, so callers read an indeterminate value.
 */
int munmap(void *start, size_t length)
{
  (void)length;
  /* UnmapViewOfFile returns nonzero on success. */
  return UnmapViewOfFile(start) ? 0 : -1;
}
80 #else
/*
 * Map len bytes of the file referred to by fd, starting at offset 0,
 * as a read-only (PROT_READ) shared (MAP_SHARED) mapping.
 *
 * Returns the address of the mapping, or NULL if mmap reports MAP_FAILED.
 */
char *my_mmap(size_t len, int fd)
{
  void *mapping = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);

  return mapping == MAP_FAILED ? NULL : (char *)mapping;
}
88 #endif
90 // ForeignPtr debugging stuff...
/* Running balance of debug_alloc calls minus debug_free calls. */
static int num_alloced = 0;

/*
 * Record the release of p: decrement the counter and log the pointer
 * together with the new count to stderr.  p is only printed, never
 * dereferenced or freed here.
 */
void debug_free(void *p)
{
  --num_alloced;
  fprintf(stderr, "Freeing %p (%d left)\n", p, num_alloced);
}

/*
 * Record the allocation of p: increment the counter and log the pointer,
 * the caller-supplied name, and the new count to stderr.
 */
void debug_alloc(void *p, const char *name)
{
  ++num_alloced;
  fprintf(stderr, "Allocating %p named %s (%d left)\n",
          p, name, num_alloced);
}
105 /* Specification: RFC 2279 */
107 int utf8_to_ints(HsInt *pwc, const unsigned char *s, int n) {
108 /* returns number of unicode chars in the output. The output array is
109 assumed to have the same number of elements as the input array, which
110 is n. */
112 HsInt *pwc_original = pwc;
113 while (n > 0) {
114 unsigned char c = s[0];
116 if (c < 0x80) {
117 *pwc++ = c;
118 n--;
119 s++;
120 } else if (c < 0xc2) {
121 return -1;
122 } else if (c < 0xe0) {
123 if (n < 2) return -1;
124 if (!((s[1] ^ 0x80) < 0x40)) return -1;
125 *pwc++ = ((unsigned) (c & 0x1f) << 6)
126 | (unsigned) (s[1] ^ 0x80);
127 n -= 2;
128 s += 2;
129 } else if (c < 0xf0) {
130 if (n < 3) return -1;
131 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
132 && (c >= 0xe1 || s[1] >= 0xa0)))
133 return -1;
134 *pwc++ = ((unsigned) (c & 0x0f) << 12)
135 | ((unsigned) (s[1] ^ 0x80) << 6)
136 | (unsigned) (s[2] ^ 0x80);
137 n -= 3;
138 s += 3;
139 } else if (c < 0xf8 && sizeof(unsigned)*8 >= 32) {
140 if (n < 4) return -1;
141 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
142 && (s[3] ^ 0x80) < 0x40
143 && (c >= 0xf1 || s[1] >= 0x90)))
144 return -1;
145 *pwc++ = ((unsigned) (c & 0x07) << 18)
146 | ((unsigned) (s[1] ^ 0x80) << 12)
147 | ((unsigned) (s[2] ^ 0x80) << 6)
148 | (unsigned) (s[3] ^ 0x80);
149 n -= 4;
150 s += 4;
151 } else if (c < 0xfc && sizeof(unsigned)*8 >= 32) {
152 if (n < 5) return -1;
153 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
154 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
155 && (c >= 0xf9 || s[1] >= 0x88)))
156 return -1;
157 *pwc++ = ((unsigned) (c & 0x03) << 24)
158 | ((unsigned) (s[1] ^ 0x80) << 18)
159 | ((unsigned) (s[2] ^ 0x80) << 12)
160 | ((unsigned) (s[3] ^ 0x80) << 6)
161 | (unsigned) (s[4] ^ 0x80);
162 n -= 5;
163 s += 5;
164 } else if (c < 0xfe && sizeof(unsigned)*8 >= 32) {
165 if (n < 6) return -1;
166 if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
167 && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
168 && (s[5] ^ 0x80) < 0x40
169 && (c >= 0xfd || s[1] >= 0x84)))
170 return -1;
171 *pwc++ = ((unsigned) (c & 0x01) << 30)
172 | ((unsigned) (s[1] ^ 0x80) << 24)
173 | ((unsigned) (s[2] ^ 0x80) << 18)
174 | ((unsigned) (s[3] ^ 0x80) << 12)
175 | ((unsigned) (s[4] ^ 0x80) << 6)
176 | (unsigned) (s[5] ^ 0x80);
177 n -= 6;
178 s += 6;
179 } else
180 return -1;
182 return pwc - pwc_original;
185 /* Conversion to and from hex */
/*
 * Encode num_chars bytes from `from` as lowercase hexadecimal text.
 *
 * dest      - output buffer; receives exactly 2 * num_chars characters.
 *             No terminating NUL is written.
 * from      - input bytes.
 * num_chars - number of input bytes to encode.
 */
void conv_to_hex(unsigned char *dest, unsigned char *from, int num_chars)
{
  static const char hex[] = "0123456789abcdef";
  const unsigned char *end = from + num_chars;

  for (; from < end; from++) {
    *dest++ = hex[*from >> 4];   /* high nybble first */
    *dest++ = hex[*from & 0xf];
  }
}
/*
 * Value of a single hexadecimal digit character.  Accepts '0'-'9',
 * 'a'-'f' and 'A'-'F'; the result for any other character is
 * unspecified.  The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define NYBBLE_TO_INT(c) \
  ((c) >= 'a' ? (c) - 'a' + 10 : (c) >= 'A' ? (c) - 'A' + 10 : (c) - '0')

/*
 * Decode hexadecimal text into raw bytes.
 *
 * dest      - output buffer; receives num_chars bytes.
 * from      - input text; 2 * num_chars hex digits are read, high nybble
 *             first.  The input is not validated.
 * num_chars - number of OUTPUT bytes to produce.
 */
void conv_from_hex(unsigned char *dest, unsigned char *from, int num_chars)
{
  unsigned char *end = dest + num_chars;

  while (dest < end) {
    unsigned char high = (unsigned char)(NYBBLE_TO_INT(from[0]) << 4);
    unsigned char low = (unsigned char)NYBBLE_TO_INT(from[1]);

    *dest++ = high | low;
    from += 2;
  }
}