Added spec:commit task to commit changes to spec/ruby sources.
[rbx.git] / shotgun / lib / string.c
blob5a15df6656075a74077d664e27c3ee5848b217bc
1 #include <string.h>
2 #include <stdlib.h>
3 #include <errno.h>
4 #include <bstrlib.h>
6 #include "shotgun/lib/shotgun.h"
7 #include "shotgun/lib/bytearray.h"
8 #include "shotgun/lib/tuple.h"
9 #include "shotgun/lib/object.h"
10 #include "shotgun/lib/symbol.h"
11 #include "shotgun/lib/string.h"
13 OBJECT string_new2(STATE, const char *str, int sz) {
14 OBJECT obj, data;
15 char *ba;
17 xassert(sz >= 0);
18 obj = string_allocate(state);
19 string_set_bytes(obj, I2N(sz));
20 string_set_characters(obj, I2N(sz));
21 string_set_encoding(obj, Qnil);
23 data = bytearray_new_dirty(state, sz+1);
24 ba = bytearray_byte_address(state, data);
25 memset(ba, 0, SIZE_OF_BODY(data));
27 if(str != NULL && sz > 0) {
28 memcpy(ba, str, sz);
29 ba[sz] = 0;
32 string_set_data(obj, data);
33 return obj;
36 OBJECT string_new(STATE, const char *str) {
37 int sz;
39 if(str == NULL) {
40 return string_new2(state, str, 0);
43 sz = strlen(str);
44 return string_new2(state, str, sz);
47 OBJECT string_new_shared(STATE, OBJECT cur) {
48 OBJECT obj;
50 obj = string_allocate(state);
51 string_set_bytes(obj, string_get_bytes(cur));
52 string_set_characters(obj, string_get_bytes(cur));
53 string_set_encoding(obj, Qnil);
55 string_set_data(obj, string_get_data(cur));
56 string_set_shared(obj, Qtrue);
57 string_set_shared(cur, Qtrue);
58 return obj;
61 OBJECT string_newfrombstr(STATE, bstring str)
63 if(str == NULL) {
64 return string_new2(state, NULL, 0);
67 return string_new2(state, (const char*)str->data, str->slen);
70 OBJECT string_dup(STATE, OBJECT self) {
71 OBJECT obj;
72 xassert(STRING_P(self));
73 obj = string_new_shared(state, self);
74 SET_CLASS(obj, object_class(state, self));
75 obj->IsTainted = self->IsTainted;
76 return obj;
79 OBJECT string_append(STATE, OBJECT self, OBJECT other) {
80 OBJECT cur, obs, nd;
81 int cur_sz, oth_sz, ns, tmp, extra;
82 char *ba;
84 xassert(STRING_P(self));
85 xassert(STRING_P(other));
87 string_unshare(state, self);
89 cur = string_get_data(self);
90 obs = string_get_data(other);
91 cur_sz = N2I(string_get_bytes(self));
92 oth_sz = N2I(string_get_bytes(other));
94 ns = cur_sz + oth_sz;
95 tmp = bytearray_bytes(state, cur);
96 if(ns+1 > tmp) {
97 extra = ns * 0.01;
98 if(extra < 10) extra = 10;
99 nd = bytearray_new_dirty(state, ns+extra);
100 object_copy_bytes_into(state, cur, nd, cur_sz, 0);
101 object_copy_bytes_into(state, obs, nd, oth_sz, cur_sz);
102 ba = bytearray_byte_address(state, nd);
103 string_set_data(self, nd);
104 } else {
105 object_copy_bytes_into(state, obs, cur, oth_sz, cur_sz);
106 ba = bytearray_byte_address(state, cur);
108 ba[ns] = 0;
109 string_set_bytes(self, I2N(ns));
110 return self;
113 /* returns pointer to bytearray string based on */
114 char *string_byte_address(STATE, OBJECT self) {
115 OBJECT data;
117 xassert(STRING_P(self));
118 data = string_get_data(self);
119 if(NIL_P(data)) {
120 return (char*)"";
123 return bytearray_byte_address(state, data);
126 /* Direct pointer to underlying string: handle with care! */
127 char* rbx_string_as_cstr(STATE, OBJECT self)
129 xassert(STRING_P(self));
131 /* Terminator may be incorrect due to explicit length being used usually */
132 char* cstr = string_byte_address(state, self);
133 cstr[((size_t)N2I(string_get_bytes(self)))] = '\0';
135 return cstr;
138 double string_to_double(STATE, OBJECT self) {
139 double value;
140 char *p, *n, *ba, *rest;
141 int e_seen = 0;
142 xassert(STRING_P(self));
144 // We'll modify the buffer, so we need our own copy.
145 ba = bytearray_as_string(state, string_get_data(self));
147 p = ba;
148 while (ISSPACE(*p)) p++;
149 n = p;
151 while (*p) {
152 if (*p == '_') {
153 p++;
154 } else {
155 if(*p == 'e' || *p == 'E') {
156 if(e_seen) {
157 *n = 0;
158 break;
160 e_seen = 1;
161 } else if(!(ISDIGIT(*p) || *p == '.' || *p == '-' || *p == '+')) {
162 *n = 0;
163 break;
166 *n++ = *p++;
169 *n = 0;
171 /* Some implementations of strtod() don't guarantee to
172 * set errno, so we need to reset it ourselves.
174 errno = 0;
176 value = strtod(ba, &rest);
177 if (errno == ERANGE) {
178 printf("Float %s out of range\n", ba);
181 free(ba);
184 return value;
187 static OBJECT tr_replace(STATE, OBJECT string, int bytes, unsigned char *str,
188 unsigned char *data, int size, int steps) {
189 if(size > bytes || RTEST(string_get_shared(string))) {
190 OBJECT ba;
192 ba = bytearray_new_dirty(state, size+1);
193 str = bytearray_byte_address(state, ba);
194 memset(str, 0, SIZE_OF_BODY(ba));
195 string_set_data(string, ba);
196 string_set_shared(string, Qnil);
199 memcpy(str, data, size);
200 str[size] = 0;
202 string_set_bytes(string, I2N(size));
203 string_set_characters(string, I2N(size));
205 return I2N(steps);
/*
 * tr_assign(set, tr, last, c): record byte `c` as the newest entry of the
 * expansion buffer `tr`, whose current length is `last`; `set[c]` holds the
 * index of `c` inside `tr`, or a negative value when `c` is absent.
 *
 * If `c` is already present, its old entry is removed first: the entries
 * after it are shifted down one slot and their `set` indices decremented,
 * so `c` ends up at the end without duplicates.
 *
 * This macro is not self-contained. It reads the enclosing function's
 * `lim`, `steps`, `state`, `string`, `bytes` and `str`, increments `steps`,
 * and RETURNS from the enclosing function (via tr_replace) once `lim` is
 * non-negative and `steps` has reached it.
 */
#define tr_assign(set, tr, last, c) do { \
  int _pos, _old = (set)[(c)]; \
  \
  if(lim >= 0 && steps >= lim) { \
    return tr_replace(state, string, bytes, str, (tr), (last), steps); \
  } \
  \
  if(_old >= 0) { \
    (last)--; \
    for(_pos = _old + 1; _pos <= (last); _pos++) { \
      (set)[(tr)[_pos]]--; \
      (tr)[_pos - 1] = (tr)[_pos]; \
    } \
  } \
  (tr)[(last)] = (c); \
  (set)[(c)] = (last)++; \
  steps++; \
} while(0)
229 OBJECT string_tr_expand(STATE, OBJECT string, OBJECT limit) {
230 int i, start, bytes,
231 steps = 0, lim = -1,
232 last = 0, c, seq, max, set[256];
233 unsigned char *str, tr[256];
235 if(!NIL_P(limit)) {
236 lim = N2I(limit);
239 str = (unsigned char *)string_byte_address(state, string);
240 bytes = N2I(string_get_bytes(string));
241 start = bytes > 1 && str[0] == '^' ? 1 : 0;
242 memset(set, -1, sizeof(int) * 256);
244 for(i = start; i < bytes;) {
245 c = str[i];
246 seq = ++i < bytes ? str[i] : -1;
248 if(seq == '-') {
249 max = ++i < bytes ? str[i] : -1;
250 if(max >= 0) {
251 while(c <= max) {
252 tr_assign(set, tr, last, c);
253 c++;
255 i++;
256 } else {
257 tr_assign(set, tr, last, c);
258 tr_assign(set, tr, last, seq);
260 } else if(c == '\\' && seq >= 0) {
261 continue;
262 } else {
263 tr_assign(set, tr, last, c);
267 return tr_replace(state, string, bytes, str, tr, last, steps);
#define HashPrime 16777619
#define MASK_28 (((unsigned int)1<<28)-1)

/*
 * Hash `sz` bytes starting at `bp`.
 *
 * Starting from 0, each byte is folded in by multiplying the accumulator
 * by HashPrime and XOR-ing the byte into it. The result is then reduced by
 * XOR-ing the bits above bit 27 into the low 28 bits.
 */
unsigned int string_hash_str(unsigned char *bp, unsigned int sz) {
  unsigned int acc = 0;
  unsigned int idx;

  for(idx = 0; idx < sz; idx++) {
    acc = (acc * HashPrime) ^ bp[idx];
  }

  return (acc >> 28) ^ (acc & MASK_28);
}
290 unsigned int string_hash_int(STATE, OBJECT self) {
291 unsigned char *bp;
292 unsigned int sz, h;
293 OBJECT data, hsh;
295 xassert(STRING_P(self));
296 data = string_get_data(self);
297 hsh = string_get_hash(self);
298 if(hsh != Qnil) {
299 return N2I(hsh);
301 bp = (unsigned char*)bytearray_byte_address(state, data);
302 sz = N2I(string_get_bytes(self));
304 h = string_hash_str(bp, sz);
305 string_set_hash(self, UI2N(h));
307 return h;
310 unsigned int string_hash_cstr(STATE, const char *bp) {
311 unsigned int sz = strlen(bp);
312 return string_hash_str((unsigned char*)bp, sz);
315 unsigned int string_hash_str_with_size(STATE, const char *bp, int size) {
316 return string_hash_str((unsigned char*)bp, size);
319 OBJECT string_to_sym(STATE, OBJECT self) {
320 xassert(STRING_P(self));
321 return symtbl_lookup(state, state->global->symbols, self);
324 int string_equal_p(STATE, OBJECT self, OBJECT other) {
325 OBJECT bytes;
327 bytes = string_get_bytes(self);
328 if(bytes != string_get_bytes(other)) return FALSE;
329 if(strncmp(string_byte_address(state, self), string_byte_address(state, other), N2I(bytes))) {
330 return FALSE;
333 return TRUE;