GC removed, readme updated
[libs.git] / src / Sylph / Core / String.cpp
blob7836afc76b4fcee4241923a089f34f7f073dd5bc
1 /*
2 * String.cpp
4 * Created on: 26-nov-2008
5 * Author: SeySayux
6 */
#include "String.h"
#include "Array.h"
#include "Hash.h"
#include "StringBuffer.h"
#include "Util.h"

#include <cctype>
#include <cstdio>
#include <cstring>
#include <iostream>

#include <unicode/uchar.h>
21 SYLPH_BEGIN_NAMESPACE
23 String::String() {
24 strdata = new Data(0);
27 String::String(const char * orig) {
28 fromUtf8(orig);
31 String::String(const Array<uchar> orig) {
32 strdata = new Data(0);
33 strdata->data = orig.copy();
36 String::String(const std::string& orig) {
37 // std::string's always ascii...
38 fromAscii(orig.c_str());
41 String::String(const String& orig) {
42 // refcounted
43 this->strdata = orig.strdata;
44 this->strdata->refcount++;
47 String::String(const char c) {
48 strdata = new Data(1);
49 strdata->data[0] = c;
52 String::String(const bool b) {
53 fromAscii(b ? "true" : "false");
56 String::String(const sint i) {
57 size_t tmplen = sizeof (i) * 5; // long enough, i presume?
58 char * buf = new char[tmplen];
59 sprintf(buf, "%"S_FMT_I, i);
60 fromAscii(buf);
61 delete[] buf;
64 String::String(const suint i) {
65 size_t tmplen = sizeof (i) * 5; // long enough, i presume?
66 char * buf = new char[tmplen];
67 sprintf(buf, "%"S_FMT_UI, i);
68 fromAscii(buf);
69 delete[] buf;
72 String::String(const slong l) {
73 size_t tmplen = sizeof (l) * 5; // long enough, i presume?
74 char * buf = new char[tmplen];
75 sprintf(buf, "%"S_FMT_L, l);
76 fromAscii(buf);
77 delete[] buf;
80 String::String(const sulong l) {
81 size_t tmplen = sizeof (l) * 5; // long enough, i presume?
82 char * buf = new char[tmplen];
83 sprintf(buf, "%"S_FMT_UL, l);
84 fromAscii(buf);
85 delete[] buf;
88 String::String(const float f) {
89 size_t tmplen = sizeof (f) * 10; // long enough, i presume?
90 char * buf = new char[tmplen];
91 sprintf(buf, "%f", f);
92 fromAscii(buf);
93 delete[] buf;
96 String::String(const double d) {
97 size_t tmplen = sizeof (d) * 10; // long enough, i presume?
98 char * buf = new char[tmplen];
99 sprintf(buf, "%f", d);
100 fromAscii(buf);
101 delete[] buf;
104 String::~String() {
105 strdata->refcount--;
106 if (strdata->refcount == 0) {
107 delete strdata;
108 strdata = NULL;
112 std::size_t String::length() const {
113 return strdata->data.length;
116 const uchar String::at(std::size_t idx) const {
117 return strdata->data[idx];
120 const char * String::ascii() const {
121 // all non-ascii chars will be converted to '?' literals.
122 char * buf = new char[length()+1];
123 for(idx_t i = 0; i < length(); i++) {
124 if(at(i) > 0x7F) buf[i] = '?';
125 else buf[i] = at(i);
127 return buf;
130 const char * String::utf8() const {
131 // In the best case, the the buffer need to be length()+1. In the worst
132 // case, it's 3 * length() + 1. Always prepare for the worst ;)
133 char * buf = new char[3*length()+1];
134 size_t buflen = 0;
135 for(idx_t i = 0; i < length(); i++) {
136 if(at(i) <= 0x7F) {
137 // ascii
138 buf[buflen] = at(i);
139 buflen++;
140 } else if(at(i) < 0x07FF) {
141 // 2-byte
142 buf[buflen] = 0xC0 | ((at(i) & 0x07C0) >> 6);
143 buf[buflen+1] = 0x80 | (at(i) & 0x3F);
144 buflen += 2;
145 } else {
146 // 3-byte
147 buf[buflen] = 0xE0 | ((at(i) & 0xF000) >> 12);
148 buf[buflen+1] = 0x80 | ((at(i) & 0x0FC0) >> 6);
149 buf[buflen+2] = 0x80 | (at(i) & 0x3F);
150 buflen += 3;
154 // now copy it to the final buffer...
155 char * final = new char[buflen+1];
156 carraycopy(buf,0,final,0,buflen);
157 final[buflen] = 0;
158 return final;
161 const Array<uchar> String::utf16() const {
162 return strdata->data;
165 String String::toLowerCase() const {
166 StringBuffer buf;
167 for(idx_t i = 0; i < length(); i++) {
168 buf << u_tolower(at(i));
170 return buf;
173 String String::toUpperCase() const {
174 StringBuffer buf;
175 for(idx_t i = 0; i < length(); i++) {
176 buf << u_toupper(at(i));
178 return buf;
181 bool String::equalsIgnoreCase(const String other) const {
182 return this->toLowerCase() == other.toLowerCase();
185 bool String::endsWith(const String other) const {
186 if (this->length() < other.length()) return false;
187 suint count = 0;
188 for (idx_t i = this->length() - 1; i > this->length() - other.length(); i--) {
189 if (this->at(i) == other.at(i)) count++;
190 else break;
192 return count == other.length();
195 bool String::startsWith(const String other) const {
196 if (this->length() < other.length()) return false;
197 suint count = 0;
198 for (idx_t i = 0; i < other.length(); i++) {
199 if (this->at(i) == other.at(i)) count++;
200 else break;
202 return count == other.length();
205 bool String::contains(const String other) const {
206 return indexOf(other) != -1;
209 String String::trim() const {
210 size_t beginct = 0;
211 size_t endct = length() - 1;
212 for (idx_t i = 0; i < this->length(); i++) {
213 if (isspace(this->at(i))) beginct++;
215 for (idx_t i = length() - 1; i >= 0; i--) {
216 if (isspace(this->at(i))) endct--;
218 return substring(beginct, endct);
221 String String::substring(idx_t begin) const {
222 return substring(begin, length());
225 String String::substring(idx_t begin, idx_t end) const {
226 return String(strdata->data[range(begin, end)]);
229 sidx_t String::indexOf(const String substr, idx_t start) const {
230 if (this->length() - start < substr.length()) return -1;
231 suint currentidx = 0;
232 suint idxexport = 0;
233 for (idx_t i = start; i < substr.length(); i++) {
234 if (this->at(i) == substr.at(currentidx)) {
235 currentidx++;
236 } else {
237 currentidx = 0;
239 if (currentidx == substr.length()) {
240 idxexport = i;
241 break;
244 return currentidx == substr.length() ?
245 this->length() - idxexport - substr.length() : -1;
248 sidx_t String::lastIndexOf(const String substr) const {
249 return lastIndexOf(substr, length() - 1);
252 sidx_t String::lastIndexOf(const String substr, idx_t start) const {
253 if (start < substr.length()) return -1;
254 suint currentidx = substr.length() - 1;
255 suint idxexport = 0;
256 for (idx_t i = start; i >= 0; i--) {
257 if (this->at(i) == substr.at(currentidx)) {
258 currentidx--;
259 } else {
260 currentidx = substr.length() - 1;
262 if (currentidx == 0) {
263 idxexport = i;
264 break;
267 return currentidx == 0 ? idxexport : -1;
270 String String::copy() const {
271 return strdata->data.copy();
274 bool String::merge(String other) const {
275 if (other != *this) return false;
276 else {
277 this->strdata->refcount += other.strdata->refcount;
278 delete other.strdata;
279 other.strdata = this->strdata;
280 return true;
284 sint String::hashCode() const {
285 suint hash = 0;
286 suint x = 0;
287 suint i = 0;
288 uchar * b = strdata->data.carray();
290 for(i = 0; i < length(); b++, i++)
292 hash = (hash << 4) + (*b);
293 if((x = hash & 0xF0000000L) != 0)
295 hash ^= (x >> 24);
297 hash &= ~x;
300 return hash;
303 String String::fromHex(int i, bool up) {
304 size_t tmplen = sizeof (i) * 5; // long enough, i presume?
305 char * buf = new char[tmplen];
306 sprintf(buf, up ? "%#X" : "%#x", i);
307 String toReturn;
308 delete toReturn.strdata;
309 toReturn.fromAscii(buf);
310 delete[] buf;
311 return toReturn;
314 String String::fromOct(int i, bool up) {
315 size_t tmplen = sizeof (i) * 5; // long enough, i presume?
316 char * buf = new char[tmplen];
317 sprintf(buf, up ? "%#O" : "%#o", i);
318 String toReturn;
319 delete toReturn.strdata;
320 toReturn.fromAscii(buf);
321 delete[] buf;
322 return toReturn;
325 String String::fromSci(float f, bool up) {
326 size_t tmplen = sizeof (f) * 10; // long enough, i presume?
327 char * buf = new char[tmplen];
328 sprintf(buf, up ? "%#E" : "%#e", f);
329 String toReturn;
330 delete toReturn.strdata;
331 toReturn.fromAscii(buf);
332 delete[] buf;
333 return toReturn;
336 String String::fromSci(double d, bool up) {
337 size_t tmplen = sizeof (d) * 10; // long enough, i presume?
338 char * buf = new char[tmplen];
339 sprintf(buf, up ? "%#E" : "%#e", d);
340 String toReturn;
341 delete toReturn.strdata;
342 toReturn.fromAscii(buf);
343 delete[] buf;
344 return toReturn;
347 bool String::boolValue() const {
348 return (*this&lc) == "true";
351 sint String::intValue() const {
352 sint i = 0;
353 sscanf(this->ascii(), "%"S_FMT_I, &i);
354 return i;
357 suint String::uintValue() const {
358 suint i = 0;
359 sscanf(this->ascii(), "%"S_FMT_UI, &i);
360 return i;
363 slong String::longValue() const {
364 slong l = 0;
365 sscanf(this->ascii(), "%"S_FMT_L, &l);
366 return l;
369 sulong String::ulongValue() const {
370 sulong l = 0;
371 sscanf(this->ascii(), "%"S_FMT_UL, &l);
372 return l;
375 float String::floatValue() const {
376 float f = 0;
377 sscanf(this->ascii(), "%f", &f);
378 return f;
381 double String::doubleValue() const {
382 double d = 0;
383 sscanf(this->ascii(), "%lf", &d);
384 return d;
387 const String& String::operator=(const char * orig) const {
388 strdata->refcount--;
389 if (strdata->refcount == 0) delete strdata;
390 fromUtf8(orig);
393 const String& String::operator=(const std::string & orig) const {
394 strdata->refcount--;
395 if (strdata->refcount == 0) delete strdata;
396 fromAscii(orig.c_str());
399 const String& String::operator=(const String orig) const {
400 strdata->refcount--;
401 if (strdata->refcount == 0) delete strdata;
402 strdata = orig.strdata;
403 strdata->refcount++;
406 String::operator const char *() const {
407 return utf8();
410 String::operator std::string() const {
411 return std::string(ascii());
414 void String::fromAscii(const char* ascii) const {
415 // no conversion required. Just plain ol' copy.
416 strdata = new Data(std::strlen(ascii));
417 for (idx_t i = 0; i < std::strlen(ascii); i++) {
418 strdata->data[i] = ascii[i];
422 void String::fromUtf8(const char* unicode) const {
423 size_t len = std::strlen(unicode);
424 StringBuffer buf;
425 uchar current;
426 byte bytecount = 0;
427 for (idx_t i = 0; i < len; i++) {
428 unsigned char univalue = static_cast<unsigned char>(unicode[i]);
429 switch (bytecount) {
430 case 0:
431 if (univalue <= 0x7F) {
432 // ascii
433 buf << univalue;
434 } else if ((univalue | 0x1F) == 0xDF) {
435 // start of 2-byte char
436 bytecount = 1;
437 current = (univalue & 0x1F) << 6;
438 } else if ((univalue | 0x0F) == 0xEF) {
439 // start of 3-byte char
440 bytecount = 2;
441 current = (univalue & 0x0F) << 12;
442 } else if ((univalue | 0x07) == 0xF7) {
443 // start of 4-byte char, unsupported!
444 buf << 0xFFFD; // That's such a nice ? in a black diamond.
445 i+=3;
446 } else {
447 // invalid!
448 buf << 0xFFFD;
450 break;
451 case 1:
452 if((univalue | 0x3F) != 0xBF) {
453 // invalid followup
454 bytecount = 0;
455 buf << 0xFFFD;
456 } else {
457 current +=(univalue & 0x3F);
458 buf << current;
459 bytecount = 0;
461 break;
462 case 2: case 3:
463 if((univalue | 0x3F) != 0xBF) {
464 // invalid followup
465 i += bytecount -1;
466 bytecount = 0;
467 buf << 0xFFFD;
468 } else {
469 current +=(univalue & 0x3F) << 6;
470 bytecount--;
472 break;
473 default:
474 // okaay... somehow, an error occured.
475 // Todo: add a 'this is plainly impossible' statement.
476 break;
480 String tmp = buf.toString();
481 strdata = tmp.strdata;
482 strdata->refcount++;
485 bool operator==(const String lhs, const String rhs) {
486 if (lhs.length() != rhs.length()) return false;
487 for (idx_t i = 0; i < lhs.length(); i++) {
488 if (lhs.at(i) != rhs.at(i)) return false;
490 return true;
493 bool operator<(const String lhs, const String rhs) {
494 String l = lhs.length() < rhs.length() ? lhs : rhs;
495 String r = lhs.length() < rhs.length() ? rhs : lhs;
496 for (idx_t i = 0; i < l.length(); i++) {
497 if (l.at(i) == r.at(i)) continue;
498 else if (l.at(i) > l.at(i)) return false;
499 else return true;
501 return true;
504 const String& String::operator+=(const String rhs) const {
505 Data * newdata = new Data(length() + rhs.length());
506 arraycopy(strdata->data, 0, newdata->data, 0, length());
507 arraycopy(rhs.strdata->data, 0, newdata->data, length(), rhs.length());
508 strdata->refcount--;
509 if (strdata->refcount == 0) delete strdata;
510 strdata = newdata;
511 return *this;
514 String operator+(const String lhs, const String rhs) {
515 return String(lhs) += rhs;
518 String operator&(const String lhs, String(*rhs)(const String)) {
519 return rhs(lhs);
522 String operator&(String(*lhs)(const String), const String rhs) {
523 return lhs(rhs);
526 String operator*(const String lhs, const std::size_t len) {
527 StringBuffer buf;
528 for (int i = 0; i < len; i++) {
529 buf << lhs;
531 return buf;
534 std::ostream & operator<<(std::ostream& lhs, const String rhs) {
535 return lhs << rhs.utf8();
538 String lc(const String rhs) {
539 return rhs.toLowerCase();
542 String uc(const String rhs) {
543 return rhs.toUpperCase();
546 String t(const String rhs) {
547 return rhs.trim();
550 SYLPH_END_NAMESPACE