4 * Created on: 26-nov-2008
11 #include "StringBuffer.h"
17 #include <unicode/uchar.h>
24 strdata
= new Data(0);
// Constructs a String from a C string.
// NOTE(review): the body is not shown here; presumably it forwards orig to
// fromAscii() the way the std::string constructor does - confirm.
27 String::String(const char * orig
) {
31 String::String(const Array
<uchar
> orig
) {
32 strdata
= new Data(0);
33 strdata
->data
= orig
.copy();
36 String::String(const std::string
& orig
) {
37 // std::string's always ascii...
38 fromAscii(orig
.c_str());
// Copy constructor: the new String points at the same Data record as orig
// (no characters are copied here) and increments that record's refcount.
41 String::String(const String
& orig
) {
// Share orig's Data record.
43 this->strdata
= orig
.strdata
;
// Register the additional sharer on the shared record.
44 this->strdata
->refcount
++;
// Constructs a String from a single char.
// A Data record is created with Data(1).
// NOTE(review): the statement that stores c is not shown here; presumably
// c goes into element 0 of the record - confirm.
47 String::String(const char c
) {
48 strdata
= new Data(1);
52 String::String(const bool b
) {
53 fromAscii(b
? "true" : "false");
56 String::String(const sint i
) {
57 size_t tmplen
= sizeof (i
) * 5; // long enough, i presume?
58 char * buf
= new char[tmplen
];
59 sprintf(buf
, "%"S_FMT_I
, i
);
64 String::String(const suint i
) {
65 size_t tmplen
= sizeof (i
) * 5; // long enough, i presume?
66 char * buf
= new char[tmplen
];
67 sprintf(buf
, "%"S_FMT_UI
, i
);
72 String::String(const slong l
) {
73 size_t tmplen
= sizeof (l
) * 5; // long enough, i presume?
74 char * buf
= new char[tmplen
];
75 sprintf(buf
, "%"S_FMT_L
, l
);
80 String::String(const sulong l
) {
81 size_t tmplen
= sizeof (l
) * 5; // long enough, i presume?
82 char * buf
= new char[tmplen
];
83 sprintf(buf
, "%"S_FMT_UL
, l
);
88 String::String(const float f
) {
89 size_t tmplen
= sizeof (f
) * 10; // long enough, i presume?
90 char * buf
= new char[tmplen
];
91 sprintf(buf
, "%f", f
);
96 String::String(const double d
) {
97 size_t tmplen
= sizeof (d
) * 10; // long enough, i presume?
98 char * buf
= new char[tmplen
];
99 sprintf(buf
, "%f", d
);
106 if (strdata
->refcount
== 0) {
112 std::size_t String::length() const {
113 return strdata
->data
.length
;
116 const uchar
String::at(std::size_t idx
) const {
117 return strdata
->data
[idx
];
// Produces an ASCII rendering of the string.
// A buffer of length()+1 chars is allocated with new[] and filled one code
// unit at a time.
// NOTE(review): if this buffer is what gets returned, ownership passes to
// the caller, who must delete[] it - confirm.
120 const char * String::ascii() const {
121 // all non-ascii chars will be converted to '?' literals.
122 char * buf
= new char[length()+1];
123 for(idx_t i
= 0; i
< length(); i
++) {
// Code units above 0x7F cannot be represented in ASCII.
124 if(at(i
) > 0x7F) buf
[i
] = '?';
130 const char * String::utf8() const {
131 // In the best case, the the buffer need to be length()+1. In the worst
132 // case, it's 3 * length() + 1. Always prepare for the worst ;)
133 char * buf
= new char[3*length()+1];
135 for(idx_t i
= 0; i
< length(); i
++) {
140 } else if(at(i
) < 0x07FF) {
142 buf
[buflen
] = 0xC0 | ((at(i
) & 0x07C0) >> 6);
143 buf
[buflen
+1] = 0x80 | (at(i
) & 0x3F);
147 buf
[buflen
] = 0xE0 | ((at(i
) & 0xF000) >> 12);
148 buf
[buflen
+1] = 0x80 | ((at(i
) & 0x0FC0) >> 6);
149 buf
[buflen
+2] = 0x80 | (at(i
) & 0x3F);
154 // now copy it to the final buffer...
155 char * final
= new char[buflen
+1];
156 carraycopy(buf
,0,final
,0,buflen
);
161 const Array
<uchar
> String::utf16() const {
162 return strdata
->data
;
// Builds a lower-cased variant of this string.
// Each code unit is passed through u_tolower() and appended to buf.
// NOTE(review): the declaration of buf and the return statement are not
// shown here - presumably a StringBuffer whose toString() is returned.
165 String
String::toLowerCase() const {
167 for(idx_t i
= 0; i
< length(); i
++) {
168 buf
<< u_tolower(at(i
));
// Builds an upper-cased variant of this string.
// Each code unit is passed through u_toupper() and appended to buf.
// NOTE(review): the declaration of buf and the return statement are not
// shown here - presumably a StringBuffer whose toString() is returned.
173 String
String::toUpperCase() const {
175 for(idx_t i
= 0; i
< length(); i
++) {
176 buf
<< u_toupper(at(i
));
181 bool String::equalsIgnoreCase(const String other
) const {
182 return this->toLowerCase() == other
.toLowerCase();
185 bool String::endsWith(const String other
) const {
186 if (this->length() < other
.length()) return false;
188 for (idx_t i
= this->length() - 1; i
> this->length() - other
.length(); i
--) {
189 if (this->at(i
) == other
.at(i
)) count
++;
192 return count
== other
.length();
195 bool String::startsWith(const String other
) const {
196 if (this->length() < other
.length()) return false;
198 for (idx_t i
= 0; i
< other
.length(); i
++) {
199 if (this->at(i
) == other
.at(i
)) count
++;
202 return count
== other
.length();
205 bool String::contains(const String other
) const {
206 return indexOf(other
) != -1;
209 String
String::trim() const {
211 size_t endct
= length() - 1;
212 for (idx_t i
= 0; i
< this->length(); i
++) {
213 if (isspace(this->at(i
))) beginct
++;
215 for (idx_t i
= length() - 1; i
>= 0; i
--) {
216 if (isspace(this->at(i
))) endct
--;
218 return substring(beginct
, endct
);
221 String
String::substring(idx_t begin
) const {
222 return substring(begin
, length());
225 String
String::substring(idx_t begin
, idx_t end
) const {
226 return String(strdata
->data
[range(begin
, end
)]);
229 sidx_t
String::indexOf(const String substr
, idx_t start
) const {
230 if (this->length() - start
< substr
.length()) return -1;
231 suint currentidx
= 0;
233 for (idx_t i
= start
; i
< substr
.length(); i
++) {
234 if (this->at(i
) == substr
.at(currentidx
)) {
239 if (currentidx
== substr
.length()) {
244 return currentidx
== substr
.length() ?
245 this->length() - idxexport
- substr
.length() : -1;
248 sidx_t
String::lastIndexOf(const String substr
) const {
249 return lastIndexOf(substr
, length() - 1);
252 sidx_t
String::lastIndexOf(const String substr
, idx_t start
) const {
253 if (start
< substr
.length()) return -1;
254 suint currentidx
= substr
.length() - 1;
256 for (idx_t i
= start
; i
>= 0; i
--) {
257 if (this->at(i
) == substr
.at(currentidx
)) {
260 currentidx
= substr
.length() - 1;
262 if (currentidx
== 0) {
267 return currentidx
== 0 ? idxexport
: -1;
270 String
String::copy() const {
271 return strdata
->data
.copy();
// Tries to make `other` use this string's Data record.
// Returns false immediately when `other != *this`.
// NOTE(review): `other` is taken by value, so re-pointing other.strdata
// below only changes the local copy - confirm this is intended.
274 bool String::merge(String other
) const {
275 if (other
!= *this) return false;
// Add other's refcount to this record's refcount.
277 this->strdata
->refcount
+= other
.strdata
->refcount
;
// Free other's record.
// NOTE(review): if other already points at this->strdata, this delete
// frees the record this string still uses - confirm a guard exists on a
// line not shown here.
278 delete other
.strdata
;
// Point other at this string's record.
279 other
.strdata
= this->strdata
;
// Computes an integer hash over the string's code units.
// NOTE(review): the shift-by-4 / 0xF0000000 mask pattern looks like a
// PJW/ELF-style hash; the folding step after the mask test is not shown
// here - confirm.
284 sint
String::hashCode() const {
// Raw pointer to the first code unit of the data array.
288 uchar
* b
= strdata
->data
.carray();
// Walk the pointer and the counter in lock step over length() units.
290 for(i
= 0; i
< length(); b
++, i
++)
// Shift the running hash left by four bits and add the current unit.
292 hash
= (hash
<< 4) + (*b
);
// x receives the top four bits of a 32-bit hash; non-zero triggers the
// statement that follows.
293 if((x
= hash
& 0xF0000000L
) != 0)
// Formats an int as hexadecimal text with a "0x"/"0X" prefix (printf '#'
// flag). `up` selects "%#X" (upper case) over "%#x" (lower case).
// NOTE(review): %x/%X formally expect an unsigned int argument - confirm
// negative values behave as intended.
303 String
String::fromHex(int i
, bool up
) {
// sizeof(i) * 5 bytes; the output needs at most 2 (prefix) +
// 2 * sizeof(i) (digits) + 1 (NUL).
304 size_t tmplen
= sizeof (i
) * 5; // long enough, i presume?
305 char * buf
= new char[tmplen
];
306 sprintf(buf
, up
? "%#X" : "%#x", i
);
// Drop toReturn's current Data record before fromAscii() installs a
// new one.
308 delete toReturn
.strdata
;
309 toReturn
.fromAscii(buf
);
314 String
String::fromOct(int i
, bool up
) {
315 size_t tmplen
= sizeof (i
) * 5; // long enough, i presume?
316 char * buf
= new char[tmplen
];
317 sprintf(buf
, up
? "%#O" : "%#o", i
);
319 delete toReturn
.strdata
;
320 toReturn
.fromAscii(buf
);
// Formats a float in scientific notation. `up` selects "%#E" (upper-case
// exponent marker) over "%#e".
325 String
String::fromSci(float f
, bool up
) {
// sizeof(f) * 10 bytes of scratch space for the formatted text.
326 size_t tmplen
= sizeof (f
) * 10; // long enough, i presume?
327 char * buf
= new char[tmplen
];
328 sprintf(buf
, up
? "%#E" : "%#e", f
);
// Drop toReturn's current Data record before fromAscii() installs a
// new one.
330 delete toReturn
.strdata
;
331 toReturn
.fromAscii(buf
);
// Formats a double in scientific notation. `up` selects "%#E" (upper-case
// exponent marker) over "%#e".
336 String
String::fromSci(double d
, bool up
) {
// sizeof(d) * 10 bytes of scratch space for the formatted text.
337 size_t tmplen
= sizeof (d
) * 10; // long enough, i presume?
338 char * buf
= new char[tmplen
];
339 sprintf(buf
, up
? "%#E" : "%#e", d
);
// Drop toReturn's current Data record before fromAscii() installs a
// new one.
341 delete toReturn
.strdata
;
342 toReturn
.fromAscii(buf
);
347 bool String::boolValue() const {
348 return (*this&lc
) == "true";
351 sint
String::intValue() const {
353 sscanf(this->ascii(), "%"S_FMT_I
, &i
);
357 suint
String::uintValue() const {
359 sscanf(this->ascii(), "%"S_FMT_UI
, &i
);
363 slong
String::longValue() const {
365 sscanf(this->ascii(), "%"S_FMT_L
, &l
);
369 sulong
String::ulongValue() const {
371 sscanf(this->ascii(), "%"S_FMT_UL
, &l
);
375 float String::floatValue() const {
377 sscanf(this->ascii(), "%f", &f
);
381 double String::doubleValue() const {
383 sscanf(this->ascii(), "%lf", &d
);
// Assignment from a C string.
// NOTE(review): declared const although an assignment has to replace the
// string's data - confirm this is intended. The statement that installs
// the new contents is not shown here.
387 const String
& String::operator=(const char * orig
) const {
// The current Data record is deleted only when its refcount is 0.
// NOTE(review): a non-zero refcount is presumably adjusted on a line not
// shown here - confirm.
389 if (strdata
->refcount
== 0) delete strdata
;
// Assignment from a std::string; the contents are taken over as ASCII via
// fromAscii().
// NOTE(review): declared const although it replaces the string's data -
// confirm this is intended.
393 const String
& String::operator=(const std::string
& orig
) const {
// The current Data record is deleted only when its refcount is 0.
// NOTE(review): a non-zero refcount is presumably adjusted on a line not
// shown here - confirm.
395 if (strdata
->refcount
== 0) delete strdata
;
// Install a new Data record built from orig's characters.
396 fromAscii(orig
.c_str());
// Assignment from another String: this string ends up pointing at orig's
// Data record.
// NOTE(review): declared const although it replaces the string's data -
// confirm this is intended.
399 const String
& String::operator=(const String orig
) const {
// The current Data record is deleted only when its refcount is 0.
// NOTE(review): a non-zero refcount is presumably adjusted on a line not
// shown here - confirm.
401 if (strdata
->refcount
== 0) delete strdata
;
// Share orig's record.
// NOTE(review): the matching refcount increment is not shown here -
// confirm it exists.
402 strdata
= orig
.strdata
;
// Implicit conversion to a C string.
// NOTE(review): the body is not shown here. If it forwards to ascii() or
// utf8(), callers receive a new[] buffer they have no obvious way to
// free - confirm ownership.
406 String::operator const char *() const {
410 String::operator std::string() const {
411 return std::string(ascii());
414 void String::fromAscii(const char* ascii
) const {
415 // no conversion required. Just plain ol' copy.
416 strdata
= new Data(std::strlen(ascii
));
417 for (idx_t i
= 0; i
< std::strlen(ascii
); i
++) {
418 strdata
->data
[i
] = ascii
[i
];
// Decodes a NUL-terminated UTF-8 byte sequence and installs the result as
// this string's data. Bytes are processed one at a time; `current`
// accumulates the code point under construction and `buf` collects output.
// NOTE(review): several lines of the state handling are not shown here, so
// the comments below only describe the visible statements.
422 void String::fromUtf8(const char* unicode
) const {
423 size_t len
= std::strlen(unicode
);
427 for (idx_t i
= 0; i
< len
; i
++) {
// Work on the byte as unsigned so the bit tests below are well defined.
428 unsigned char univalue
= static_cast<unsigned char>(unicode
[i
]);
// 0xxxxxxx: single-byte (ASCII) character.
431 if (univalue
<= 0x7F) {
// 110xxxxx: forcing the low five bits on yields 0xDF.
434 } else if ((univalue
| 0x1F) == 0xDF) {
435 // start of 2-byte char
// The five payload bits become bits 6..10 of the code point.
437 current
= (univalue
& 0x1F) << 6;
// 1110xxxx: forcing the low four bits on yields 0xEF.
438 } else if ((univalue
| 0x0F) == 0xEF) {
439 // start of 3-byte char
// The four payload bits become bits 12..15 of the code point.
441 current
= (univalue
& 0x0F) << 12;
// 11110xxx: forcing the low three bits on yields 0xF7.
442 } else if ((univalue
| 0x07) == 0xF7) {
443 // start of 4-byte char, unsupported!
// U+FFFD REPLACEMENT CHARACTER stands in for the unsupported sequence.
444 buf
<< 0xFFFD; // That's such a nice ? in a black diamond.
// A continuation byte must look like 10xxxxxx; anything else takes this
// branch.
452 if((univalue
| 0x3F) != 0xBF) {
// Six payload bits go into bits 0..5 of the code point.
457 current
+=(univalue
& 0x3F);
// Same 10xxxxxx check for another continuation byte.
463 if((univalue
| 0x3F) != 0xBF) {
// Six payload bits go into bits 6..11 of the code point.
469 current
+=(univalue
& 0x3F) << 6;
474 // okay... somehow, an error occurred.
475 // Todo: add a 'this is plainly impossible' statement.
// Take over the Data record of the String produced by the buffer.
// NOTE(review): tmp is a local; confirm its destructor does not release
// the record this string now points at (refcount handling not shown).
480 String tmp
= buf
.toString();
481 strdata
= tmp
.strdata
;
485 bool operator==(const String lhs
, const String rhs
) {
486 if (lhs
.length() != rhs
.length()) return false;
487 for (idx_t i
= 0; i
< lhs
.length(); i
++) {
488 if (lhs
.at(i
) != rhs
.at(i
)) return false;
493 bool operator<(const String lhs
, const String rhs
) {
494 String l
= lhs
.length() < rhs
.length() ? lhs
: rhs
;
495 String r
= lhs
.length() < rhs
.length() ? rhs
: lhs
;
496 for (idx_t i
= 0; i
< l
.length(); i
++) {
497 if (l
.at(i
) == r
.at(i
)) continue;
498 else if (l
.at(i
) > l
.at(i
)) return false;
// Appends rhs to this string by building a new Data record that holds both
// contents.
// NOTE(review): declared const although it replaces the string's data -
// confirm this is intended. The lines that install newdata and return are
// not shown here.
504 const String
& String::operator+=(const String rhs
) const {
// New record sized for both strings.
505 Data
* newdata
= new Data(length() + rhs
.length());
// Copy this string's units to the front of the new record ...
506 arraycopy(strdata
->data
, 0, newdata
->data
, 0, length());
// ... and rhs's units directly behind them.
507 arraycopy(rhs
.strdata
->data
, 0, newdata
->data
, length(), rhs
.length());
// The old record is deleted only when its refcount is 0.
// NOTE(review): a non-zero refcount is presumably adjusted on a line not
// shown here - confirm.
509 if (strdata
->refcount
== 0) delete strdata
;
514 String
operator+(const String lhs
, const String rhs
) {
515 return String(lhs
) += rhs
;
// Combines a String with a function taking and returning a String.
// NOTE(review): the body is not shown here. The use `*this & lc` in
// boolValue() suggests it returns rhs applied to lhs - confirm.
518 String
operator&(const String lhs
, String(*rhs
)(const String
)) {
// Mirror image of the (String, function) overload: the function comes
// first and the String second.
// NOTE(review): the body is not shown here; presumably it returns lhs
// applied to rhs - confirm.
522 String
operator&(String(*lhs
)(const String
), const String rhs
) {
// Combines a String with a count; the visible loop runs len times.
// NOTE(review): the loop body and return are not shown here; presumably
// lhs is repeated len times - confirm.
526 String
operator*(const String lhs
, const std::size_t len
) {
// NOTE(review): i is a signed int compared against an unsigned
// std::size_t - consider a std::size_t counter.
528 for (int i
= 0; i
< len
; i
++) {
534 std::ostream
& operator<<(std::ostream
& lhs
, const String rhs
) {
535 return lhs
<< rhs
.utf8();
538 String
lc(const String rhs
) {
539 return rhs
.toLowerCase();
542 String
uc(const String rhs
) {
543 return rhs
.toUpperCase();
// Free function with the same shape as lc() and uc().
// NOTE(review): the body is not shown here; by analogy it presumably
// forwards to rhs.trim() - confirm.
546 String
t(const String rhs
) {