Add kludge for IntelliJ IDEA transients
[notion.git] / ioncore / strings.c
blob a010180ed146cfa953c2a1dd948278d12738631f
/*
 * ion/ioncore/strings.c
 *
 * Copyright (c) Tuomo Valkonen 1999-2009.
 *
 * See the included file LICENSE for details.
 */
9 #include <libtu/output.h>
10 #include <libtu/misc.h>
11 #include <string.h>
12 #include <regex.h>
13 #include "log.h"
14 #include "common.h"
15 #include "global.h"
16 #include "strings.h"
/*{{{ String scanning */
/*
 * Decode the multibyte character that starts at `p`, looking at no more
 * than `max` bytes, using mbtowc() (i.e. the current locale's encoding).
 *
 * Returns the decoded wide character, or 0 when nothing could be decoded:
 * `p` is NULL, `max` is not positive, `p` starts with a NUL byte, or the
 * bytes do not form a complete valid character.
 */
wchar_t str_wchar_at(char *p, int max)
{
    wchar_t wc;

    /* mbtowc(&wc, NULL, n) is a shift-state query that may return non-zero
     * without storing anything in wc, and a negative max would turn into a
     * huge size_t limit. Neither can yield a character.
     */
    if(p==NULL || max<=0)
        return 0;

    if(mbtowc(&wc, p, (size_t)max)>0)
        return wc;

    return 0;
}
/*
 * Strip leading and trailing whitespace from the multibyte string `p`,
 * in place. Whitespace is whatever iswspace() accepts for the characters
 * decoded by mbrtowc() in the current locale.
 *
 * Leading whitespace is removed by moving the rest of the string (with its
 * terminator) to the start of the buffer; trailing whitespace is removed by
 * writing a NUL over the first character of the final whitespace run.
 * If decoding fails while skipping leading whitespace, the string is
 * returned with only the whitespace seen so far removed; if it fails later,
 * a whitespace run immediately preceding the failure point is still cut.
 *
 * Returns `p` (NULL is passed through untouched).
 */
char *str_stripws(char *p)
{
    mbstate_t ps;
    wchar_t wc;
    int first=-1, pos=0;
    int n;
    /* mbrtowc() returns a size_t; its error values (size_t)-1 and
     * (size_t)-2 are stored in an int so that "<=0" covers NUL, invalid
     * and incomplete sequences at once, as elsewhere in this file.
     */
    int ret;

    if(p==NULL)
        return p;

    n=strlen(p);
    memset(&ps, 0, sizeof(ps));

    /* Skip leading whitespace. */
    while(1){
        ret=mbrtowc(&wc, p+pos, n-pos, &ps);
        if(ret<=0)
            break;
        if(!iswspace(wc))
            break;
        pos+=ret;
    }

    if(pos!=0){
        memmove(p, p+pos, n-pos+1);
        /* Keep the byte count in step with the shortened string so the
         * second scan is bounded by the new terminator.
         */
        n-=pos;
    }

    if(ret<=0)
        return p;

    /* ret is the length of the first non-whitespace character, which now
     * sits at the start of the buffer; continue right after it.
     */
    pos=ret;

    /* Remember where the current run of whitespace began; any
     * non-whitespace character forgets it again.
     */
    while(1){
        ret=mbrtowc(&wc, p+pos, n-pos, &ps);
        if(ret<=0)
            break;
        if(iswspace(wc)){
            if(first==-1)
                first=pos;
        }else{
            first=-1;
        }
        pos+=ret;
    }

    if(first!=-1)
        p[first]='\0';

    return p;
}
78 int str_prevoff(const char *p, int pos)
80 if(ioncore_g.enc_sb)
81 return (pos>0 ? 1 : 0);
83 if(ioncore_g.enc_utf8){
84 int opos=pos;
86 while(pos>0){
87 pos--;
88 if((p[pos]&0xC0)!=0x80)
89 break;
91 return opos-pos;
94 assert(ioncore_g.use_mb);
96 /* *sigh* */
97 int l, prev=0;
98 mbstate_t ps;
100 memset(&ps, 0, sizeof(ps));
102 while(1){
103 l=mbrlen(p+prev, pos-prev, &ps);
104 if(l<0){
105 warn(TR("Invalid multibyte string."));
106 return 0;
108 if(prev+l>=pos)
109 return pos-prev;
110 prev+=l;
117 int str_nextoff(const char *p, int opos)
119 if(ioncore_g.enc_sb)
120 return (*(p+opos)=='\0' ? 0 : 1);
122 if(ioncore_g.enc_utf8){
123 int pos=opos;
125 while(p[pos]){
126 pos++;
127 if((p[pos]&0xC0)!=0x80)
128 break;
130 return pos-opos;
133 assert(ioncore_g.use_mb);
135 mbstate_t ps;
136 int l;
137 memset(&ps, 0, sizeof(ps));
139 l=mbrlen(p+opos, strlen(p+opos), &ps);
140 if(l<0){
141 warn(TR("Invalid multibyte string."));
142 return 0;
144 return l;
149 int str_len(const char *p)
151 if(ioncore_g.enc_sb)
152 return strlen(p);
154 if(ioncore_g.enc_utf8){
155 int len=0;
157 while(*p){
158 if(((*p)&0xC0)!=0x80)
159 len++;
160 p++;
162 return len;
165 assert(ioncore_g.use_mb);
167 mbstate_t ps;
168 int len=0, bytes=strlen(p), l;
169 memset(&ps, 0, sizeof(ps));
171 while(bytes>0){
172 l=mbrlen(p, bytes, &ps);
173 if(l<=0){
174 warn(TR("Invalid multibyte string."));
175 break;
177 len++;
178 bytes-=l;
179 p += l;
181 return len;
/*}}}*/


/*{{{ Title shortening */
192 static char *scatn3(const char *p1, int l1,
193 const char *p2, int l2,
194 const char *p3, int l3)
196 char *p=ALLOC_N(char, l1+l2+l3+1);
198 if(p!=NULL){
199 strncat(p, p1, l1);
200 strncat(p, p2, l2);
201 strncat(p, p3, l3);
203 return p;
206 INTRSTRUCT(SR);
208 DECLSTRUCT(SR){
209 regex_t re;
210 char *rule;
211 SR *next, *prev;
212 bool always;
216 static SR *shortenrules=NULL;
219 /*EXTL_DOC
220 * Add a rule describing how too long titles should be shortened to fit in tabs.
221 * The regular expression \var{rx} (POSIX, not Lua!) is used to match titles
222 * and when \var{rx} matches, \var{rule} is attempted to use as a replacement
223 * for title. If \var{always} is set, the rule is used even if no shortening
224 * is necessary.
226 * Similarly to sed's 's' command, \var{rule} may contain characters that are
227 * inserted in the resulting string and specials as follows:
229 * \begin{tabularx}{\linewidth}{lX}
230 * \tabhead{Special & Description}
231 * \$0 & Place the original string here. \\
232 * \$1 to \$9 & Insert n:th capture here (as usual,captures are surrounded
233 * by parentheses in the regex). \\
234 * \$| & Alternative shortening separator. The shortening described
235 * before the first this kind of separator is tried first and
236 * if it fails to make the string short enough, the next is
237 * tried, and so on. \\
238 * \$< & Remove characters on the left of this marker to shorten the
239 * string. \\
240 * \$> & Remove characters on the right of this marker to shorten the
241 * string. Only the first \$< or \$> within an alternative
242 * shortening is used. \\
243 * \end{tabularx}
245 EXTL_EXPORT
246 bool ioncore_defshortening(const char *rx, const char *rule, bool always)
248 SR *si;
249 int ret;
250 #define ERRBUF_SIZE 256
251 static char errbuf[ERRBUF_SIZE];
253 if(rx==NULL || rule==NULL)
254 return FALSE;
256 si=ALLOC(SR);
258 if(si==NULL)
259 return FALSE;
261 ret=regcomp(&(si->re), rx, REG_EXTENDED);
263 if(ret!=0){
264 errbuf[0]='\0';
265 regerror(ret, &(si->re), errbuf, ERRBUF_SIZE);
266 warn(TR("Error compiling regular expression: %s"), errbuf);
267 goto fail2;
270 si->rule=scopy(rule);
271 si->always=always;
273 if(si->rule==NULL)
274 goto fail;
276 LINK_ITEM(shortenrules, si, next, prev);
278 return TRUE;
280 fail:
281 regfree(&(si->re));
282 fail2:
283 free(si);
284 return FALSE;
288 static char *shorten(GrBrush *brush, const char *str, uint maxw,
289 const char *rule, int nmatch, regmatch_t *pmatch)
291 char *s;
292 int rulelen, slen, i, j, k, ll;
293 int strippt=0;
294 int stripdir=-1;
295 bool more=FALSE;
297 /* Ensure matches are at character boundaries */
298 if(!ioncore_g.enc_sb){
299 int pos=0, len, strl;
300 mbstate_t ps;
301 memset(&ps, 0, sizeof(ps));
303 strl=strlen(str);
305 while(pos<strl){
306 len=mbrtowc(NULL, str+pos, strl-pos, &ps);
307 if(len<0){
308 /* Invalid multibyte string */
309 return scopy("???");
311 if(len==0)
312 break;
313 for(i=0; i<nmatch; i++){
314 if(pmatch[i].rm_so>pos && pmatch[i].rm_so<pos+len)
315 pmatch[i].rm_so=pos+len;
316 if(pmatch[i].rm_eo>pos && pmatch[i].rm_eo<pos+len)
317 pmatch[i].rm_eo=pos;
319 pos+=len;
323 /* Stupid alloc rule that wastes space */
324 rulelen=strlen(rule);
325 slen=rulelen;
327 for(i=0; i<nmatch; i++){
328 if(pmatch[i].rm_so==-1)
329 continue;
330 slen+=(pmatch[i].rm_eo-pmatch[i].rm_so);
333 s=ALLOC_N(char, slen);
335 if(s==NULL)
336 return NULL;
339 more=FALSE;
340 j=0;
341 strippt=0;
342 stripdir=-1;
344 for(i=0; i<rulelen; i++){
345 if(rule[i]!='$'){
346 s[j++]=rule[i];
347 continue;
350 i++;
352 if(rule[i]=='|'){
353 rule=rule+i+1;
354 rulelen=rulelen-i-1;
355 more=TRUE;
356 break;
359 if(rule[i]=='$'){
360 s[j++]='$';
361 continue;
364 if(rule[i]=='<'){
365 strippt=j;
366 stripdir=-1;
367 continue;
370 if(rule[i]=='>'){
371 strippt=j;
372 stripdir=1;
373 continue;
376 if(rule[i]>='0' && rule[i]<='9'){
377 k=(int)(rule[i]-'0');
378 if(k>=nmatch)
379 continue;
380 if(pmatch[k].rm_so==-1)
381 continue;
382 ll=(pmatch[k].rm_eo-pmatch[k].rm_so);
383 strncpy(s+j, str+pmatch[k].rm_so, ll);
384 j+=ll;
388 slen=j;
389 s[slen]='\0';
391 i=strippt;
392 j=strippt;
394 /* shorten */
396 uint bl=grbrush_get_text_width(brush, s, i);
397 uint el=grbrush_get_text_width(brush, s+j, slen-j);
399 while(1){
400 /* el+bl may not be the actual length, but close enough. */
401 if(el+bl<=maxw){
402 memmove(s+i, s+j, slen-j+1);
403 return s;
406 if(stripdir==-1){
407 ll=str_prevoff(s, i);
408 if(ll==0)
409 break;
410 i-=ll;
411 bl=grbrush_get_text_width(brush, s, i);
412 }else{
413 ll=str_nextoff(s, j);
414 if(ll==0)
415 break;
416 j+=ll;
417 el=grbrush_get_text_width(brush, s+j, slen-j);
421 }while(more);
423 free(s);
425 return NULL;
429 char *grbrush_make_label(GrBrush *brush, const char *str, uint maxw)
431 size_t nmatch=10;
432 regmatch_t pmatch[10];
433 SR *rule;
434 int ret;
435 char *retstr;
436 bool fits=FALSE;
438 if(grbrush_get_text_width(brush, str, strlen(str))<=maxw)
439 fits=TRUE;
441 /*return scopy(str);*/
443 for(rule=shortenrules; rule!=NULL; rule=rule->next){
444 if(fits && !rule->always)
445 continue;
446 ret=regexec(&(rule->re), str, nmatch, pmatch, 0);
447 if(ret!=0)
448 continue;
449 retstr=shorten(brush, str, maxw, rule->rule, nmatch, pmatch);
450 goto rettest;
453 if(fits){
454 retstr=scopy(str);
455 }else{
456 pmatch[0].rm_so=0;
457 pmatch[0].rm_eo=strlen(str)-1;
458 retstr=shorten(brush, str, maxw, "$1$<...", 1, pmatch);
461 rettest:
462 if(retstr!=NULL)
463 return retstr;
464 return scopy("");
/*}}}*/