Better error message
[notion/jeffpc.git] / ioncore / strings.c
blob6fef149e3d2cdc896f030262724a69e736426d70
1 /*
2 * ion/ioncore/strings.c
4 * Copyright (c) Tuomo Valkonen 1999-2009.
6 * See the included file LICENSE for details.
7 */
9 #include <libtu/output.h>
10 #include <libtu/misc.h>
11 #include <string.h>
12 #include <regex.h>
13 #include "common.h"
14 #include "global.h"
15 #include "strings.h"
18 /*{{{ String scanning */
/* Decode the multibyte character that starts at p, examining at most
 * max bytes, according to the current locale.
 *
 * Returns the decoded wide character, or 0 when p is NULL, max is not
 * positive, p points at the string terminator, or the bytes do not form
 * a complete valid character.
 */
wchar_t str_wchar_at(char *p, int max)
{
    wchar_t wc;

    /* mbtowc() with a NULL string only reports on shift state and leaves
     * wc unset; a non-positive max would be converted to a huge size_t.
     */
    if(p==NULL || max<=0)
        return 0;

    if(mbtowc(&wc, p, (size_t)max)>0)
        return wc;

    return 0;
}
/* TRUE when an mbrtowc() result means "stop scanning": the terminator
 * was reached (0), the sequence is invalid ((size_t)-1) or it is
 * incomplete ((size_t)-2).
 */
static int stripws_stop(size_t ret)
{
    return (ret==0 || ret==(size_t)-1 || ret==(size_t)-2);
}


/* Strip leading and trailing whitespace (as classified by iswspace() in
 * the current locale) from the multibyte string p, in place.
 *
 * The remaining text is moved to the start of the buffer, so the return
 * value is always p itself. If an undecodable sequence is met, scanning
 * stops there and the rest of the string is left untouched.
 */
char *str_stripws(char *p)
{
    mbstate_t ps;
    wchar_t wc;
    size_t n, pos=0, ret;
    char *trail=NULL;

    if(p==NULL)
        return NULL;

    n=strlen(p);
    memset(&ps, 0, sizeof(ps));

    /* Find the first non-whitespace character. */
    while(1){
        ret=mbrtowc(&wc, p+pos, n-pos, &ps);
        if(stripws_stop(ret) || !iswspace(wc))
            break;
        pos+=ret;
    }

    if(pos!=0){
        /* +1 moves the terminator as well. */
        memmove(p, p+pos, n-pos+1);
        /* Keep the byte limit in step with the shortened string. */
        n-=pos;
    }

    if(stripws_stop(ret))
        return p;

    /* ret is the size of the first non-whitespace character, which now
     * sits at p[0]; continue scanning right after it.
     */
    pos=ret;

    /* Remember where the current run of whitespace started; any
     * non-whitespace character cancels the candidate.
     */
    while(pos<n){
        ret=mbrtowc(&wc, p+pos, n-pos, &ps);
        if(stripws_stop(ret))
            break;
        if(iswspace(wc)){
            if(trail==NULL)
                trail=p+pos;
        }else{
            trail=NULL;
        }
        pos+=ret;
    }

    /* Only cut when the whitespace run really reaches the end of the
     * string; if scanning stopped early on an undecodable sequence the
     * bytes after the run are not whitespace and must be kept.
     */
    if(trail!=NULL && pos>=n)
        *trail='\0';

    return p;
}
77 int str_prevoff(const char *p, int pos)
79 if(ioncore_g.enc_sb)
80 return (pos>0 ? 1 : 0);
82 if(ioncore_g.enc_utf8){
83 int opos=pos;
85 while(pos>0){
86 pos--;
87 if((p[pos]&0xC0)!=0x80)
88 break;
90 return opos-pos;
93 assert(ioncore_g.use_mb);
95 /* *sigh* */
96 int l, prev=0;
97 mbstate_t ps;
99 memset(&ps, 0, sizeof(ps));
101 while(1){
102 l=mbrlen(p+prev, pos-prev, &ps);
103 if(l<0){
104 warn(TR("Invalid multibyte string."));
105 return 0;
107 if(prev+l>=pos)
108 return pos-prev;
109 prev+=l;
116 int str_nextoff(const char *p, int opos)
118 if(ioncore_g.enc_sb)
119 return (*(p+opos)=='\0' ? 0 : 1);
121 if(ioncore_g.enc_utf8){
122 int pos=opos;
124 while(p[pos]){
125 pos++;
126 if((p[pos]&0xC0)!=0x80)
127 break;
129 return pos-opos;
132 assert(ioncore_g.use_mb);
134 mbstate_t ps;
135 int l;
136 memset(&ps, 0, sizeof(ps));
138 l=mbrlen(p+opos, strlen(p+opos), &ps);
139 if(l<0){
140 warn(TR("Invalid multibyte string."));
141 return 0;
143 return l;
148 int str_len(const char *p)
150 if(ioncore_g.enc_sb)
151 return strlen(p);
153 if(ioncore_g.enc_utf8){
154 int len=0;
156 while(*p){
157 if(((*p)&0xC0)!=0x80)
158 len++;
159 p++;
161 return len;
164 assert(ioncore_g.use_mb);
166 mbstate_t ps;
167 int len=0, bytes=strlen(p), l;
168 memset(&ps, 0, sizeof(ps));
170 while(bytes>0){
171 l=mbrlen(p, bytes, &ps);
172 if(l<=0){
173 warn(TR("Invalid multibyte string."));
174 break;
176 len++;
177 bytes-=l;
178 p += l;
180 return len;
185 /*}}}*/
188 /*{{{ Title shortening */
191 static char *scatn3(const char *p1, int l1,
192 const char *p2, int l2,
193 const char *p3, int l3)
195 char *p=ALLOC_N(char, l1+l2+l3+1);
197 if(p!=NULL){
198 strncat(p, p1, l1);
199 strncat(p, p2, l2);
200 strncat(p, p3, l3);
202 return p;
205 INTRSTRUCT(SR);
207 DECLSTRUCT(SR){
208 regex_t re;
209 char *rule;
210 SR *next, *prev;
211 bool always;
215 static SR *shortenrules=NULL;
218 /*EXTL_DOC
219 * Add a rule describing how too long titles should be shortened to fit in tabs.
220 * The regular expression \var{rx} (POSIX, not Lua!) is used to match titles
221 * and when \var{rx} matches, \var{rule} is attempted to use as a replacement
222 * for title. If \var{always} is set, the rule is used even if no shortening
223 * is necessary.
225 * Similarly to sed's 's' command, \var{rule} may contain characters that are
226 * inserted in the resulting string and specials as follows:
228 * \begin{tabularx}{\linewidth}{lX}
229 * \tabhead{Special & Description}
230 * \$0 & Place the original string here. \\
231 * \$1 to \$9 & Insert n:th capture here (as usual,captures are surrounded
232 * by parentheses in the regex). \\
233 * \$| & Alternative shortening separator. The shortening described
234 * before the first this kind of separator is tried first and
235 * if it fails to make the string short enough, the next is
236 * tried, and so on. \\
237 * \$< & Remove characters on the left of this marker to shorten the
238 * string. \\
239 * \$> & Remove characters on the right of this marker to shorten the
240 * string. Only the first \$< or \$> within an alternative
241 * shortening is used. \\
242 * \end{tabularx}
244 EXTL_EXPORT
245 bool ioncore_defshortening(const char *rx, const char *rule, bool always)
247 SR *si;
248 int ret;
249 #define ERRBUF_SIZE 256
250 static char errbuf[ERRBUF_SIZE];
252 if(rx==NULL || rule==NULL)
253 return FALSE;
255 si=ALLOC(SR);
257 if(si==NULL)
258 return FALSE;
260 ret=regcomp(&(si->re), rx, REG_EXTENDED);
262 if(ret!=0){
263 errbuf[0]='\0';
264 regerror(ret, &(si->re), errbuf, ERRBUF_SIZE);
265 warn(TR("Error compiling regular expression: %s"), errbuf);
266 goto fail2;
269 si->rule=scopy(rule);
270 si->always=always;
272 if(si->rule==NULL)
273 goto fail;
275 LINK_ITEM(shortenrules, si, next, prev);
277 return TRUE;
279 fail:
280 regfree(&(si->re));
281 fail2:
282 free(si);
283 return FALSE;
287 static char *shorten(GrBrush *brush, const char *str, uint maxw,
288 const char *rule, int nmatch, regmatch_t *pmatch)
290 char *s;
291 int rulelen, slen, i, j, k, ll;
292 int strippt=0;
293 int stripdir=-1;
294 bool more=FALSE;
296 /* Ensure matches are at character boundaries */
297 if(!ioncore_g.enc_sb){
298 int pos=0, len, strl;
299 mbstate_t ps;
300 memset(&ps, 0, sizeof(ps));
302 strl=strlen(str);
304 while(pos<strl){
305 len=mbrtowc(NULL, str+pos, strl-pos, &ps);
306 if(len<0){
307 /* Invalid multibyte string */
308 return scopy("???");
310 if(len==0)
311 break;
312 for(i=0; i<nmatch; i++){
313 if(pmatch[i].rm_so>pos && pmatch[i].rm_so<pos+len)
314 pmatch[i].rm_so=pos+len;
315 if(pmatch[i].rm_eo>pos && pmatch[i].rm_eo<pos+len)
316 pmatch[i].rm_eo=pos;
318 pos+=len;
322 /* Stupid alloc rule that wastes space */
323 rulelen=strlen(rule);
324 slen=rulelen;
326 for(i=0; i<nmatch; i++){
327 if(pmatch[i].rm_so==-1)
328 continue;
329 slen+=(pmatch[i].rm_eo-pmatch[i].rm_so);
332 s=ALLOC_N(char, slen);
334 if(s==NULL)
335 return NULL;
338 more=FALSE;
339 j=0;
340 strippt=0;
341 stripdir=-1;
343 for(i=0; i<rulelen; i++){
344 if(rule[i]!='$'){
345 s[j++]=rule[i];
346 continue;
349 i++;
351 if(rule[i]=='|'){
352 rule=rule+i+1;
353 rulelen=rulelen-i-1;
354 more=TRUE;
355 break;
358 if(rule[i]=='$'){
359 s[j++]='$';
360 continue;
363 if(rule[i]=='<'){
364 strippt=j;
365 stripdir=-1;
366 continue;
369 if(rule[i]=='>'){
370 strippt=j;
371 stripdir=1;
372 continue;
375 if(rule[i]>='0' && rule[i]<='9'){
376 k=(int)(rule[i]-'0');
377 if(k>=nmatch)
378 continue;
379 if(pmatch[k].rm_so==-1)
380 continue;
381 ll=(pmatch[k].rm_eo-pmatch[k].rm_so);
382 strncpy(s+j, str+pmatch[k].rm_so, ll);
383 j+=ll;
387 slen=j;
388 s[slen]='\0';
390 i=strippt;
391 j=strippt;
393 /* shorten */
395 uint bl=grbrush_get_text_width(brush, s, i);
396 uint el=grbrush_get_text_width(brush, s+j, slen-j);
398 while(1){
399 /* el+bl may not be the actual length, but close enough. */
400 if(el+bl<=maxw){
401 memmove(s+i, s+j, slen-j+1);
402 return s;
405 if(stripdir==-1){
406 ll=str_prevoff(s, i);
407 if(ll==0)
408 break;
409 i-=ll;
410 bl=grbrush_get_text_width(brush, s, i);
411 }else{
412 ll=str_nextoff(s, j);
413 if(ll==0)
414 break;
415 j+=ll;
416 el=grbrush_get_text_width(brush, s+j, slen-j);
420 }while(more);
422 free(s);
424 return NULL;
428 char *grbrush_make_label(GrBrush *brush, const char *str, uint maxw)
430 size_t nmatch=10;
431 regmatch_t pmatch[10];
432 SR *rule;
433 int ret;
434 char *retstr;
435 bool fits=FALSE;
437 if(grbrush_get_text_width(brush, str, strlen(str))<=maxw)
438 fits=TRUE;
440 /*return scopy(str);*/
442 for(rule=shortenrules; rule!=NULL; rule=rule->next){
443 if(fits && !rule->always)
444 continue;
445 ret=regexec(&(rule->re), str, nmatch, pmatch, 0);
446 if(ret!=0)
447 continue;
448 retstr=shorten(brush, str, maxw, rule->rule, nmatch, pmatch);
449 goto rettest;
452 if(fits){
453 retstr=scopy(str);
454 }else{
455 pmatch[0].rm_so=0;
456 pmatch[0].rm_eo=strlen(str)-1;
457 retstr=shorten(brush, str, maxw, "$1$<...", 1, pmatch);
460 rettest:
461 if(retstr!=NULL)
462 return retstr;
463 return scopy("");
467 /*}}}*/