Patch-ID: bash40-030
[bash.git] / lib / sh / casemod.c
blob b7e085018eb2fa994a8252705e533da1c1c2ef0c
1 /* casemod.c -- functions to change case of strings */
3 /* Copyright (C) 2008,2009 Free Software Foundation, Inc.
5 This file is part of GNU Bash, the Bourne Again SHell.
7 Bash is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 Bash is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with Bash. If not, see <http://www.gnu.org/licenses/>.
21 #if defined (HAVE_CONFIG_H)
22 # include <config.h>
23 #endif
25 #if defined (HAVE_UNISTD_H)
26 # include <unistd.h>
27 #endif /* HAVE_UNISTD_H */
29 #include <stdc.h>
31 #include <bashansi.h>
32 #include <bashintl.h>
33 #include <bashtypes.h>
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <xmalloc.h>
39 #include <shmbutil.h>
40 #include <chartypes.h>
42 #include <glob/strmatch.h>
/* Wide-character case conversions.  Each one calls the tow*() conversion
   only when WC is currently in the opposite case, and otherwise yields WC
   unchanged. */
44 #define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc))
45 #define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc))
/* Without HANDLE_MULTIBYTE, cval is a plain byte index into S, iswalnum is
   mapped onto isalnum, and TOGGLE flips case using the single-byte
   ISUPPER/tolower/TOUPPER forms.  With HANDLE_MULTIBYTE, cval is the
   function defined later in this file and TOGGLE uses the wide-character
   classification and conversion functions. */
47 #if !defined (HANDLE_MULTIBYTE)
48 # define cval(s, i) ((s)[(i)])
49 # define iswalnum(c) (isalnum(c))
50 # define TOGGLE(x) (ISUPPER (x) ? tolower (x) : (TOUPPER (x)))
51 #else
52 # define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x)))
53 #endif
55 /* These must agree with the defines in externs.h */
/* The per-flag notes below describe how sh_modcase in this file treats each
   value; only alphanumeric characters (that match the pattern, if one is
   given) are ever changed. */
56 #define CASE_NOOP 0x0000 /* leave characters unchanged */
57 #define CASE_LOWER 0x0001 /* convert every character to lower case */
58 #define CASE_UPPER 0x0002 /* convert every character to upper case */
59 #define CASE_CAPITALIZE 0x0004 /* first character upper case, the rest lower case */
60 #define CASE_UNCAP 0x0008 /* first character lower case, the rest upper case */
61 #define CASE_TOGGLE 0x0010 /* toggle the case of the first character of each word only */
62 #define CASE_TOGGLEALL 0x0020 /* toggle the case of every character */
63 #define CASE_UPFIRST 0x0040 /* first character upper case, the rest unchanged */
64 #define CASE_LOWFIRST 0x0080 /* first character lower case, the rest unchanged */
66 #define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */
68 extern char *substring __P((char *, int, int));
70 #if defined (HANDLE_MULTIBYTE)
/* Return the character of S that begins at byte offset I, as a wchar_t.
   The single byte S[I] (converted to wchar_t) is returned instead when the
   locale is single-byte (MB_CUR_MAX == 1), when I is at or beyond the last
   byte of S, or when the bytes starting at I cannot be decoded or decode
   to the null wide character. */
static wchar_t
cval (s, i)
     char *s;
     int i;
{
  mbstate_t ps;
  wchar_t wch;
  size_t nbytes;
  int len;

  if (MB_CUR_MAX != 1)
    {
      len = strlen (s);
      /* Only attempt a multibyte decode when at least two bytes remain. */
      if (i < (len - 1))
        {
          /* Every call decodes from the initial shift state. */
          memset (&ps, 0, sizeof (mbstate_t));
          nbytes = mbrtowc (&wch, s + i, len - i, &ps);
          if (!MB_INVALIDCH (nbytes) && !MB_NULLWCH (nbytes))
            return wch;
        }
    }

  /* Common fallback: hand back the raw byte. */
  return ((wchar_t)s[i]);
}
92 #endif
94 /* Modify the case of characters in STRING matching PAT based on the value of
95 FLAGS. If PAT is null, modify the case of each character */
96 char *
97 sh_modcase (string, pat, flags)
98 const char *string;
99 char *pat;
100 int flags;
102 int start, next, end;
103 int inword, c, nc, nop, match, usewords;
104 char *ret, *s;
105 wchar_t wc;
106 #if defined (HANDLE_MULTIBYTE)
107 wchar_t nwc;
108 char mb[MB_LEN_MAX+1];
109 int mlen;
110 mbstate_t state;
111 #endif
113 #if defined (HANDLE_MULTIBYTE)
114 memset (&state, 0, sizeof (mbstate_t));
115 #endif
117 start = 0;
118 end = strlen (string);
120 ret = (char *)xmalloc (end + 1);
121 strcpy (ret, string);
123 /* See if we are supposed to split on alphanumerics and operate on each word */
124 usewords = (flags & CASE_USEWORDS);
125 flags &= ~CASE_USEWORDS;
127 inword = 0;
128 while (start < end)
130 wc = cval (ret, start);
132 if (iswalnum (wc) == 0)
134 inword = 0;
135 ADVANCE_CHAR (ret, end, start);
136 continue;
139 if (pat)
141 next = start;
142 ADVANCE_CHAR (ret, end, next);
143 s = substring (ret, start, next);
144 match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
145 free (s);
146 if (match == 0)
148 start = next;
149 inword = 1;
150 continue;
154 /* XXX - for now, the toggling operators work on the individual
155 words in the string, breaking on alphanumerics. Should I
156 leave the capitalization operators to do that also? */
157 if (flags == CASE_CAPITALIZE)
159 if (usewords)
160 nop = inword ? CASE_LOWER : CASE_UPPER;
161 else
162 nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
163 inword = 1;
165 else if (flags == CASE_UNCAP)
167 if (usewords)
168 nop = inword ? CASE_UPPER : CASE_LOWER;
169 else
170 nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
171 inword = 1;
173 else if (flags == CASE_UPFIRST)
175 if (usewords)
176 nop = inword ? CASE_NOOP : CASE_UPPER;
177 else
178 nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
179 inword = 1;
181 else if (flags == CASE_LOWFIRST)
183 if (usewords)
184 nop = inword ? CASE_NOOP : CASE_LOWER;
185 else
186 nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
187 inword = 1;
189 else if (flags == CASE_TOGGLE)
191 nop = inword ? CASE_NOOP : CASE_TOGGLE;
192 inword = 1;
194 else
195 nop = flags;
197 if (MB_CUR_MAX == 1 || isascii (wc))
199 switch (nop)
201 default:
202 case CASE_NOOP: nc = wc; break;
203 case CASE_UPPER: nc = TOUPPER (wc); break;
204 case CASE_LOWER: nc = TOLOWER (wc); break;
205 case CASE_TOGGLEALL:
206 case CASE_TOGGLE: nc = TOGGLE (wc); break;
208 ret[start] = nc;
210 #if defined (HANDLE_MULTIBYTE)
211 else
213 mbrtowc (&wc, string + start, end - start, &state);
214 switch (nop)
216 default:
217 case CASE_NOOP: nwc = wc; break;
218 case CASE_UPPER: nwc = TOUPPER (wc); break;
219 case CASE_LOWER: nwc = TOLOWER (wc); break;
220 case CASE_TOGGLEALL:
221 case CASE_TOGGLE: nwc = TOGGLE (wc); break;
223 if (nwc != wc) /* just skip unchanged characters */
225 mlen = wcrtomb (mb, nwc, &state);
226 if (mlen > 0)
227 mb[mlen] = '\0';
228 /* Assume the same width */
229 strncpy (ret + start, mb, mlen);
232 #endif
234 /* This assumes that the upper and lower case versions are the same width. */
235 ADVANCE_CHAR (ret, end, start);
238 return ret;