Addons updated to new doc format
[io.git] / addons / Regex / source / IoRegex.c
blobd63f856dc7a791120d09aa0d3624d773ad2395ad
1 //metadoc Regex copyright Steve Dekorte 2005, Daniel Rosengren 2007
2 //metadoc Regex license BSD revised
3 //metadoc Regex category RegularExpressions
4 /*metadoc description
5 The Regex addon adds support for Perl regular expressions
6 using the <a href=http://www.pcre.org/>PCRE</a> library by Philip Hazel.
8 Example use:
9 <code>
10 Io> "11aabb" allMatchesOfRegex("aa*")
11 ==> list("aa")
13 Io> re := "(wom)(bat)" asRegex
14 Io> "wombats are cuddly" matchesOfRegex(re) replaceAllWith("$2$1!")
15 ==> batwom!s are cuddly
16 </code>
18 <blockquote>
19 Some people, when confronted with a problem, think
20 "I know, I'll use regular expressions."
21 Now they have two problems.
22 </blockquote>
23 <strong>Jamie Zawinski</strong>
26 #include "IoRegex.h"
27 #include "IoState.h"
28 #include "IoNumber.h"
29 #include "IoList.h"
30 #include <stdlib.h>
31 #include <stdio.h>
33 #define DATA(self) ((IoRegexData *)IoObject_dataPointer(self))
35 static IoRegex *IoRegex_cloneWithOptions_(IoRegex *self, int options);
38 IoTag *IoRegex_newTag(void *state)
40 IoTag *tag = IoTag_newWithName_("Regex");
41 IoTag_state_(tag, state);
42 IoTag_freeFunc_(tag, (IoTagFreeFunc *)IoRegex_free);
43 IoTag_cloneFunc_(tag, (IoTagCloneFunc *)IoRegex_rawClone);
44 IoTag_markFunc_(tag, (IoTagMarkFunc *)IoRegex_mark);
45 return tag;
48 IoRegex *IoRegex_proto(void *state)
50 IoObject *self = IoObject_new(state);
51 IoObject_tag_(self, IoRegex_newTag(state));
53 IoObject_setDataPointer_(self, calloc(1, sizeof(IoRegexData)));
54 DATA(self)->pattern = IOSYMBOL("");
56 IoState_registerProtoWithFunc_(state, self, IoRegex_proto);
59 IoMethodTable methodTable[] = {
60 {"with", IoRegex_with},
62 {"pattern", IoRegex_pattern},
63 {"captureCount", IoRegex_captureCount},
64 {"nameToIndexMap", IoRegex_nameToIndexMap},
66 {"version", IoRegex_version},
68 /* Options */
70 {"caseless", IoRegex_caseless},
71 {"notCaseless", IoRegex_notCaseless},
72 {"isCaseless", IoRegex_isCaseless},
74 {"dotAll", IoRegex_dotAll},
75 {"notDotAll", IoRegex_notDotAll},
76 {"isDotAll", IoRegex_isDotAll},
78 {"extended", IoRegex_extended},
79 {"notExtended", IoRegex_notExtended},
80 {"isExtended", IoRegex_isExtended},
82 {"multiline", IoRegex_multiline},
83 {"notMultiline", IoRegex_notMultiline},
84 {"isMultiline", IoRegex_isMultiline},
86 {0, 0},
89 IoObject_addMethodTable_(self, methodTable);
92 return self;
95 IoRegex *IoRegex_rawClone(IoRegex *proto)
97 IoObject *self = IoObject_rawClonePrimitive(proto);
98 IoObject_setDataPointer_(self, calloc(1, sizeof(IoRegexData)));
99 DATA(self)->pattern = IOREF(DATA(proto)->pattern);
100 return self;
103 IoRegex *IoRegex_newWithPattern_(void *state, IoSymbol *pattern)
105 IoRegex *self = IOCLONE(IoState_protoWithInitFunction_(state, IoRegex_proto));
106 DATA(self)->pattern = IOREF(pattern);
107 return self;
110 void IoRegex_free(IoRegex *self)
112 if (DATA(self)->regex)
113 Regex_free(DATA(self)->regex);
114 free(DATA(self));
117 void IoRegex_mark(IoRegex *self)
119 IoObject_shouldMark(DATA(self)->pattern);
120 if (DATA(self)->nameToIndexMap)
121 IoObject_shouldMark(DATA(self)->nameToIndexMap);
125 Regex *IoRegex_rawRegex(IoRegex *self)
127 Regex *regex = DATA(self)->regex;
128 char *error = 0;
130 if (regex)
131 return regex;
133 DATA(self)->regex = regex = Regex_newFromPattern_withOptions_(
134 CSTRING(DATA(self)->pattern),
135 DATA(self)->options
138 error = (char *)Regex_error(regex);
139 if(error)
140 IoState_error_(IOSTATE, 0, error);
142 return regex;
146 /* ------------------------------------------------------------------------------------------------*/
148 IoObject *IoRegex_with(IoRegex *self, IoObject *locals, IoMessage *m)
150 /*doc Regex with(pattern)
151 Returns a new Regex created from the given pattern string.
154 return IoRegex_newWithPattern_(IOSTATE, IoMessage_locals_symbolArgAt_(m, locals, 0));
158 IoObject *IoRegex_pattern(IoRegex *self, IoObject *locals, IoMessage *m)
160 /*doc Regex pattern
161 Returns the pattern string that the receiver was created from.
164 return DATA(self)->pattern;
167 IoObject *IoRegex_captureCount(IoRegex *self, IoObject *locals, IoMessage *m)
169 /*doc Regex captureCount
170 Returns the number of captures defined by the pattern.
173 return IONUMBER(IoRegex_rawRegex(self)->captureCount);
176 IoObject *IoRegex_nameToIndexMap(IoRegex *self, IoObject *locals, IoMessage *m)
178 /*doc Regex nameToIndexMap
179 Returns a Map that maps capture names to capture indices.
182 IoMap *map = DATA(self)->nameToIndexMap;
183 NamedCapture *namedCaptures = 0, *capture = 0;
185 if (map)
186 return map;
188 map = DATA(self)->nameToIndexMap = IOREF(IoMap_new(IOSTATE));
190 capture = namedCaptures = Regex_namedCaptures(IoRegex_rawRegex(self));
192 if (!namedCaptures)
193 return map;
195 while (capture->name)
197 IoMap_rawAtPut(map, IOSYMBOL(capture->name), IONUMBER(capture->index));
198 capture++;
201 free(namedCaptures);
202 return map;
206 IoObject *IoRegex_version(IoRegex *self, IoObject *locals, IoMessage *m)
208 /*doc Regex version
209 Returns a string with PCRE version information.
212 return IOSYMBOL(pcre_version());
216 /* ------------------------------------------------------------------------------------------------*/
217 /* Options */
219 IoObject *IoRegex_caseless(IoRegex *self, IoObject *locals, IoMessage *m)
221 /*doc Regex caseless
222 Returns a case insensitive clone of the receiver, or self if the receiver itself is
223 case insensitive.
225 Example:
226 <code>
227 Io> "WORD" matchesRegex("[a-z]+")
228 ==> false
230 Io> "WORD" matchesRegex("[a-z]+" asRegex caseless)
231 ==> true
232 </code>
235 return IoRegex_cloneWithOptions_(self, DATA(self)->options | PCRE_CASELESS);
238 IoObject *IoRegex_notCaseless(IoRegex *self, IoObject *locals, IoMessage *m)
240 /*doc Regex notCaseless
241 The reverse of caseless.
244 return IoRegex_cloneWithOptions_(self, DATA(self)->options & ~PCRE_CASELESS);
247 IoObject *IoRegex_isCaseless(IoRegex *self, IoObject *locals, IoMessage *m)
249 /*doc Regex isCaseless
250 Returns true if the receiver is case insensitive, false if not.
253 return IOBOOL(self, DATA(self)->options & PCRE_CASELESS);
257 IoObject *IoRegex_dotAll(IoRegex *self, IoObject *locals, IoMessage *m)
259 /*doc Regex dotAll
260 Returns a clone of the receiver with the dotall option turned on,
261 or self if the receiver itself has the option turned on.
263 In dotall mode, "." matches any character, including newline. By default
264 it matches any character <em>except</em> newline.
266 Example:
267 <code>
268 Io> "A\nB" matchesOfRegex(".+") next string
269 ==> A
271 Io> "A\nB" matchesOfRegex(".+" asRegex dotAll) next string
272 ==> A\nB
273 </code>
275 return IoRegex_cloneWithOptions_(self, DATA(self)->options | PCRE_DOTALL);
278 IoObject *IoRegex_notDotAll(IoRegex *self, IoObject *locals, IoMessage *m)
280 /*doc Regex notDotAll
281 The reverse of dotAll.
284 return IoRegex_cloneWithOptions_(self, DATA(self)->options & ~PCRE_DOTALL);
287 IoObject *IoRegex_isDotAll(IoRegex *self, IoObject *locals, IoMessage *m)
289 /*doc Regex isDotAll
290 Returns true if the receiver is in dotall mode, false if not.
293 return IOBOOL(self, DATA(self)->options & PCRE_DOTALL);
297 IoObject *IoRegex_extended(IoRegex *self, IoObject *locals, IoMessage *m)
299 /*doc Regex extended
300 Returns a clone of the receiver with the extended option turned on,
301 or self if the receiver itself has the option turned on.
303 In extended mode, a Regex ignores any whitespace character in the pattern except
304 when escaped or inside a character class. This allows you to write clearer patterns
305 that may be broken up into several lines.
307 Additionally, you can put comments in the pattern. A comment starts with a "#"
308 character and continues to the end of the line, unless the "#" is escaped or is
309 inside a character class.""
312 return IoRegex_cloneWithOptions_(self, DATA(self)->options | PCRE_EXTENDED);
315 IoObject *IoRegex_notExtended(IoRegex *self, IoObject *locals, IoMessage *m)
317 /*doc Regex notExtended
318 The reverse of extended.
321 return IoRegex_cloneWithOptions_(self, DATA(self)->options & ~PCRE_EXTENDED);
324 IoObject *IoRegex_isExtended(IoRegex *self, IoObject *locals, IoMessage *m)
326 /*doc Regex isExtended
327 Returns true if the receiver is in extended mode, false if not.
329 return IOBOOL(self, DATA(self)->options & PCRE_EXTENDED);
333 IoObject *IoRegex_multiline(IoRegex *self, IoObject *locals, IoMessage *m)
335 /*doc Regex multiline
336 Returns a clone of the receiver with the multiline option turned on,
337 or self if the receiver itself has the option turned on.
339 In multiline mode, "^" matches at the beginning of the string and at
340 the beginning of each line; and "$" matches at the end of the string,
341 and at the end of each line.
342 By default "^" only matches at the beginning of the string, and "$"
343 only matches at the end of the string.
345 Example:
346 <code>
347 Io> "A\nB\nC" allMatchesForRegex("^.")
348 ==> list("A")
350 Io> "A\nB\nC" allMatchesForRegex("^." asRegex multiline)
351 ==> list("A", "B", "C")
352 </code>
355 return IoRegex_cloneWithOptions_(self, DATA(self)->options | PCRE_MULTILINE);
358 IoObject *IoRegex_notMultiline(IoRegex *self, IoObject *locals, IoMessage *m)
360 /*doc Regex notMultiline
361 The reverse of multiline.
364 return IoRegex_cloneWithOptions_(self, DATA(self)->options & ~PCRE_MULTILINE);
367 IoObject *IoRegex_isMultiline(IoRegex *self, IoObject *locals, IoMessage *m)
369 /*doc Regex isMultiline
370 Returns true if the receiver is in multiline mode, false if not.
373 return IOBOOL(self, DATA(self)->options & PCRE_MULTILINE);
377 /* ------------------------------------------------------------------------------------------------*/
378 /* Private */
380 static IoRegex *IoRegex_cloneWithOptions_(IoRegex *self, int options)
382 IoRegex *clone = 0;
384 if (options == DATA(self)->options)
385 return self;
387 clone = IOCLONE(self);
388 DATA(clone)->options = options;
389 return clone;