1 //metadoc Regex copyright Steve Dekorte 2005, Daniel Rosengren 2007
2 //metadoc Regex license BSD revised
3 //metadoc Regex category RegularExpressions
5 The Regex addon adds support for Perl regular expressions
6 using the <a href=http://www.pcre.org/>PCRE</a> library by Philip Hazel.
10 Io> "11aabb" allMatchesOfRegex("aa*")
13 Io> re := "(wom)(bat)" asRegex
14 Io> "wombats are cuddly" matchesOfRegex(re) replaceAllWith("$2$1!")
15 ==> batwom!s are cuddly
19 Some people, when confronted with a problem, think
20 "I know, I'll use regular expressions."
21 Now they have two problems.
23 <strong>Jamie Zawinski</strong>
// DATA(self): casts the result of IoObject_dataPointer(self) to IoRegexData *,
// giving typed access to the per-object state (pattern, regex, options,
// nameToIndexMap) that the functions in this file read and write.
33 #define DATA(self) ((IoRegexData *)IoObject_dataPointer(self))
// Forward declaration of the file-local helper used by the option methods
// (caseless, dotAll, extended, multiline and their "not" variants); its
// definition appears later in this file.
35 static IoRegex
*IoRegex_cloneWithOptions_(IoRegex
*self
, int options
);
// IoRegex_newTag: builds the tag for Regex objects.
// Creates a tag named "Regex", stores `state` on it, and registers
// IoRegex_free, IoRegex_rawClone and IoRegex_mark as the tag's free, clone
// and mark callbacks.
// NOTE(review): no return statement is visible in this listing although the
// function is declared to return IoTag * -- confirm that `tag` is returned.
38 IoTag
*IoRegex_newTag(void *state
)
40 IoTag
*tag
= IoTag_newWithName_("Regex");
41 IoTag_state_(tag
, state
);
42 IoTag_freeFunc_(tag
, (IoTagFreeFunc
*)IoRegex_free
);
43 IoTag_cloneFunc_(tag
, (IoTagCloneFunc
*)IoRegex_rawClone
);
44 IoTag_markFunc_(tag
, (IoTagMarkFunc
*)IoRegex_mark
);
// IoRegex_proto: builds the Regex prototype object for `state`.
// Steps visible here:
//   - creates a new IoObject and tags it with IoRegex_newTag(state);
//   - attaches a zero-filled (calloc) IoRegexData as the object's data pointer;
//   - sets the pattern to the empty symbol;
//   - registers the object with `state`, keyed by IoRegex_proto;
//   - installs a method table mapping Io slot names to their C implementations.
// NOTE(review): the calloc result is dereferenced on the next statement
// without a NULL check.
// NOTE(review): no terminating entry for methodTable and no return statement
// are visible in this listing -- confirm against the complete file.
48 IoRegex
*IoRegex_proto(void *state
)
50 IoObject
*self
= IoObject_new(state
);
51 IoObject_tag_(self
, IoRegex_newTag(state
));
53 IoObject_setDataPointer_(self
, calloc(1, sizeof(IoRegexData
)));
54 DATA(self
)->pattern
= IOSYMBOL("");
56 IoState_registerProtoWithFunc_(state
, self
, IoRegex_proto
);
59 IoMethodTable methodTable
[] = {
60 {"with", IoRegex_with
},
62 {"pattern", IoRegex_pattern
},
63 {"captureCount", IoRegex_captureCount
},
64 {"nameToIndexMap", IoRegex_nameToIndexMap
},
66 {"version", IoRegex_version
},
70 {"caseless", IoRegex_caseless
},
71 {"notCaseless", IoRegex_notCaseless
},
72 {"isCaseless", IoRegex_isCaseless
},
74 {"dotAll", IoRegex_dotAll
},
75 {"notDotAll", IoRegex_notDotAll
},
76 {"isDotAll", IoRegex_isDotAll
},
78 {"extended", IoRegex_extended
},
79 {"notExtended", IoRegex_notExtended
},
80 {"isExtended", IoRegex_isExtended
},
82 {"multiline", IoRegex_multiline
},
83 {"notMultiline", IoRegex_notMultiline
},
84 {"isMultiline", IoRegex_isMultiline
},
89 IoObject_addMethodTable_(self
, methodTable
);
// IoRegex_rawClone: clone callback registered on the Regex tag.
// Clones `proto` with IoObject_rawClonePrimitive, gives the clone its own
// zero-filled (calloc) IoRegexData, and stores an IOREF'd reference to the
// proto's pattern in it. In the lines shown, only the pattern is carried
// over; the remaining fields keep the zero values calloc gave them.
// NOTE(review): the calloc result is dereferenced on the next statement
// without a NULL check.
// NOTE(review): no return statement is visible in this listing -- confirm
// that `self` is returned.
95 IoRegex
*IoRegex_rawClone(IoRegex
*proto
)
97 IoObject
*self
= IoObject_rawClonePrimitive(proto
);
98 IoObject_setDataPointer_(self
, calloc(1, sizeof(IoRegexData
)));
99 DATA(self
)->pattern
= IOREF(DATA(proto
)->pattern
);
// IoRegex_newWithPattern_: creates a Regex object for a pattern symbol.
// Clones the proto that `state` has registered under IoRegex_proto and
// stores an IOREF'd reference to `pattern` in the clone's data.
// NOTE(review): no return statement is visible in this listing -- confirm
// that the new object is returned.
103 IoRegex
*IoRegex_newWithPattern_(void *state
, IoSymbol
*pattern
)
105 IoRegex
*self
= IOCLONE(IoState_protoWithInitFunction_(state
, IoRegex_proto
));
106 DATA(self
)->pattern
= IOREF(pattern
);
// IoRegex_free: free callback registered on the Regex tag.
// Releases the Regex held in the object's data with Regex_free, but only
// when one is set.
// NOTE(review): the IoRegexData block allocated with calloc in IoRegex_proto
// and IoRegex_rawClone is not released in the lines shown -- confirm that it
// is freed somewhere.
110 void IoRegex_free(IoRegex
*self
)
112 if (DATA(self
)->regex
)
113 Regex_free(DATA(self
)->regex
);
// IoRegex_mark: mark callback registered on the Regex tag.
// Passes the pattern to IoObject_shouldMark unconditionally, and the
// nameToIndexMap only when it is set (it is zero until a map is stored).
117 void IoRegex_mark(IoRegex
*self
)
119 IoObject_shouldMark(DATA(self
)->pattern
);
120 if (DATA(self
)->nameToIndexMap
)
121 IoObject_shouldMark(DATA(self
)->nameToIndexMap
);
// IoRegex_rawRegex: returns the Regex backing this object.
// Reads the Regex stored in the object's data. The visible lines also build
// one with Regex_newFromPattern_withOptions_ from the pattern's C string,
// store it back into the data, fetch Regex_error(regex), and pass that
// message to IoState_error_.
// NOTE(review): the conditions guarding creation and error reporting, the
// declaration of `error`, the remaining arguments of
// Regex_newFromPattern_withOptions_, and the return are not visible in this
// listing. Presumably the Regex is built lazily on first use and an error is
// raised only when Regex_error returns a message -- confirm.
125 Regex
*IoRegex_rawRegex(IoRegex
*self
)
127 Regex
*regex
= DATA(self
)->regex
;
133 DATA(self
)->regex
= regex
= Regex_newFromPattern_withOptions_(
134 CSTRING(DATA(self
)->pattern
),
138 error
= (char *)Regex_error(regex
);
140 IoState_error_(IOSTATE
, 0, error
);
146 /* ------------------------------------------------------------------------------------------------*/
// IoRegex_with: C implementation of the "with" slot.
// Reads argument 0 of message `m` as a symbol (evaluated in `locals`) and
// returns a new Regex built from it via IoRegex_newWithPattern_.
148 IoObject
*IoRegex_with(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
150 /*doc Regex with(pattern)
151 Returns a new Regex created from the given pattern string.
154 return IoRegex_newWithPattern_(IOSTATE
, IoMessage_locals_symbolArgAt_(m
, locals
, 0));
// IoRegex_pattern: C implementation of the "pattern" slot.
// Returns the pattern stored in the receiver's data; `locals` and `m` are
// not used in the lines shown.
158 IoObject
*IoRegex_pattern(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
161 Returns the pattern string that the receiver was created from.
164 return DATA(self
)->pattern
;
// IoRegex_captureCount: C implementation of the "captureCount" slot.
// Obtains the underlying Regex through IoRegex_rawRegex(self) and returns its
// captureCount field wrapped with IONUMBER.
167 IoObject
*IoRegex_captureCount(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
169 /*doc Regex captureCount
170 Returns the number of captures defined by the pattern.
173 return IONUMBER(IoRegex_rawRegex(self
)->captureCount
);
// IoRegex_nameToIndexMap: C implementation of the "nameToIndexMap" slot.
// Starts from the map stored in the receiver's data. The visible lines create
// a new IoMap, store it (IOREF'd) in the data, fetch the named captures of
// the underlying Regex, and, while the current capture has a non-null name,
// add a symbol(name) -> number(index) entry to the map.
// NOTE(review): the check for an already-stored map, the loop's advance step,
// any release of `namedCaptures`, and the return are not visible in this
// listing -- confirm against the complete file.
176 IoObject
*IoRegex_nameToIndexMap(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
178 /*doc Regex nameToIndexMap
179 Returns a Map that maps capture names to capture indices.
182 IoMap
*map
= DATA(self
)->nameToIndexMap
;
183 NamedCapture
*namedCaptures
= 0, *capture
= 0;
188 map
= DATA(self
)->nameToIndexMap
= IOREF(IoMap_new(IOSTATE
));
190 capture
= namedCaptures
= Regex_namedCaptures(IoRegex_rawRegex(self
));
195 while (capture
->name
)
197 IoMap_rawAtPut(map
, IOSYMBOL(capture
->name
), IONUMBER(capture
->index
));
// IoRegex_version: C implementation of the "version" slot.
// Returns the string produced by pcre_version() as a symbol.
206 IoObject
*IoRegex_version(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
209 Returns a string with PCRE version information.
212 return IOSYMBOL(pcre_version());
216 /* ------------------------------------------------------------------------------------------------*/
// IoRegex_caseless: C implementation of the "caseless" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options plus the PCRE_CASELESS bit.
219 IoObject
*IoRegex_caseless(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
222 Returns a case insensitive clone of the receiver, or self if the receiver itself is
227 Io> "WORD" matchesRegex("[a-z]+")
230 Io> "WORD" matchesRegex("[a-z]+" asRegex caseless)
235 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
| PCRE_CASELESS
);
// IoRegex_notCaseless: C implementation of the "notCaseless" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options with the PCRE_CASELESS bit cleared.
238 IoObject
*IoRegex_notCaseless(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
240 /*doc Regex notCaseless
241 The reverse of caseless.
244 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
& ~PCRE_CASELESS
);
// IoRegex_isCaseless: C implementation of the "isCaseless" slot.
// Tests the PCRE_CASELESS bit of the receiver's stored options and wraps the
// result with IOBOOL.
247 IoObject
*IoRegex_isCaseless(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
249 /*doc Regex isCaseless
250 Returns true if the receiver is case insensitive, false if not.
253 return IOBOOL(self
, DATA(self
)->options
& PCRE_CASELESS
);
// IoRegex_dotAll: C implementation of the "dotAll" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options plus the PCRE_DOTALL bit.
257 IoObject
*IoRegex_dotAll(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
260 Returns a clone of the receiver with the dotall option turned on,
261 or self if the receiver itself has the option turned on.
263 In dotall mode, "." matches any character, including newline. By default
264 it matches any character <em>except</em> newline.
268 Io> "A\nB" matchesOfRegex(".+") next string
271 Io> "A\nB" matchesOfRegex(".+" asRegex dotAll) next string
275 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
| PCRE_DOTALL
);
// IoRegex_notDotAll: C implementation of the "notDotAll" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options with the PCRE_DOTALL bit cleared.
278 IoObject
*IoRegex_notDotAll(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
280 /*doc Regex notDotAll
281 The reverse of dotAll.
284 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
& ~PCRE_DOTALL
);
// IoRegex_isDotAll: C implementation of the "isDotAll" slot.
// Tests the PCRE_DOTALL bit of the receiver's stored options and wraps the
// result with IOBOOL.
287 IoObject
*IoRegex_isDotAll(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
290 Returns true if the receiver is in dotall mode, false if not.
293 return IOBOOL(self
, DATA(self
)->options
& PCRE_DOTALL
);
// IoRegex_extended: C implementation of the "extended" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options plus the PCRE_EXTENDED bit.
// NOTE(review): the description text below ends with a stray pair of double
// quotes after "character class." -- looks like a typo; confirm and remove.
297 IoObject
*IoRegex_extended(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
300 Returns a clone of the receiver with the extended option turned on,
301 or self if the receiver itself has the option turned on.
303 In extended mode, a Regex ignores any whitespace character in the pattern except
304 when escaped or inside a character class. This allows you to write clearer patterns
305 that may be broken up into several lines.
307 Additionally, you can put comments in the pattern. A comment starts with a "#"
308 character and continues to the end of the line, unless the "#" is escaped or is
309 inside a character class.""
312 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
| PCRE_EXTENDED
);
// IoRegex_notExtended: C implementation of the "notExtended" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options with the PCRE_EXTENDED bit cleared.
315 IoObject
*IoRegex_notExtended(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
317 /*doc Regex notExtended
318 The reverse of extended.
321 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
& ~PCRE_EXTENDED
);
// IoRegex_isExtended: C implementation of the "isExtended" slot.
// Tests the PCRE_EXTENDED bit of the receiver's stored options and wraps the
// result with IOBOOL.
324 IoObject
*IoRegex_isExtended(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
326 /*doc Regex isExtended
327 Returns true if the receiver is in extended mode, false if not.
329 return IOBOOL(self
, DATA(self
)->options
& PCRE_EXTENDED
);
// IoRegex_multiline: C implementation of the "multiline" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options plus the PCRE_MULTILINE bit.
// NOTE(review): the examples in the doc text below call allMatchesForRegex,
// while the examples in this file's header use allMatchesOfRegex -- confirm
// which method name actually exists and align the examples.
333 IoObject
*IoRegex_multiline(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
335 /*doc Regex multiline
336 Returns a clone of the receiver with the multiline option turned on,
337 or self if the receiver itself has the option turned on.
339 In multiline mode, "^" matches at the beginning of the string and at
340 the beginning of each line; and "$" matches at the end of the string,
341 and at the end of each line.
342 By default "^" only matches at the beginning of the string, and "$"
343 only matches at the end of the string.
347 Io> "A\nB\nC" allMatchesForRegex("^.")
350 Io> "A\nB\nC" allMatchesForRegex("^." asRegex multiline)
351 ==> list("A", "B", "C")
355 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
| PCRE_MULTILINE
);
// IoRegex_notMultiline: C implementation of the "notMultiline" slot.
// Returns the result of IoRegex_cloneWithOptions_ called with the receiver's
// current options with the PCRE_MULTILINE bit cleared.
358 IoObject
*IoRegex_notMultiline(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
360 /*doc Regex notMultiline
361 The reverse of multiline.
364 return IoRegex_cloneWithOptions_(self
, DATA(self
)->options
& ~PCRE_MULTILINE
);
// IoRegex_isMultiline: C implementation of the "isMultiline" slot.
// Tests the PCRE_MULTILINE bit of the receiver's stored options and wraps the
// result with IOBOOL.
367 IoObject
*IoRegex_isMultiline(IoRegex
*self
, IoObject
*locals
, IoMessage
*m
)
369 /*doc Regex isMultiline
370 Returns true if the receiver is in multiline mode, false if not.
373 return IOBOOL(self
, DATA(self
)->options
& PCRE_MULTILINE
);
377 /* ------------------------------------------------------------------------------------------------*/
// IoRegex_cloneWithOptions_: file-local helper behind the option methods.
// Compares `options` with the receiver's current options; the visible lines
// then clone the receiver with IOCLONE and store `options` in the clone's data.
// NOTE(review): the branch taken when the options are equal, the declaration
// of `clone`, and the return are not visible in this listing. The method docs
// in this file say the receiver itself is returned when the option is already
// in the requested state -- confirm against the complete definition.
380 static IoRegex
*IoRegex_cloneWithOptions_(IoRegex
*self
, int options
)
384 if (options
== DATA(self
)->options
)
387 clone
= IOCLONE(self
);
388 DATA(clone
)->options
= options
;