1 """Regexp-based split and replace using the obsolete regex module.
3 This module is only for backward compatibility. These operations
4 are now provided by the new regular expression module, "re".
6 sub(pat, repl, str): replace first occurrence of pattern in string
7 gsub(pat, repl, str): replace all occurrences of pattern in string
8 split(str, pat, maxsplit): split string using pattern as delimiter
9 splitx(str, pat, maxsplit): split string using pattern as delimiter plus return delimiters
14 warnings
.warn("the regsub module is deprecated; please use re.sub()",
17 # Ignore further deprecation warnings about this module
warnings.filterwarnings(
    "ignore",
    message="",
    category=DeprecationWarning,
    module=__name__
)

# Public names exported by this module.
__all__ = [
    "sub",
    "gsub",
    "split",
    "splitx",
    "capwords",
]
24 # Replace first occurrence of pattern pat in string str by replacement
25 # repl. If the pattern isn't found, the string is returned unchanged.
26 # The replacement may contain references \digit to subpatterns and
27 # escaped backslashes. The pattern may be a string or an already compiled pattern.
30 def sub(pat
, repl
, str):
32 if prog
.search(str) >= 0:
35 str = str[:a
] + expand(repl
, regs
, str) + str[b
:]
39 # Replace all (non-overlapping) occurrences of pattern pat in string
40 # str by replacement repl. The same rules as for sub() apply.
41 # Empty matches for the pattern are replaced only when not adjacent to
42 # a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
44 def gsub(pat
, repl
, str):
49 while prog
.search(str, start
) >= 0:
52 if a
== b
== start
and not first
:
53 if start
>= len(str) or prog
.search(str, start
+1) < 0:
57 new
= new
+ str[start
:a
] + expand(repl
, regs
, str)
60 new
= new
+ str[start
:]
64 # Split string str in fields separated by delimiters matching pattern
65 # pat. Only non-empty matches for the pattern are considered, so e.g.
66 # split('abc', '') returns ['abc'].
67 # The optional 3rd argument sets the number of splits that are performed.
def split(str, pat, maxsplit=0):
    """Split str into fields separated by matches of the pattern pat.

    This is a thin wrapper: it forwards str, pat and maxsplit unchanged
    to intsplit() with the retain flag set to 0 and returns whatever
    intsplit() returns.  maxsplit defaults to 0.
    """
    retain = 0
    return intsplit(str, pat, maxsplit, retain)
72 # Split string str in fields separated by delimiters matching pattern
73 # pat. Only non-empty matches for the pattern are considered, so e.g.
74 # split('abc', '') returns ['abc']. The delimiters are also included in the list.
76 # The optional 3rd argument sets the number of splits that are performed.
def splitx(str, pat, maxsplit=0):
    """Split str on matches of the pattern pat, with the retain flag set.

    This is a thin wrapper: it forwards str, pat and maxsplit unchanged
    to intsplit() with the retain flag set to 1 and returns whatever
    intsplit() returns.  maxsplit defaults to 0.
    """
    retain = 1
    return intsplit(str, pat, maxsplit, retain)
82 # Internal function used to implement split() and splitx().
84 def intsplit(str, pat
, maxsplit
, retain
):
89 while prog
.search(str, next
) >= 0:
97 res
.append(str[start
:a
])
101 splitcount
= splitcount
+ 1
102 if (maxsplit
and (splitcount
>= maxsplit
)):
104 res
.append(str[start
:])
108 # Capitalize words split using a pattern
def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Return str with every other piece of splitx(str, pat) capitalized.

    str is split with splitx() using pat; the pieces at even positions
    (0, 2, 4, ...) are replaced by their capitalize()d form and all
    pieces are joined back together with no separator.
    """
    pieces = splitx(str, pat)
    # Only even positions are touched; presumably those are the words and
    # the odd positions are the delimiters retained by splitx().
    pieces[0::2] = [piece.capitalize() for piece in pieces[0::2]]
    return "".join(pieces)
117 # Internal subroutines:
118 # compile(pat): compile a pattern, caching already compiled patterns
119 # expand(repl, regs, str): expand \digit escapes in replacement string
122 # Manage a cache of compiled regular expressions.
124 # If the pattern is a string a compiled version of it is returned. If
125 # the pattern has been used before we return an already compiled
126 # version from the cache; otherwise we compile it now and save the
127 # compiled version in the cache, along with the syntax it was compiled
128 # with. Instead of a string, a compiled regular expression can also be passed.
134 if type(pat
) != type(''):
135 return pat
# Assume it is a compiled regex
136 key
= (pat
, regex
.get_syntax())
138 prog
= cache
[key
] # Get it from the cache
140 prog
= cache
[key
] = regex
.compile(pat
)
149 # Expand \digit in the replacement.
150 # Each occurrence of \digit is replaced by the substring of str
151 # indicated by regs[digit]. To include a literal \ in the
152 # replacement, double it; other \ escapes are left unchanged (i.e.
153 # the \ and the following character are both copied).
155 def expand(repl
, regs
, str):
163 if c
!= '\\' or i
>= len(repl
):
168 a
, b
= regs
[ord(c
)-ord0
]
177 # Test program, reads sequences "pat repl str" from stdin.
178 # Optional argument specifies pattern used to split lines.
187 if sys
.stdin
.isatty(): sys
.stderr
.write('--> ')
188 line
= sys
.stdin
.readline()
190 if line
[-1] == '\n': line
= line
[:-1]
191 fields
= split(line
, delpat
)
193 print 'Sorry, not three fields'
194 print 'split:', `fields`
196 [pat
, repl
, str] = split(line
, delpat
)
197 print 'sub :', `
sub(pat
, repl
, str)`
198 print 'gsub:', `
gsub(pat
, repl
, str)`