1 From: Tony Balinski <ajbj@free.fr>
2 Subject: Extending the split() macro built-in
4 This patch extends split()'s functionality. It allows limited splitting,
5 where only a certain number of elements should be retrieved, and also
6 allows the dropping of the last element found if it is empty.
8 The limited count avoids the perhaps unnecessary overhead of generating a
9 large array if only the first few elements are to be used. The dropping of
10 the empty last element allows simple reconstruction after simple splits,
11 very useful for lines. For example, given:
13 a = get_range(0, $text_length)
14 lines = split(a, "\n", "lastnotnull")
16 for (i = 0; i < lines[]; i++)
19 assuming that all lines are '\n' terminated, b == a at the end. Otherwise
20 we have to resort to something like:
22 a = get_range(0, $text_length)
23 lines = split(a, "\n")
26 for (i = 0; i < lines[]; i++) {
31 to make a == b at the end, which is trickier (albeit more general).
35 source/macro.c | 102 ++++++++++++++++++++++++++++++++++++++++++++-------------
36 1 file changed, 80 insertions(+), 22 deletions(-)
38 diff --quilt old/source/macro.c new/source/macro.c
39 --- old/source/macro.c
40 +++ new/source/macro.c
41 @@ -3885,14 +3885,39 @@ static int stringCompareMS(WindowInfo *w
45 -** This function is intended to split strings into an array of substrings
46 -** Importatnt note: It should always return at least one entry with key 0
47 -** split("", ",") result[0] = ""
48 -** split("1,2", ",") result[0] = "1" result[1] = "2"
49 -** split("1,2,", ",") result[0] = "1" result[1] = "2" result[2] = ""
51 -** This behavior is specifically important when used to break up
53 +** This function is intended to split strings into an array of substrings.
55 +** array = split(string, separator[, searchType][, count][, "lastnotnull"])
57 +** Mandatory arguments:
58 +** string: string to split,
59 +** separator: separator string or pattern marking where to split
60 +** Optional arguments:
61 +** searchType: separator search type (default is "literal") to use to find
62 +** occurrences of separator in string.
63 +** count: maximum number of pieces in the returned array (default is
64 +** infinite, must be greater than zero); if smaller than or equal to
65 +** the number of separators found in string, the last piece will
66 +** contain the remainder of the string to split (a count of 1 produces
67 +** a single result in the returned array, equal to the original
69 +** keyword "lastnotnull": if present, this stops an empty string being
70 +** returned in the last entry of the array if the string to split ends
71 +** with the separator. This has the effect of returning an empty array
72 +** if the string to split is originally empty. Otherwise, the returned
73 +** array will always contain at least one element.
75 +** Important note: It should always return at least one entry with key 0
76 +** unless "lastnotnull" is present.
79 +** split("", ",") result[0] = ""
80 +** split(",", ",") result[0] = "" result[1] = ""
81 +** split("1,2", ",") result[0] = "1" result[1] = "2"
82 +** split("1,2,", ",") result[0] = "1" result[1] = "2" result[2] = ""
84 +** This behavior is specifically important when used to break up
85 +** array sub-scripts (unless "lastnotnull" is present)
88 static int splitMS(WindowInfo *window, DataValue *argList, int nArgs,
89 @@ -3905,8 +3930,13 @@ static int splitMS(WindowInfo *window, D
90 char indexStr[TYPE_INT_STR_SIZE(int)], *allocIndexStr;
95 + int haveSearchType = False;
96 + int haveCount = False;
98 + int lastnotnull = False;
99 + int haveLastnotnull = False;
101 + if (nArgs < 2 || nArgs > 4) {
102 return(wrongNArgsErr(errMsg));
104 if (!readStringArg(argList[0], &sourceStr, stringStorage[0], errMsg)) {
105 @@ -3925,16 +3955,40 @@ static int splitMS(WindowInfo *window, D
106 *errMsg = "second argument must be a non-empty string: %s";
109 - if (nArgs > 2 && readStringArg(argList[2], &typeSplitStr, stringStorage[2], errMsg)) {
110 - if (!StringToSearchType(typeSplitStr, &searchType)) {
112 + /* get the search type and maximum element count */
113 + searchType = SEARCH_LITERAL;
114 + for (indexNum = 2; indexNum < nArgs; indexNum++) {
115 + if (!readStringArg(argList[indexNum], &typeSplitStr,
116 + stringStorage[indexNum], errMsg)) {
117 + *errMsg = "non-scalar arguments not allowed: %s";
120 + if (strcmp(typeSplitStr, "lastnotnull") == 0) {
121 + lastnotnull = True;
122 + if (haveLastnotnull) {
123 + *errMsg = "\"lastnotnull\" specified more than once: %s";
126 + } else if (StringToSearchType(typeSplitStr, &searchType)) {
127 + if (haveSearchType) {
128 + *errMsg = "split search type supplied more than once: %s";
131 + haveSearchType = True;
132 + } else if (!haveCount &&
133 + readIntArg(argList[indexNum], &count, errMsg)) {
136 + *errMsg = "split maximum count must be greater than 0: %s";
140 *errMsg = "unrecognized argument to %s";
145 - searchType = SEARCH_LITERAL;
149 result->tag = ARRAY_TAG;
150 result->val.arrayPtr = ArrayNew();
152 @@ -3951,9 +4005,13 @@ static int splitMS(WindowInfo *window, D
155 strcpy(allocIndexStr, indexStr);
156 - found = SearchString(sourceStr, splitStr, SEARCH_FORWARD, searchType,
157 - False, beginPos, &foundStart, &foundEnd,
158 - NULL, NULL, GetWindowDelimiters(window));
159 + if (haveCount && --count == 0) {
162 + found = SearchString(sourceStr, splitStr, SEARCH_FORWARD,
163 + searchType, False, beginPos, &foundStart, &foundEnd,
164 + NULL, NULL, GetWindowDelimiters(window));
166 elementEnd = found ? foundStart : strLength;
167 elementLen = elementEnd - lastEnd;
168 element.tag = STRING_TAG;
169 @@ -3987,8 +4045,8 @@ static int splitMS(WindowInfo *window, D
171 strcpy(allocIndexStr, indexStr);
172 element.tag = STRING_TAG;
173 - if (lastEnd == strLength) {
174 - /* The pattern mathed the end of the string. Add an empty chunk. */
175 + if (lastEnd == strLength && !lastnotnull) {
176 + /* The pattern matched the end of the string. Add an empty chunk. */
177 element.val.str.rep = PERM_ALLOC_STR("");
178 element.val.str.len = 0;
180 @@ -4019,7 +4077,7 @@ static int splitMS(WindowInfo *window, D
181 found = SearchString(sourceStr, splitStr, SEARCH_FORWARD,
182 searchType, False, strLength, &foundStart, &foundEnd,
183 NULL, NULL, GetWindowDelimiters(window));
185 + if (found && !lastnotnull) {
187 sprintf(indexStr, "%d", indexNum);
188 allocIndexStr = AllocString(strlen(indexStr) + 1);