1 /* eslint-disable no-useless-concat */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // NamedCaptureGroup class represents a named capturing group in a regular expression
7 class NamedCaptureGroup {
8 // The name of this capturing group
11 // The capturing group
// Keeps `capture` in the private #capture field.
// NOTE(review): `name` is presumably stored in #name (setMatch reads it) — confirm.
17 constructor(name, capture) {
19 this.#capture = capture;
34 // Setter for the matched result based on the match groups
// `matchGroups` is indexed by this group's name; the value found there
// (undefined when the key is absent) becomes the stored match.
35 setMatch(matchGroups) {
36 this.#match = matchGroups[this.#name];
40 // Base class for the different parts of a street address regular expression.
41 // The regular expression is constructed with prefix, pattern, suffix
42 // and separator to extract the "value" part.
43 // For example, when we write "apt 4." for an apartment number, its prefix is `apt`,
44 // its suffix is `.` and the value representing the apartment number is `4`.
45 class StreetAddressPartRegExp extends NamedCaptureGroup {
// name:    name of the named capture group that receives the value
// prefix:  pattern expected before the value (may be null/undefined)
// pattern: pattern of the value itself
// suffix:  pattern expected after the value (may be null/undefined)
// sep:     separator pattern; it must occur one or more times after the suffix
// NOTE(review): `optional` (default false) presumably makes the whole part
// optional in the final expression — confirm where it is consumed.
46 constructor(name, prefix, pattern, suffix, sep, optional = false) {
// A missing prefix/suffix is treated as the empty pattern.
47 prefix = prefix ?? "";
48 suffix = suffix ?? "";
// Prefix, suffix and separator are wrapped in non-capturing groups; only
// `pattern` is captured, under the group name `name`.
51 `((?:${prefix})(?<${name}>${pattern})(?:${suffix})(?:${sep})+)${
58 // A regular expression to match the street number portion of a street address.
59 class StreetNumberRegExp extends StreetAddressPartRegExp {
// Optional marker ("no", "°", "º" or "number") followed by any number of
// ".", "-" or whitespace characters.
60 static PREFIX = "((no|°|º|number)(\\.|-|\\s)*)?"; // From chromium source
// One or more digits, optionally followed by a single word character.
62 static PATTERN = "\\d+\\w?";
64 // TODO: possible suffix : (th\\.|\\.)?
// NOTE(review): the values below look like the (name, prefix, pattern, suffix)
// arguments forwarded to StreetAddressPartRegExp — confirm.
67 constructor(sep, optional) {
69 StreetNumberRegExp.name,
70 StreetNumberRegExp.PREFIX,
71 StreetNumberRegExp.PATTERN,
72 StreetNumberRegExp.SUFFIX,
79 // A regular expression to match the street name portion of a street address.
80 class StreetNameRegExp extends StreetAddressPartRegExp {
// One or more "words" (runs of characters that are neither whitespace nor a
// comma) separated by whitespace other than CR/LF; the repetition is lazy.
83 static PATTERN = "(?:[^\\s,]+(?:[^\\S\\r\\n]+[^\\s,]+)*?)"; // From chromium source
85 // TODO: Should we consider suffix like (ave|st)?
// NOTE(review): the values below look like the (name, prefix, pattern, suffix)
// arguments forwarded to StreetAddressPartRegExp — confirm.
88 constructor(sep, optional) {
90 StreetNameRegExp.name,
91 StreetNameRegExp.PREFIX,
92 StreetNameRegExp.PATTERN,
93 StreetNameRegExp.SUFFIX,
100 // A regular expression to match the apartment number portion of a street address.
101 class ApartmentNumberRegExp extends StreetAddressPartRegExp {
// Alternation of the keywords that introduce an apartment number.
// NOTE(review): the two trailing comments presumably describe the first and the
// second string literal respectively (as in FloorNumberRegExp) — confirm.
102 static keyword = "apt|apartment|wohnung|apto|-" + "|unit|suite|ste|#|room"; // From chromium source // Firefox specific
// A keyword is required here (no `?`), followed by any number of ".",
// whitespace or "-" characters.
103 static PREFIX = `(${ApartmentNumberRegExp.keyword})(\\.|\\s|-)*`;
// Zero or more word characters, optionally followed by one of "-", "|" or "/"
// and more word characters.
// NOTE(review): inside a character class "|" is a literal, so "4|B" matches — confirm this is intended.
105 static PATTERN = "\\w*([-|\\/]\\w*)?";
// Any number of ".", whitespace or "-" characters, then an optional "ª".
107 static SUFFIX = "(\\.|\\s|-)*(ª)?"; // From chromium source
// NOTE(review): the values below look like the (name, prefix, pattern, suffix)
// arguments forwarded to StreetAddressPartRegExp — confirm.
109 constructor(sep, optional) {
111 ApartmentNumberRegExp.name,
112 ApartmentNumberRegExp.PREFIX,
113 ApartmentNumberRegExp.PATTERN,
114 ApartmentNumberRegExp.SUFFIX,
121 // A regular expression to match the floor number portion of a street address.
122 class FloorNumberRegExp extends StreetAddressPartRegExp {
124 "floor|flur|fl|og|obergeschoss|ug|untergeschoss|geschoss|andar|piso|º" + // From chromium source
125 "|level|lvl"; // Firefox specific
// The floor keyword is optional both before and after the number, so
// "floor 2" and "2nd floor" style inputs are both expressible.
// Prefix: optional keyword, then any number of ".", whitespace or "-".
126 static PREFIX = `(${FloorNumberRegExp.keyword})?(\\.|\\s|-)*`; // TODO
// One to three digits, optionally followed by a single word character.
127 static PATTERN = "\\d{1,3}\\w?";
// Suffix: optional ordinal ending, separators, then the optional keyword.
128 static SUFFIX = `(st|nd|rd|th)?(\\.|\\s|-)*(${FloorNumberRegExp.keyword})?`; // TODO
// NOTE(review): the values below look like the (name, prefix, pattern, suffix)
// arguments forwarded to StreetAddressPartRegExp — confirm.
130 constructor(sep, optional) {
132 FloorNumberRegExp.name,
133 FloorNumberRegExp.PREFIX,
134 FloorNumberRegExp.PATTERN,
135 FloorNumberRegExp.SUFFIX,
143 * Class represents a street address with the following fields:
// Holds the parsed components of a street address in private fields that are
// exposed through read-only getters.
149 export class StructuredStreetAddress {
150 #street_number = null;
152 #apartment_number = null;
153 #floor_number = null;
155 // If name_first is true, then the street name is given first,
156 // otherwise the street number is given first.
// In other words, when name_first is true the two values are swapped: the
// value received as `street_name` is stored as the street number and the
// value received as `street_number` is stored as the street name.
// Every value is converted with toString(); because of the optional chaining
// a null/undefined input is stored as undefined (not as the initial null).
164 this.#street_number = name_first
165 ? street_name?.toString()
166 : street_number?.toString();
167 this.#street_name = name_first
168 ? street_number?.toString()
169 : street_name?.toString();
170 this.#apartment_number = apartment_number?.toString();
171 this.#floor_number = floor_number?.toString();
// Read-only accessors for the private fields.
174 get street_number() {
175 return this.#street_number;
179 return this.#street_name;
182 get apartment_number() {
183 return this.#apartment_number;
187 return this.#floor_number;
192 street number: ${this.#street_number}\n
193 street name: ${this.#street_name}\n
194 apartment number: ${this.#apartment_number}\n
195 floor number: ${this.#floor_number}\n
200 export class AddressParser {
202 * Parse street address with the following pattern.
203 * street number, street name, apartment number(optional), floor number(optional)
204 * For example, 2 Harrison St #175 floor 2
206 * @param {string} address The street address to be parsed.
207 * @returns {StructuredStreetAddress}
209 static parseStreetAddress(address) {
// Each address part must be followed by whitespace, a comma or the end of
// the input.
214 const separator = "(\\s|,|$)";
// First attempt: street number before street name. The apartment and floor
// parts are created with optional = true.
217 new StreetNumberRegExp(separator),
218 new StreetNameRegExp(separator),
219 new ApartmentNumberRegExp(separator, true),
220 new FloorNumberRegExp(separator, true),
// On a successful parse, each part's match is spread into the
// StructuredStreetAddress constructor in the same order as the parts above.
223 if (AddressParser.parse(address, regexpes)) {
224 return new StructuredStreetAddress(
226 ...regexpes.map(regexp => regexp.match)
230 // Swap the street number and name.
231 const regexpesReverse = [
// Second attempt, used only when the first ordering did not parse.
238 if (AddressParser.parse(address, regexpesReverse)) {
239 return new StructuredStreetAddress(
241 ...regexpesReverse.map(regexp => regexp.match)
// Matches `address` against the concatenation of the given part expressions
// and returns an object mapping each part's name to its matched value.
248 static parse(address, regexpes) {
251 merge_whitespace: true,
// Normalize first so that runs of whitespace are collapsed before matching.
253 address = AddressParser.normalizeString(address, options);
// All part captures are joined into one expression that must match the
// whole string (^...$), case-insensitively ("i" flag).
255 const match = address.match(
256 new RegExp(`^(${regexpes.map(regexp => regexp.capture).join("")})$`, "i")
// Hand the named groups to every part so each can pick out its own match,
// then collect the results into a single { name: match } object.
262 regexpes.forEach(regexp => regexp.setMatch(match.groups));
263 return regexpes.reduce((acc, current) => {
264 return { ...acc, [current.name]: current.match };
// Applies the normalization steps selected in `options` to the string `s`.
// Options read here: ignore_case, remove_punctuation, replace_punctuation,
// merge_whitespace and remove_whitespace.
268 static normalizeString(s, options) {
// NOTE(review): loose inequality; `!==` would be equivalent for a typeof check.
269 if (typeof s != "string") {
273 if (options.ignore_case) {
277 // process punctuation before whitespace because if a punctuation
278 // is replaced with whitespace, we might want to merge it later
// remove_punctuation takes precedence over replace_punctuation.
279 if (options.remove_punctuation) {
280 s = AddressParser.replacePunctuation(s, "");
281 } else if ("replace_punctuation" in options) {
282 const replace = options.replace_punctuation;
283 s = AddressParser.replacePunctuation(s, replace);
286 // process whitespace
// merge_whitespace takes precedence over remove_whitespace.
287 if (options.merge_whitespace) {
288 s = AddressParser.mergeWhitespace(s);
289 } else if (options.remove_whitespace) {
290 s = AddressParser.removeWhitespace(s);
// Replaces every Unicode punctuation character in `s` with `replace`.
// Returns undefined when `s` is null/undefined (optional chaining).
296 static replacePunctuation(s, replace) {
// The "u" flag is required for the \p{...} Unicode property escape.
297 const regex = /\p{Punctuation}/gu;
298 return s?.replace(regex, replace);
// Removes a fixed set of ASCII punctuation characters from `s`
// (. , / # ! $ % ^ & * ; : { } = - _ ~ ( )).
// Returns undefined when `s` is null/undefined (optional chaining).
301 static removePunctuation(s) {
302 return s?.replace(/[.,\/#!$%\^&\*;:{}=\-_~()]/g, "");
// Replaces each tab, line feed and carriage return in `s` with one space.
// Returns undefined when `s` is null/undefined (optional chaining).
305 static replaceControlCharacters(s) {
306 return s?.replace(/[\t\n\r]/g, " ");
// Removes every whitespace character from `s`.
// Returns undefined when `s` is null/undefined (optional chaining).
309 static removeWhitespace(s) {
310 return s?.replace(/[\s]/g, "");
// Collapses every run of two or more whitespace characters in `s` into a
// single space. A lone whitespace character (e.g. a single tab) is left as is.
// Returns undefined when `s` is null/undefined (optional chaining).
313 static mergeWhitespace(s) {
314 return s?.replace(/\s{2,}/g, " ");