4 Simple parser for spec compliant toml files
6 A simple toml parser for files that comply with the spec.
7 Should only be used to parse `pyproject.toml` for `install_deps.py`.
9 IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
12 from __future__
import annotations
# NOTE: ``WS`` is defined outside this block; presumably it is the pattern for
# optional inline whitespace -- confirm against its definition.

# A single-line basic ("...", with backslash escapes) or literal ('...') string.
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')

# One key segment: either a quoted string or a bare key.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')

# One or more key segments joined by dots, each padded by ``WS``.
KEY_RE = re.compile(
    rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}'
    rf'(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*'
)

# The ``=`` of a key/value pair together with the ``WS`` that follows it.
EQUALS_RE = re.compile(rf'={WS}')
WS_RE = re.compile(WS)

# ``[path]`` or ``[[path]]`` header; ``is_list`` is set only for the doubled form.
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'

# Start of any top-level expression: a table header or the ``key =`` of a pair.
EXPRESSION_RE = re.compile(
    rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)',
    re.MULTILINE,
)

# Whitespace allowed between list items: ``WS``, comments and line breaks.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')

# Raw text of a non-string scalar, up to the next delimiter or comment.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
def parse_key(value: str):
    """Yield the individual segments of a (possibly dotted) key, in order.

    Segments are located with ``SINGLE_KEY_RE``:

    * a basic (double-quoted) segment is decoded with ``json.loads``;
    * a literal (single-quoted) segment has its surrounding quotes removed,
      the same way ``parse_value`` treats literal strings;
    * a bare segment is yielded verbatim.
    """
    for match in SINGLE_KEY_RE.finditer(value):
        segment = match[0]
        if segment[0] == '"':
            yield json.loads(segment)
        elif segment[0] == '\'':
            yield segment[1:-1]
        else:
            yield segment
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk ``paths`` from ``root`` and return the dict that should receive keys.

    Missing containers are created along the way.

    Args:
        root: Mapping to start from; it is mutated in place.
        paths: Key segments leading to the wanted table. An empty list
            returns ``root`` itself.
        is_list: When true, the final segment names an array of tables: a new
            empty dict is appended to that list and returned.

    Returns:
        The dict found or created at the end of the path.

    Raises:
        AssertionError: If the path ends on something that is not a dict.
    """
    target = root
    last = len(paths)

    for index, key in enumerate(paths, 1):
        use_list = is_list and index == last
        result = target.get(key)
        if result is None:
            # First time this segment is seen: create and store its container.
            result = [] if use_list else {}
            target[key] = result

        if isinstance(result, dict):
            target = result
        elif use_list:
            # Each array-of-tables header opens a fresh element.
            target = {}
            result.append(target)
        else:
            # Passing through an existing list: continue in its newest element.
            target = result[-1]

    assert isinstance(target, dict)
    return target
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a bracketed, comma-separated sequence.

    This is a generator meant to be used with ``send``. It yields
    ``(True, index)`` whenever an item starts at ``index``; the caller parses
    that item and sends back the index just past it. Once the closing
    delimiter is reached it yields ``(False, index_after_delimiter)``.

    Args:
        data: Full text being parsed.
        index: Position of the opening delimiter.
        end: Closing delimiter character.
        ws_re: Compiled pattern for the whitespace allowed between items.

    Raises:
        AssertionError: If the sequence does not stop on ``end``.
    """
    def skip_ws(pos: int) -> int:
        found = ws_re.match(data, pos)
        return found.end() if found else pos

    # Step over the opening delimiter, then any leading whitespace.
    index = skip_ws(index + 1)

    while data[index] != end:
        index = yield True, index

        index = skip_ws(index)
        if data[index] == ',':
            index += 1
        index = skip_ws(index)

    assert data[index] == end
    yield False, index + 1
def parse_value(data: str, index: int):
    """Parse the value that starts at ``data[index]``.

    Handles lists, inline tables, single-line strings and plain scalars.

    Returns:
        A ``(end_index, value)`` tuple, where ``end_index`` is the position
        just past the parsed value.

    Raises:
        AssertionError: If no scalar text can be matched at ``index``.
    """
    if data[index] == '[':
        result = []

        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            # Each inline-table entry is an ordinary key/value pair.
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        # Basic strings share JSON's escape syntax; literal strings are raw.
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()

    # Try each interpretation in turn; the first that does not raise wins.
    # The final dict lookup never raises, so unrecognised text becomes None.
    converters = (
        int,
        float,
        dt.time.fromisoformat,
        dt.date.fromisoformat,
        dt.datetime.fromisoformat,
        {'true': True, 'false': False}.get,
    )
    for convert in converters:
        try:
            value = convert(value)
        except ValueError:
            continue
        break

    return match.end(), value
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one ``key = value`` pair at ``index`` and store it in ``target``.

    Dotted keys create intermediate tables inside ``target`` as needed.

    Returns:
        The index just past the parsed value, or ``None`` when no key starts
        at ``index``.

    Raises:
        AssertionError: If the key is not followed by ``=``.
    """
    match = KEY_RE.match(data, index)
    if not match:
        return None

    # Every segment but the last names a nested table; the last is the key.
    *keys, key = parse_key(match[0])

    match = EQUALS_RE.match(data, match.end())
    assert match
    index = match.end()

    index, value = parse_value(data, index)
    get_target(target, keys)[key] = value
    return index
def parse_toml(data: str):
    """Parse the text of a TOML document into nested dicts and lists.

    The text is scanned for table headers and key/value pairs. A header
    switches the table that subsequent pairs are written to; a pair is parsed
    and stored in the current table.

    Returns:
        The root dict of the parsed document.

    Raises:
        AssertionError: If a matched key/value expression cannot be parsed.
    """
    root = {}
    target = root

    index = 0
    while True:
        match = EXPRESSION_RE.search(data, index)
        if not match:
            break

        if match.group('subtable'):
            # ``[table]`` / ``[[array.of.tables]]``: retarget following pairs.
            index = match.end()
            path, is_list = match.group('path', 'is_list')
            target = get_target(root, list(parse_key(path)), bool(is_list))
            continue

        index = parse_kv_pair(data, match.start(), target)
        assert index is not None

    return root
172 from pathlib
import Path
174 parser
= argparse
.ArgumentParser()
175 parser
.add_argument('infile', type=Path
, help='The TOML file to read as input')
176 args
= parser
.parse_args()
178 with args
.infile
.open('r', encoding
='utf-8') as file:
182 if isinstance(obj
, (dt
.date
, dt
.time
, dt
.datetime
)):
183 return obj
.isoformat()
185 print(json
.dumps(parse_toml(data
), default
=default
))
188 if __name__
== '__main__':