Release 2024.10.22
[yt-dlp.git] / devscripts / tomlparse.py
blobac9ea3170738738103d4ee6b74506afcbd2e15e5
1 #!/usr/bin/env python3
3 """
4 Simple parser for spec compliant toml files
6 A simple toml parser for files that comply with the spec.
7 Should only be used to parse `pyproject.toml` for `install_deps.py`.
9 IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
10 """
12 from __future__ import annotations
14 import datetime as dt
15 import json
16 import re
# Optional run of horizontal whitespace (spaces and tabs); never matches a newline
WS = r'(?:[\ \t]*)'
# A double-quoted string (backslash escapes allowed) or a single-quoted string;
# neither form may contain a newline
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key component: a quoted string or a run of letters, digits, `_` and `-`
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# One or more key components joined by `.`, with optional whitespace around each
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` of a key/value pair together with the whitespace that follows it
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of `WS`, used between the entries of an inline table
WS_RE = re.compile(WS)

# A header at the start of a line: `[path]`, or `[[path]]` when group `is_list` is set
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of the next statement: either a header or a key followed by `=`
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# Filler between list items: whitespace that may also span newlines and `#` comments
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# A bare (unquoted) value: everything up to `,`, `}`, `]`, a tab, a newline or `#`
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
def parse_key(value: str):
    """
    Yield the individual components of a (possibly dotted) key

    Every match of `SINGLE_KEY_RE` in `value` produces one component:
    double-quoted components are decoded with `json.loads`,
    single-quoted components lose their surrounding quotes
    and bare components are yielded unchanged.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        token = found.group(0)
        opener = token[0]
        if opener == '"':
            # Decoding as JSON takes care of the backslash escapes
            component = json.loads(token)
        elif opener == '\'':
            component = token[1:-1]
        else:
            component = token
        yield component
def get_target(root: dict, paths: list[str], is_list=False):
    """
    Walk `paths` down from `root` and return the table found at the end

    Missing entries are created on the way: as an empty table, or as an
    empty list for the last key when `is_list` is set. When the walk
    passes through an existing list it continues in the list's last item.
    With `is_list` set, a new empty table is appended to the final list
    and that new table is returned.

    @param root     The table to start from
    @param paths    The key components to follow, outermost first
    @param is_list  Whether the last component names a list of tables
    @returns        The table reached after following every component
    """
    current = root

    for position, name in enumerate(paths, 1):
        wants_list = is_list and position == len(paths)
        entry = current.get(name)
        if entry is None:
            entry = current[name] = [] if wants_list else {}

        if isinstance(entry, dict):
            current = entry
            continue

        if not wants_list:
            # Intermediate list: descend into its most recent item
            current = entry[-1]
            continue

        # Final list: open a fresh item and hand that back
        current = {}
        entry.append(current)

    assert isinstance(current, dict)
    return current
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """
    Generator that drives the parsing of a bracketed, comma separated sequence

    `index` has to point at the opening bracket. For each item the generator
    yields `(True, start)` with the index the item starts at and expects the
    index right after that item to be sent back. Once the `end` character is
    reached it yields `(False, stop)` with the index following `end`.

    @param data   The text being parsed
    @param index  The index of the opening bracket
    @param end    The closing character of the sequence
    @param ws_re  The pattern for the filler allowed around the items
    """
    def skip_filler(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    cursor = skip_filler(index + 1)

    while data[cursor] != end:
        resumed = yield True, cursor
        cursor = skip_filler(resumed)

        # A separating (or trailing) comma is optional here
        if data[cursor] == ',':
            cursor += 1

        cursor = skip_filler(cursor)

    assert data[cursor] == end
    yield False, cursor + 1
def parse_value(data: str, index: int):
    """
    Parse the value that starts at `data[index]`

    Supported forms, tried in this order:
     - `[...]`: a list whose items are parsed recursively
     - `{...}`: an inline table whose entries are read by `parse_kv_pair`
     - a string matched by `STRING_RE`; double-quoted strings are decoded
       with `json.loads`, single-quoted strings are taken verbatim
     - a bare value matched by `LEFTOVER_VALUE_RE`, converted to the first
       fitting type out of: integer (also with a `0x`, `0o` or `0b` prefix),
       float, time, date, datetime and boolean

    Bare text that fits none of these types is returned as the stripped
    string instead of being dropped.

    @param data   The text being parsed
    @param index  The index the value starts at
    @returns      A tuple of the index right after the value and the value
    @raises AssertionError  If no value text is found at `index`
    """
    if data[index] == '[':
        result = []

        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    converters = (
        int,
        # Base 0 honours the `0x`/`0o`/`0b` prefixes which plain `int` rejects
        lambda text: int(text, 0),
        float,
        dt.time.fromisoformat,
        dt.date.fromisoformat,
        dt.datetime.fromisoformat,
        # Item access (unlike `.get`) raises for anything but a boolean,
        # so unknown text is kept as-is instead of turning into `None`
        {'true': True, 'false': False}.__getitem__,
    )
    for convert in converters:
        try:
            value = convert(value)
        except (ValueError, KeyError):
            continue
        break

    return match.end(), value
def parse_kv_pair(data: str, index: int, target: dict):
    """
    Parse one `key = value` pair at `index` and store it in `target`

    Dotted keys are resolved through `get_target`, so the value ends up in
    the nested table named by all key components except the last one.

    @param data    The text being parsed
    @param index   The index the key starts at
    @param target  The table that receives the value
    @returns       The index right after the value,
                   or `None` if there is no key at `index`
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parents, leaf = parse_key(key_match.group(0))

    equals = EQUALS_RE.match(data, key_match.end())
    assert equals

    stop, value = parse_value(data, equals.end())
    table = get_target(target, parents)
    table[leaf] = value
    return stop
def parse_toml(data: str):
    """
    Parse the TOML document `data` into nested dicts and lists

    The text is scanned for statements matching `EXPRESSION_RE`.
    A `[table]` or `[[list of tables]]` header selects the table that the
    following key/value pairs are stored in; until the first header
    they are stored in the top level table.

    @param data  The complete text of the document
    @returns     The top level table
    """
    document = {}
    current = document
    position = 0

    while found := EXPRESSION_RE.search(data, position):
        if not found.group('subtable'):
            # A key/value pair: parse it from the start of the key
            position = parse_kv_pair(data, found.start(), current)
            assert position is not None
        else:
            position = found.end()
            header, list_marker = found.group('path', 'is_list')
            current = get_target(document, list(parse_key(header)), bool(list_marker))

    return document
def main():
    """Read the TOML file named on the command line and print it as JSON"""
    import argparse
    from pathlib import Path

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', type=Path, help='The TOML file to read as input')
    options = cli.parse_args()

    text = options.infile.read_text(encoding='utf-8')

    def encode_temporal(obj):
        # Dates and times are written in ISO format; for any other
        # object `json` cannot serialize this hands back `None`
        is_temporal = isinstance(obj, (dt.date, dt.time, dt.datetime))
        return obj.isoformat() if is_temporal else None

    print(json.dumps(parse_toml(text), default=encode_temporal))
# Only run the command line interface when executed as a script, not on import
if __name__ == '__main__':
    main()