Add files via upload
[LoveSoStrong.git] / parse_message_file.py
blobad08fdf9d65afeef6e0b36f454d7d3f17a3d3925
1 #!/usr/bin/env python
3 from __future__ import absolute_import, division, print_function, unicode_literals
4 import xml.etree.ElementTree as ET
5 from xml.dom import minidom
6 import json
7 import zlib
8 import gzip
9 import bz2
10 import sys
11 import os
12 import io
14 try:
15 import lzma
16 except ImportError:
17 try:
18 from backports import lzma
19 except ImportError:
20 lzma = None
22 try:
23 from io import StringIO
24 except ImportError:
25 try:
26 from cStringIO import StringIO
27 except ImportError:
28 from StringIO import StringIO
# True when the interpreter is Python 2.
PY2 = sys.version_info[0] == 2

# Text/string type aliases: Python 2 exposes ``unicode``/``basestring``;
# when those names are missing (Python 3) both aliases fall back to ``str``.
try:
    unicode_type, str_type = unicode, basestring
except NameError:
    unicode_type, str_type = str, str
# Project identity and version metadata.
__program_name__ = "LoveSoStrong"
__project__ = __program_name__
__project_url__ = "https://repo.or.cz/LoveSoStrong.git"
__version_info__ = (0, 0, 1, "RC 1", 1)
__version_date_info__ = (2024, 8, 14, "RC 1", 1)

# Release date rendered as YYYY.MM.DD (month and day zero-padded).
__version_date__ = ".".join([
    str(__version_date_info__[0]),
    str(__version_date_info__[1]).zfill(2),
    str(__version_date_info__[2]).zfill(2),
])
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"

# Append the release-candidate counter to the date when one is set.
if __version_info__[4] is None:
    __version_date_plusrc__ = __version_date__
else:
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])

# Human-readable version: "major.minor.patch" plus the optional label.
__version__ = ".".join(str(part) for part in __version_info__[:3])
if __version_info__[3] is not None:
    __version__ = __version__ + " " + str(__version_info__[3])
class ZlibFile(object):
    """Minimal file-like object for zlib-compressed data.

    In read mode the whole stream is read and decompressed when the object
    is created; in write mode data is streamed through a zlib compressor.
    Exactly one of ``file_path`` and ``fileobj`` must be supplied.

    Args:
        file_path: Path of the file to open.
        fileobj: Already-open binary file object to use instead of a path.
            It is closed by ``close()`` (and right after loading in read
            mode).
        mode: ``'r'``, ``'w'``, ``'a'`` or ``'x'``, optionally combined with
            ``'b'`` (bytes, the default) or ``'t'`` (text).
        level: Compression level handed to ``zlib.compressobj``.
        wbits: Window-bits value handed to ``zlib.compressobj`` and
            ``zlib.decompress``.
        encoding: Codec used in text mode; UTF-8 when not given.
        errors: Codec error handler used in text mode; ``'strict'`` when
            not given.
        newline: Stored on the instance only; no newline translation is
            performed.

    Raises:
        ValueError: If neither or both of ``file_path``/``fileobj`` are
            given, or if ``mode`` contains none of ``r``, ``w``, ``a``, ``x``.
    """

    def __init__(self, file_path=None, fileobj=None, mode='rb', level=9, wbits=15, encoding=None, errors=None, newline=None):
        if file_path is None and fileobj is None:
            raise ValueError("Either file_path or fileobj must be provided")
        if file_path is not None and fileobj is not None:
            raise ValueError("Only one of file_path or fileobj should be provided")

        self.file_path = file_path
        self.fileobj = fileobj
        self.mode = mode
        self.level = level
        self.wbits = wbits
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self._compressed_data = b''
        self._decompressed_data = b''
        self._position = 0
        self._text_mode = 't' in mode
        self._writing = 'w' in mode or 'a' in mode or 'x' in mode
        self._closed = False
        # Data served by read()/readline(): text in 't' mode, bytes otherwise.
        self._buffer = u'' if self._text_mode else b''

        # The underlying file is always binary; text handling happens in
        # this class, so make sure the real mode carries 'b' and never 't'.
        internal_mode = mode.replace('t', '')
        if 'b' not in internal_mode:
            internal_mode += 'b'

        if self._writing:
            self.file = open(file_path, internal_mode) if file_path is not None else fileobj
            self._compressor = zlib.compressobj(level, zlib.DEFLATED, wbits)
        elif 'r' in mode:
            # open() reports a missing file itself, so no separate
            # exists() pre-check (which would also be race-prone) is needed.
            self.file = open(file_path, internal_mode) if file_path is not None else fileobj
            self._load_file()
        else:
            raise ValueError("Mode should be 'rb' or 'wb'")

    def write(self, data):
        """Compress ``data`` and write it to the underlying file.

        In text mode, text input is encoded first; bytes are passed through.

        Raises:
            IOError: If the file was not opened for writing.
            ValueError: If the file has already been closed.
        """
        if not self._writing:
            raise IOError("File not open for writing")
        if self._closed:
            raise ValueError("I/O operation on closed file")

        if self._text_mode and isinstance(data, type(u'')):
            # str.encode() rejects errors=None, so fall back to 'strict'.
            data = data.encode(self.encoding or 'utf-8', self.errors or 'strict')

        self.file.write(self._compressor.compress(data))

    def close(self):
        """Flush pending compressed data (write modes) and close the file.

        Calling ``close()`` more than once is a no-op.
        """
        if self._closed:
            return
        self._closed = True
        try:
            if self._writing:
                self.file.write(self._compressor.flush())
        finally:
            self.file.close()

    def _load_file(self):
        """Read the whole underlying file, decompress it and close it."""
        try:
            self._compressed_data = self.file.read()
            self._decompressed_data = zlib.decompress(self._compressed_data, self.wbits)
        finally:
            # Close even when reading or decompressing fails.
            self.file.close()
        if self._text_mode:
            self._buffer = self._decompressed_data.decode(
                self.encoding or 'utf-8', self.errors or 'strict')
        else:
            self._buffer = self._decompressed_data

    def read(self, size=-1):
        """Return up to ``size`` items of decompressed data.

        A negative or ``None`` size returns everything that is left.  The
        result is text in text mode and bytes otherwise; an empty value
        means end of data.
        """
        remaining = max(len(self._buffer) - self._position, 0)
        if size is None or size < 0 or size > remaining:
            # Clamp so the position can never run past the end of the data.
            size = remaining
        start = self._position
        self._position = start + size
        return self._buffer[start:self._position]

    def readline(self):
        """Return the next line, including its trailing newline if present."""
        newline = u'\n' if self._text_mode else b'\n'
        newline_pos = self._buffer.find(newline, self._position)
        if newline_pos == -1:
            return self.read()  # Last line without a trailing newline
        line = self._buffer[self._position:newline_pos + 1]
        self._position = newline_pos + 1
        return line

    def readlines(self):
        """Return all remaining lines as a list."""
        return list(self)

    def __iter__(self):
        # readline() returns an empty value at end of data; use it as sentinel.
        return iter(self.readline, self._buffer[:0])

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
def open_compressed_file(filename):
    """Open ``filename`` for reading UTF-8 text, decompressing by extension.

    ``.gz``, ``.bz2`` and ``.xz``/``.lzma`` are opened through their
    modules in text mode; ``.zl``/``.zz`` are treated as raw zlib streams;
    anything else is opened as plain text.  Every branch returns a text
    stream that supports ``readlines()`` and the ``with`` statement.

    Raises:
        ImportError: For ``.xz``/``.lzma`` files when no lzma module is
            available.
    """
    if filename.endswith('.gz'):
        return gzip.open(filename, 'rt', encoding='utf-8')
    if filename.endswith('.bz2'):
        return bz2.open(filename, 'rt', encoding='utf-8')
    if filename.endswith(('.xz', '.lzma')):
        if not lzma:
            raise ImportError("lzma module is not available")
        return lzma.open(filename, 'rt', encoding='utf-8')
    if filename.endswith(('.zl', '.zz')):
        # Decompress eagerly and hand back a text wrapper so callers get
        # the same str-based interface as the other branches.
        with io.open(filename, 'rb') as raw:
            payload = zlib.decompress(raw.read())
        return io.TextIOWrapper(io.BytesIO(payload), encoding='utf-8')
    return io.open(filename, 'r', encoding='utf-8')
def save_compressed_file(data, filename):
    """Write ``data`` to ``filename``, compressing according to its extension.

    ``.gz``, ``.bz2`` and ``.xz``/``.lzma`` go through their modules in
    UTF-8 text mode, ``.zl``/``.zz`` through ``ZlibFile`` in binary mode,
    and any other name is written as plain UTF-8 text.

    Raises:
        ImportError: For ``.xz``/``.lzma`` names when no lzma module is
            available.
    """
    zlib_target = filename.endswith('.zl') or filename.endswith('.zz')
    lzma_target = filename.endswith('.xz') or filename.endswith('.lzma')

    if filename.endswith('.gz'):
        sink = gzip.open(filename, 'wt', encoding='utf-8')
    elif filename.endswith('.bz2'):
        sink = bz2.open(filename, 'wt', encoding='utf-8')
    elif lzma_target:
        if not lzma:
            raise ImportError("lzma module is not available")
        sink = lzma.open(filename, 'wt', encoding='utf-8')
    elif zlib_target:
        # ZlibFile is opened in binary mode, so text is encoded up front.
        payload = data.encode('utf-8') if isinstance(data, str) else data
        with ZlibFile(file_path=filename, mode='wb') as zlib_sink:
            zlib_sink.write(payload)
        return
    else:
        sink = io.open(filename, 'w', encoding='utf-8')

    with sink:
        sink.write(data)
def parse_line(line):
    """Split a ``'var: value'`` line at its first colon.

    Returns:
        ``(key, value)`` with surrounding whitespace stripped, or
        ``(None, None)`` when the line contains no colon.
    """
    name, colon, rest = line.partition(":")
    if not colon:
        return None, None
    return name.strip(), rest.strip()
def validate_non_negative_integer(value, key, line_number):
    """Convert ``value`` to an int and require it to be zero or greater.

    Args:
        value: Raw value to convert with ``int()``.
        key: Name of the field, used in error messages.
        line_number: Input line number, used in error messages.

    Returns:
        The converted integer.

    Raises:
        ValueError: If ``value`` cannot be converted to an integer, or if
            the converted integer is negative.
    """
    try:
        int_value = int(value)
    except (TypeError, ValueError):
        raise ValueError("Invalid integer '{0}' for key '{1}' on line {2}".format(value, key, line_number))
    # Checked outside the try block so this message is not caught and
    # replaced by the "Invalid integer" one above.
    if int_value < 0:
        raise ValueError("Negative value '{0}' for key '{1}' on line {2}".format(value, key, line_number))
    return int_value
def parse_file(filename, validate_only=False, verbose=False):
    """Read ``filename`` via ``open_compressed_file`` and parse its lines.

    Returns whatever ``parse_lines`` returns for the file's lines.
    """
    with open_compressed_file(filename) as source:
        content_lines = source.readlines()
    return parse_lines(content_lines, validate_only, verbose)
def parse_string(data, validate_only=False, verbose=False):
    """Parse archive text held in a string.

    The string is split into lines through ``StringIO`` and handed to
    ``parse_lines``; its result is returned unchanged.
    """
    buffer = StringIO(data)
    return parse_lines(buffer.readlines(), validate_only, verbose)
def parse_lines(lines, validate_only=False, verbose=False):
    """Parse archive text lines into a list of service dictionaries.

    The input is a line-oriented format made of ``--- ... ---`` section
    markers and ``key: value`` lines.  Each archive service becomes a dict
    with the keys ``Users``, ``MessageThreads``, ``Categories``,
    ``Interactions``, ``Categorization`` and ``Info`` (plus whatever
    ``Entry``/``Service``/``Status`` lines add).

    Args:
        lines: Sequence of text lines (each is stripped before matching).
        validate_only: When true, return a status tuple instead of data.
        verbose: When true, print a trace line for most recognised lines.

    Returns:
        The list of parsed services, or, when ``validate_only`` is true,
        ``(True, "", "")`` on success and
        ``(False, "Error: <message>", <offending line>)`` on failure.

    Raises:
        Exception: Any parsing error is re-raised unchanged when
            ``validate_only`` is false.

    NOTE(review): the nesting of the innermost branches was reconstructed
    from a copy of this file whose indentation had been lost; confirm the
    bio-body / message-body guards against the upstream file.
    """
    services = []
    current_service = None
    in_section = {
        'user_list': False,
        'message_list': False,
        'message_thread': False,
        'user_info': False,
        'message_post': False,
        'bio_body': False,
        'message_body': False,
        'comment_section': False,
        'include_service': False,
        'include_users': False,
        'include_messages': False,
        'category_list': False,
        'description_body': False,
        'include_categories': False,
        'categorization_list': False,
        'info_body': False,
        'poll_list': False,
        'poll_body': False,
    }
    include_files = []
    user_id = None
    current_bio = None
    current_message = None
    current_thread = None
    current_category = None
    current_info = None
    current_poll = None
    current_polls = []
    categorization_values = {'Categories': [], 'Forums': []}
    category_ids = {'Categories': set(), 'Forums': set()}

    def parse_include_files(file_list):
        # Parse each listed file and collect every service it defines.
        included_services = []
        for include_file in file_list:
            included_services.extend(parse_file(include_file, validate_only, verbose))
        return included_services

    def parse_include_users(file_list):
        # Merge the 'Users' mappings of every service in the listed files.
        users = {}
        for include_file in file_list:
            included_users = parse_file(include_file, validate_only, verbose)
            for service in included_users:
                users.update(service['Users'])
        return users

    def parse_include_messages(file_list):
        # Concatenate the 'MessageThreads' of every service in the listed files.
        messages = []
        for include_file in file_list:
            included_messages = parse_file(include_file, validate_only, verbose)
            for service in included_messages:
                messages.extend(service['MessageThreads'])
        return messages

    def parse_include_categories(file_list):
        # Concatenate the 'Categories' of every service in the listed files.
        categories = []
        for include_file in file_list:
            included_categories = parse_file(include_file, validate_only, verbose)
            for service in included_categories:
                categories.extend(service['Categories'])
        return categories

    try:
        for line_number, line in enumerate(lines, 1):
            line = line.strip()
            if line == "--- Include Service Start ---":
                in_section['include_service'] = True
                include_files = []
                if verbose:
                    print("Line {0}: {1} (Starting include service section)".format(line_number, line))
                continue
            elif line == "--- Include Service End ---":
                in_section['include_service'] = False
                if verbose:
                    print("Line {0}: {1} (Ending include service section)".format(line_number, line))
                services.extend(parse_include_files(include_files))
                continue
            elif in_section['include_service']:
                include_files.append(line)
                if verbose:
                    print("Line {0}: {1} (Including file for service)".format(line_number, line))
                continue
            elif line == "--- Include Users Start ---":
                in_section['include_users'] = True
                include_files = []
                if verbose:
                    print("Line {0}: {1} (Starting include users section)".format(line_number, line))
                continue
            elif line == "--- Include Users End ---":
                in_section['include_users'] = False
                if verbose:
                    print("Line {0}: {1} (Ending include users section)".format(line_number, line))
                if current_service:
                    current_service['Users'].update(parse_include_users(include_files))
                continue
            elif in_section['include_users']:
                include_files.append(line)
                if verbose:
                    print("Line {0}: {1} (Including file for users)".format(line_number, line))
                continue
            elif line == "--- Include Messages Start ---":
                in_section['include_messages'] = True
                include_files = []
                if verbose:
                    print("Line {0}: {1} (Starting include messages section)".format(line_number, line))
                continue
            elif line == "--- Include Messages End ---":
                in_section['include_messages'] = False
                if verbose:
                    print("Line {0}: {1} (Ending include messages section)".format(line_number, line))
                if current_service:
                    current_service['MessageThreads'].extend(parse_include_messages(include_files))
                continue
            elif in_section['include_messages']:
                include_files.append(line)
                if verbose:
                    print("Line {0}: {1} (Including file for messages)".format(line_number, line))
                continue
            elif line == "--- Include Categories Start ---":
                in_section['include_categories'] = True
                include_files = []
                if verbose:
                    print("Line {0}: {1} (Starting include categories section)".format(line_number, line))
                continue
            elif line == "--- Include Categories End ---":
                in_section['include_categories'] = False
                if verbose:
                    print("Line {0}: {1} (Ending include categories section)".format(line_number, line))
                if current_service:
                    current_service['Categories'].extend(parse_include_categories(include_files))
                    for category in current_service['Categories']:
                        kind_split = category.get('Kind', '').split(",")
                        category['Type'] = kind_split[0].strip() if len(kind_split) > 0 else ""
                        category['Level'] = kind_split[1].strip() if len(kind_split) > 1 else ""
                        # Included categories are not validated, so their
                        # Type may be a key that has no ID set yet.
                        category_ids.setdefault(category['Type'], set()).add(category['ID'])
                continue
            elif in_section['include_categories']:
                include_files.append(line)
                if verbose:
                    print("Line {0}: {1} (Including file for categories)".format(line_number, line))
                continue
            elif line == "--- Start Archive Service ---":
                current_service = {'Users': {}, 'MessageThreads': [], 'Categories': [], 'Interactions': [], 'Categorization': {}, 'Info': ''}
                if verbose:
                    print("Line {0}: {1} (Starting new archive service)".format(line_number, line))
                continue
            elif line == "--- End Archive Service ---":
                # An end marker without a matching start must not add None
                # to the result list.
                if current_service is not None:
                    services.append(current_service)
                current_service = None
                if verbose:
                    print("Line {0}: {1} (Ending archive service)".format(line_number, line))
                continue
            elif line == "--- Start Comment Section ---":
                in_section['comment_section'] = True
                if verbose:
                    print("Line {0}: {1} (Starting comment section)".format(line_number, line))
                continue
            elif line == "--- End Comment Section ---":
                in_section['comment_section'] = False
                if verbose:
                    print("Line {0}: {1} (Ending comment section)".format(line_number, line))
                continue
            elif in_section['comment_section']:
                # Comment lines are skipped entirely.
                if verbose:
                    print("Line {0}: {1} (Comment)".format(line_number, line))
                continue
            elif line == "--- Start Category List ---":
                in_section['category_list'] = True
                current_category = {}
                if verbose:
                    print("Line {0}: {1} (Starting category list)".format(line_number, line))
                continue
            elif line == "--- End Category List ---":
                in_section['category_list'] = False
                if current_category:
                    # 'Kind' is "<Type>, <Level>"; split it into both fields.
                    kind_split = current_category.get('Kind', '').split(",")
                    current_category['Type'] = kind_split[0].strip() if len(kind_split) > 0 else ""
                    current_category['Level'] = kind_split[1].strip() if len(kind_split) > 1 else ""
                    if current_category['Type'] not in categorization_values:
                        raise ValueError("Invalid 'Type' value '{0}' on line {1}. Expected one of {2}.".format(current_category['Type'], line_number, categorization_values.keys()))
                    # A non-zero InSub must name an ID already seen for this Type.
                    if current_category['InSub'] != 0 and current_category['InSub'] not in category_ids[current_category['Type']]:
                        raise ValueError("InSub value '{0}' on line {1} does not match any existing ID values.".format(current_category['InSub'], line_number))
                    current_service['Categories'].append(current_category)
                    category_ids[current_category['Type']].add(current_category['ID'])
                current_category = None
                if verbose:
                    print("Line {0}: {1} (Ending category list)".format(line_number, line))
                continue
            elif line == "--- Start Categorization List ---":
                in_section['categorization_list'] = True
                current_service['Categorization'] = {}
                if verbose:
                    print("Line {0}: {1} (Starting categorization list)".format(line_number, line))
                continue
            elif line == "--- End Categorization List ---":
                in_section['categorization_list'] = False
                if verbose:
                    print("Line {0}: {1} (Ending categorization list)".format(line_number, line))
                # From here on, category 'Type' values are validated against
                # the keys declared in this list.
                categorization_values = current_service['Categorization']
                continue
            elif line == "--- Start Info Body ---":
                in_section['info_body'] = True
                if current_service:
                    current_info = []
                if verbose:
                    print("Line {0}: {1} (Starting info body)".format(line_number, line))
                continue
            elif line == "--- End Info Body ---":
                in_section['info_body'] = False
                if current_service and current_info is not None:
                    current_service['Info'] = "\n".join(current_info)
                    current_info = None
                if verbose:
                    print("Line {0}: {1} (Ending info body)".format(line_number, line))
                continue
            elif in_section['info_body']:
                if current_service and current_info is not None:
                    current_info.append(line)
                if verbose:
                    print("Line {0}: {1}".format(line_number, line))
                continue
            elif line == "--- Start Poll List ---":
                in_section['poll_list'] = True
                current_polls = []
                if verbose:
                    print("Line {0}: {1} (Starting poll list)".format(line_number, line))
                continue
            elif line == "--- End Poll List ---":
                in_section['poll_list'] = False
                if current_message:
                    current_message['Polls'] = current_polls
                if verbose:
                    print("Line {0}: {1} (Ending poll list)".format(line_number, line))
                continue
            elif in_section['poll_list'] and line == "--- Start Poll Body ---":
                in_section['poll_body'] = True
                current_poll = {}
                if verbose:
                    print("Line {0}: {1} (Starting poll body)".format(line_number, line))
                continue
            elif in_section['poll_body'] and line == "--- End Poll Body ---":
                in_section['poll_body'] = False
                if current_poll is not None:
                    current_polls.append(current_poll)
                current_poll = None
                if verbose:
                    print("Line {0}: {1} (Ending poll body)".format(line_number, line))
                continue
            elif in_section['poll_body']:
                key, value = parse_line(line)
                if key and current_poll is not None:
                    # These three fields hold comma-separated lists.
                    if key in ['Answers', 'Results', 'Percentage']:
                        current_poll[key] = [item.strip() for item in value.split(',')]
                    else:
                        current_poll[key] = value
                continue
            elif current_service is not None:
                key, value = parse_line(line)
                if key == "Entry":
                    current_service['Entry'] = validate_non_negative_integer(value, "Entry", line_number)
                elif key == "Service":
                    current_service['Service'] = value
                elif key == "Categories":
                    current_service['Categorization']['Categories'] = [category.strip() for category in value.split(",")]
                    if verbose:
                        print("Line {0}: Categories set to {1}".format(line_number, current_service['Categorization']['Categories']))
                elif key == "Forums":
                    current_service['Categorization']['Forums'] = [forum.strip() for forum in value.split(",")]
                    if verbose:
                        print("Line {0}: Forums set to {1}".format(line_number, current_service['Categorization']['Forums']))
                elif in_section['category_list']:
                    if key == "Kind":
                        current_category['Kind'] = value
                    elif key == "ID":
                        current_category['ID'] = validate_non_negative_integer(value, "ID", line_number)
                    elif key == "InSub":
                        current_category['InSub'] = validate_non_negative_integer(value, "InSub", line_number)
                    elif key == "Headline":
                        current_category['Headline'] = value
                    elif key == "Description":
                        current_category['Description'] = value
                elif line == "--- Start User List ---":
                    in_section['user_list'] = True
                    if verbose:
                        print("Line {0}: {1} (Starting user list)".format(line_number, line))
                    continue
                elif line == "--- End User List ---":
                    in_section['user_list'] = False
                    if verbose:
                        print("Line {0}: {1} (Ending user list)".format(line_number, line))
                    continue
                elif line == "--- Start User Info ---":
                    in_section['user_info'] = True
                    if verbose:
                        print("Line {0}: {1} (Starting user info)".format(line_number, line))
                    continue
                elif line == "--- End User Info ---":
                    in_section['user_info'] = False
                    user_id = None
                    if verbose:
                        print("Line {0}: {1} (Ending user info)".format(line_number, line))
                    continue
                elif line == "--- Start Message List ---":
                    in_section['message_list'] = True
                    if verbose:
                        print("Line {0}: {1} (Starting message list)".format(line_number, line))
                    continue
                elif line == "--- End Message List ---":
                    in_section['message_list'] = False
                    if verbose:
                        print("Line {0}: {1} (Ending message list)".format(line_number, line))
                    continue
                elif line == "--- Start Message Thread ---":
                    in_section['message_thread'] = True
                    current_thread = {'Title': '', 'Messages': []}
                    if verbose:
                        print("Line {0}: {1} (Starting message thread)".format(line_number, line))
                    continue
                elif line == "--- End Message Thread ---":
                    in_section['message_thread'] = False
                    current_service['MessageThreads'].append(current_thread)
                    current_thread = None
                    if verbose:
                        print("Line {0}: {1} (Ending message thread)".format(line_number, line))
                    continue
                elif line == "--- Start Message Post ---":
                    in_section['message_post'] = True
                    current_message = {}
                    if verbose:
                        print("Line {0}: {1} (Starting message post)".format(line_number, line))
                    continue
                elif line == "--- End Message Post ---":
                    in_section['message_post'] = False
                    if current_message:
                        current_thread['Messages'].append(current_message)
                    current_message = None
                    if verbose:
                        print("Line {0}: {1} (Ending message post)".format(line_number, line))
                    continue
                elif in_section['message_list'] and key == "Interactions":
                    current_service['Interactions'] = [interaction.strip() for interaction in value.split(",")]
                    if verbose:
                        print("Line {0}: Interactions set to {1}".format(line_number, current_service['Interactions']))
                elif in_section['message_list'] and key == "Status":
                    current_service['Status'] = [status.strip() for status in value.split(",")]
                    if verbose:
                        print("Line {0}: Status set to {1}".format(line_number, current_service['Status']))
                elif key == "Info":
                    # An "Info:" line opens the info body; the following
                    # lines are collected until "--- End Info Body ---".
                    current_info = []
                    in_section['info_body'] = True
                    if verbose:
                        print("Line {0}: {1} (Starting info body)".format(line_number, line))
                elif in_section['user_list'] and in_section['user_info']:
                    if key == "User":
                        user_id = validate_non_negative_integer(value, "User", line_number)
                        current_service['Users'][user_id] = {'Bio': ""}
                        if verbose:
                            print("Line {0}: User ID set to {1}".format(line_number, user_id))
                    elif key == "Name":
                        if user_id is not None:
                            current_service['Users'][user_id]['Name'] = value
                            if verbose:
                                print("Line {0}: Name set to {1}".format(line_number, value))
                    elif key == "Handle":
                        if user_id is not None:
                            current_service['Users'][user_id]['Handle'] = value
                            if verbose:
                                print("Line {0}: Handle set to {1}".format(line_number, value))
                    elif key == "Location":
                        if user_id is not None:
                            current_service['Users'][user_id]['Location'] = value
                            if verbose:
                                print("Line {0}: Location set to {1}".format(line_number, value))
                    elif key == "Joined":
                        if user_id is not None:
                            current_service['Users'][user_id]['Joined'] = value
                            if verbose:
                                print("Line {0}: Joined date set to {1}".format(line_number, value))
                    elif key == "Birthday":
                        if user_id is not None:
                            current_service['Users'][user_id]['Birthday'] = value
                            if verbose:
                                print("Line {0}: Birthday set to {1}".format(line_number, value))
                    elif line == "--- Start Bio Body ---":
                        if user_id is not None:
                            current_bio = []
                            in_section['bio_body'] = True
                            if verbose:
                                print("Line {0}: Starting bio body".format(line_number))
                    elif line == "--- End Bio Body ---":
                        if user_id is not None and current_bio is not None:
                            current_service['Users'][user_id]['Bio'] = "\n".join(current_bio)
                            current_bio = None
                            in_section['bio_body'] = False
                            if verbose:
                                print("Line {0}: Ending bio body".format(line_number))
                    elif in_section['bio_body'] and current_bio is not None:
                        current_bio.append(line)
                        if verbose:
                            print("Line {0}: Adding to bio body: {1}".format(line_number, line))
                elif in_section['message_list'] and in_section['message_thread']:
                    if key == "Thread":
                        current_thread['Thread'] = validate_non_negative_integer(value, "Thread", line_number)
                        if verbose:
                            print("Line {0}: Thread ID set to {1}".format(line_number, value))
                    elif key == "Category":
                        current_thread['Category'] = [category.strip() for category in value.split(",")]
                        if verbose:
                            print("Line {0}: Category set to {1}".format(line_number, current_thread['Category']))
                    elif key == "Forum":
                        current_thread['Forum'] = [forum.strip() for forum in value.split(",")]
                        if verbose:
                            print("Line {0}: Forum set to {1}".format(line_number, current_thread['Forum']))
                    elif key == "Title":
                        current_thread['Title'] = value
                        if verbose:
                            print("Line {0}: Title set to {1}".format(line_number, value))
                    elif key == "Type":
                        current_thread['Type'] = value
                        if verbose:
                            print("Line {0}: Type set to {1}".format(line_number, value))
                    elif key == "State":
                        current_thread['State'] = value
                        if verbose:
                            print("Line {0}: State set to {1}".format(line_number, value))
                    elif key == "Author":
                        current_message['Author'] = value
                        if verbose:
                            print("Line {0}: Author set to {1}".format(line_number, value))
                    elif key == "Time":
                        current_message['Time'] = value
                        if verbose:
                            print("Line {0}: Time set to {1}".format(line_number, value))
                    elif key == "Date":
                        current_message['Date'] = value
                        if verbose:
                            print("Line {0}: Date set to {1}".format(line_number, value))
                    elif key == "SubType":
                        current_message['SubType'] = value
                        if verbose:
                            print("Line {0}: SubType set to {1}".format(line_number, value))
                    elif key == "Post":
                        post_value = validate_non_negative_integer(value, "Post", line_number)
                        current_message['Post'] = post_value
                        # Remember every Post ID of the thread so later
                        # "Nested" references can be validated.
                        if 'post_ids' not in current_thread:
                            current_thread['post_ids'] = set()
                        current_thread['post_ids'].add(post_value)
                        if verbose:
                            print("Line {0}: Post ID set to {1}".format(line_number, post_value))
                    elif key == "Nested":
                        nested_value = validate_non_negative_integer(value, "Nested", line_number)
                        if nested_value != 0 and nested_value not in current_thread.get('post_ids', set()):
                            raise ValueError(
                                "Nested value '{0}' on line {1} does not match any existing Post values in the current thread. Existing Post IDs: {2}".format(
                                    nested_value, line_number, list(current_thread.get('post_ids', set())))
                            )
                        current_message['Nested'] = nested_value
                        if verbose:
                            print("Line {0}: Nested set to {1}".format(line_number, nested_value))
                    elif line == "--- Start Message Body ---":
                        if current_message is not None:
                            current_message['Message'] = []
                            in_section['message_body'] = True
                            if verbose:
                                print("Line {0}: Starting message body".format(line_number))
                    elif line == "--- End Message Body ---":
                        if current_message is not None and 'Message' in current_message:
                            # Body lines were collected in a list; store
                            # them as one newline-joined string.
                            current_message['Message'] = "\n".join(current_message['Message'])
                            in_section['message_body'] = False
                            if verbose:
                                print("Line {0}: Ending message body".format(line_number))
                    elif in_section['message_body'] and current_message is not None and 'Message' in current_message:
                        current_message['Message'].append(line)
                        if verbose:
                            print("Line {0}: Adding to message body: {1}".format(line_number, line))

        if validate_only:
            return True, "", ""

        return services

    except Exception as e:
        if validate_only:
            # Report the message together with the raw line being parsed.
            return False, "Error: {0}".format(str(e)), lines[line_number - 1]
        else:
            raise
def display_services(services):
    """Print a human-readable report of every parsed service.

    For each service this prints the entry/service header, optional info
    text, interactions, status, categorization levels, the category list,
    the user list and every message thread with its posts and polls.

    Args:
        services: Iterable of service dictionaries.  ``Entry``,
            ``Service``, ``Interactions``, ``Categories``, ``Users`` and
            ``MessageThreads`` are read unconditionally.

    NOTE(review): the position of the blank-line ``print("")`` calls was
    reconstructed from a copy of this file whose indentation had been
    lost; confirm against the upstream file.
    """
    for service in services:
        print("Service Entry: {0}".format(service['Entry']))
        print("Service: {0}".format(service['Service']))

        if 'Info' in service and service['Info']:
            print("Info: {0}".format(service['Info'].strip().replace("\n", "\n ")))

        print("Interactions: {0}".format(', '.join(service['Interactions'])))
        print("Status: {0}".format(', '.join(service.get('Status', []))))

        if 'Categorization' in service and service['Categorization']:
            for category_type, category_levels in service['Categorization'].items():
                # Second placeholder must be {1}; "{0}: {0}" printed the
                # type twice and dropped the levels.
                print("{0}: {1}".format(category_type, ', '.join(category_levels)))

        print("Category List:")
        for category in service['Categories']:
            print(" Type: {0}, Level: {1}".format(category.get('Type', 'N/A'), category.get('Level', 'N/A')))
            print(" ID: {0}".format(category['ID']))
            print(" InSub: {0}".format(category['InSub']))
            print(" Headline: {0}".format(category['Headline']))
            print(" Description: {0}".format(category['Description'].strip().replace("\n", "\n ")))
            print("")

        print("User List:")
        for user_id, user_info in service['Users'].items():
            print(" User ID: {0}".format(user_id))
            print(" Name: {0}".format(user_info['Name']))
            print(" Handle: {0}".format(user_info['Handle']))
            print(" Location: {0}".format(user_info.get('Location', 'N/A')))
            print(" Joined: {0}".format(user_info.get('Joined', 'N/A')))
            print(" Birthday: {0}".format(user_info.get('Birthday', 'N/A')))
            print(" Bio:")
            print(" {0}".format(user_info.get('Bio', '').strip().replace("\n", "\n ")))
            print("")

        print("Message Threads:")
        for idx, thread in enumerate(service['MessageThreads']):
            print(" --- Message Thread {0} ---".format(idx + 1))
            if thread['Title']:
                print(" Title: {0}".format(thread['Title']))
            if 'Category' in thread:
                print(" Category: {0}".format(', '.join(thread['Category'])))
            if 'Forum' in thread:
                print(" Forum: {0}".format(', '.join(thread['Forum'])))
            if 'Type' in thread:
                print(" Type: {0}".format(thread['Type']))
            if 'State' in thread:
                print(" State: {0}".format(thread['State']))

            for message in thread['Messages']:
                # Without an explicit SubType, the first or un-nested post
                # is labelled 'Post' and everything else 'Reply'.
                print(" {0} ({1} on {2}): [{3}] Post ID: {4} Nested: {5}".format(
                    message['Author'], message['Time'], message['Date'],
                    message.get('SubType', 'Post' if message['Post'] == 1 or message['Nested'] == 0 else 'Reply'),
                    message['Post'], message['Nested']))

                # Indent each line of the message body but keep it at the same level
                print(" {0}".format(message['Message'].strip().replace("\n", "\n ")))

                if 'Polls' in message and message['Polls']:
                    print(" Polls:")
                    for poll in message['Polls']:
                        print(" Poll {0}:".format(poll.get('Num', 'N/A')))
                        print(" Question: {0}".format(poll.get('Question', 'N/A')))
                        print(" Answers: {0}".format(", ".join(poll.get('Answers', []))))
                        print(" Results: {0}".format(", ".join(str(r) for r in poll.get('Results', []))))
                        print(" Percentage: {0}".format(", ".join("{:.2f}".format(float(p)) for p in poll.get('Percentage', []))))
                        print(" Votes: {0}".format(poll.get('Votes', 'N/A')))
                print("")
def save_services_to_file(services, filename, line_ending="lf"):
    """Save the services data structure to a file in the archive text format.

    The text is produced by services_to_string() and handed to
    save_compressed_file(), which selects the compression from the file
    extension.

    services    -- list of service dictionaries to serialize.
    filename    -- destination path.
    line_ending -- "lf" (default), "cr" or "crlf"; any other value falls
                   back to "lf".

    NOTE: this module defines save_services_to_file() a second time further
    down; that later definition is the one bound at import time.
    """
    line_sep = {"lf": "\n", "cr": "\r", "crlf": "\r\n"}
    separator = line_sep.get(line_ending, "\n")

    # The formatting logic lives in services_to_string(); reuse it instead of
    # keeping a second copy of the same ~70 lines in sync.
    data = services_to_string(services)

    if separator != "\n":
        # Normalize first so newlines embedded in message/bio text do not end
        # up as "\r\r\n" when converting to CRLF.
        data = data.replace("\r\n", "\n").replace("\r", "\n")
        data = data.replace("\n", separator)

    save_compressed_file(data, filename)
def to_json(services):
    """ Serialize the services data structure to a JSON string (indent of 2) """
    serialized = json.dumps(services, indent=2)
    return serialized
def from_json(json_str):
    """ Parse a JSON string and return the decoded services data structure """
    services = json.loads(json_str)
    return services
def load_from_json_file(json_filename):
    """ Read a JSON file (opened via open_compressed_file) and return the decoded data """
    with open_compressed_file(json_filename) as handle:
        services = json.load(handle)
    return services
def to_xml(services):
    """ Render the services data structure as a pretty-printed XML string.

    Every service becomes a <Service> element under a <Services> root and each
    key of the service becomes a child element named after that key:

    * list value  -- one child per item, tagged with the key minus its last
                     character; dict items are expanded one level, other
                     items are stored as text.
    * dict value  -- one child per sub-key; list sub-values get one child per
                     item (tagged sub-key minus its last character), anything
                     else is stored as text.
    * other value -- stored as the element text.
    """
    root = ET.Element("Services")

    for service in services:
        service_node = ET.SubElement(root, "Service")
        for field, content in service.items():
            # Every branch starts by creating the element for the key itself.
            field_node = ET.SubElement(service_node, field)

            if isinstance(content, list):
                for entry in content:
                    entry_node = ET.SubElement(field_node, field[:-1])  # singular form
                    if isinstance(entry, dict):
                        for name, val in entry.items():
                            ET.SubElement(entry_node, name).text = unicode_type(val)
                    else:
                        entry_node.text = unicode_type(entry)
            elif isinstance(content, dict):
                for name, val in content.items():
                    child_node = ET.SubElement(field_node, name)
                    if isinstance(val, list):
                        for piece in val:
                            ET.SubElement(child_node, name[:-1]).text = unicode_type(piece)
                    else:
                        child_node.text = unicode_type(val)
            else:
                field_node.text = unicode_type(content)

    # Serialize, then round-trip through minidom to get indented output.
    raw_xml = ET.tostring(root, encoding='utf-8')
    if PY2:
        raw_xml = raw_xml.decode('utf-8')
    return minidom.parseString(raw_xml).toprettyxml(indent=" ")
def from_xml(xml_str):
    """ Parse an XML string into a list of service dictionaries.

    Only <Service> elements directly under the root are read.  A child with
    nested elements is parsed with parse_xml_element() and collected in a list
    under its tag; a child without nested elements stores its text.
    """
    root = ET.fromstring(xml_str)
    services = []

    for service_node in root.findall('Service'):
        record = {}
        for node in service_node:
            if not list(node):
                # Leaf element: keep the raw text (None when empty).
                record[node.tag] = node.text
                continue
            record.setdefault(node.tag, []).append(parse_xml_element(node))
        services.append(record)

    return services
def parse_xml_element(element):
    """ Recursively turn the children of an XML element into a dictionary.

    Keys are child tags; a child with nested elements maps to a nested
    dictionary, any other child maps to its text.  When a tag repeats, the
    last occurrence wins.
    """
    return {
        node.tag: (parse_xml_element(node) if list(node) else node.text)
        for node in element
    }
def open_compressed_file(filename):
    """ Open a file for reading, choosing the decompressor from its extension.

    ".gz" uses gzip, ".bz2" uses bz2, ".xz"/".lzma" use lzma (falling back to
    backports.lzma when the stdlib module is missing); any other name is
    opened as a plain file.

    On Python 3 the file is opened in text mode and decoded as UTF-8.  On
    Python 2 it is opened with mode 'r' and no decoding is applied.

    Returns the opened file object; the caller is responsible for closing it
    (for example by using it in a ``with`` statement).
    """
    if filename.endswith('.gz'):
        import gzip
        opener = gzip.open
    elif filename.endswith('.bz2'):
        import bz2
        # bz2.open() only exists on Python 3; Python 2 offers BZ2File instead.
        opener = bz2.BZ2File if PY2 else bz2.open
    elif filename.endswith(('.xz', '.lzma')):
        try:
            import lzma
        except ImportError:
            from backports import lzma
        opener = lzma.open
    else:
        opener = open

    if PY2:
        return opener(filename, 'r')
    return opener(filename, 'rt', encoding='utf-8')
def save_compressed_file(data, filename):
    """ Write text to a file, choosing the compressor from its extension.

    ".gz" uses gzip, ".bz2" uses bz2, ".xz"/".lzma" use lzma (falling back to
    backports.lzma when the stdlib module is missing); any other name is
    written as a plain file.

    data     -- text to write.  On Python 3 it is written through a UTF-8
                text-mode file; on Python 2 it is encoded to UTF-8 and written
                as bytes.
    filename -- destination path; an existing file is overwritten.
    """
    if filename.endswith('.gz'):
        import gzip
        opener = gzip.open
    elif filename.endswith('.bz2'):
        import bz2
        # bz2.open() only exists on Python 3; Python 2 offers BZ2File instead.
        opener = bz2.BZ2File if PY2 else bz2.open
    elif filename.endswith(('.xz', '.lzma')):
        try:
            import lzma
        except ImportError:
            from backports import lzma
        opener = lzma.open
    else:
        opener = open

    # One write path for every container instead of four copies of it.
    if PY2:
        with opener(filename, 'w') as file:
            file.write(data.encode('utf-8'))
    else:
        with opener(filename, 'wt', encoding='utf-8') as file:
            file.write(data)
def load_from_xml_file(xml_filename):
    """ Read an XML file (opened via open_compressed_file) and parse it with from_xml """
    with open_compressed_file(xml_filename) as handle:
        contents = handle.read()
    services = from_xml(contents)
    return services
def save_to_xml_file(services, xml_filename):
    """ Serialize the services with to_xml and write the result via save_compressed_file """
    save_compressed_file(to_xml(services), xml_filename)
def save_to_json_file(services, json_filename):
    """ Serialize the services as JSON (indent of 2) and write it via save_compressed_file """
    save_compressed_file(json.dumps(services, indent=2), json_filename)
def services_to_string(services, line_ending="lf"):
    """Convert the services structure into a string format suitable for saving to a file.

    services    -- iterable of service dictionaries.  'Entry' and 'Service'
                   are always written ('N/A' when missing); 'Info',
                   'Interactions', 'Status', 'Categories', 'MessageThreads'
                   and 'Users' are written only when present (and, for the
                   last three, non-empty).
    line_ending -- "lf" (default), "cr" or "crlf": the separator placed
                   between output lines.  Unknown values fall back to "lf".
                   Newlines embedded inside field values are left untouched.

    Returns the formatted text; every service block is followed by an empty
    line, and an empty services list yields an empty string.
    """
    output = []
    add = output.append

    for service in services:
        add("--- Start Archive Service ---")

        add("Entry: {0}".format(service.get('Entry', 'N/A')))
        add("Service: {0}".format(service.get('Service', 'N/A')))

        if 'Info' in service:
            add("Info: {0}".format(service.get('Info', '<No information provided>')))

        if 'Interactions' in service:
            add("Interactions: {0}".format(", ".join(service['Interactions'])))

        if 'Status' in service:
            add("Status: {0}".format(", ".join(service['Status'])))

        if 'Categories' in service and service['Categories']:
            add("Categories:")
            for category in service['Categories']:
                add(" Type: {0}, Level: {1}".format(category.get('Type', 'N/A'), category.get('Level', 'N/A')))
                add(" ID: {0}".format(category.get('ID', 'N/A')))
                add(" InSub: {0}".format(category.get('InSub', 'N/A')))
                add(" Headline: {0}".format(category.get('Headline', 'N/A')))
                add(" Description: {0}".format(category.get('Description', '')))

        if 'MessageThreads' in service and service['MessageThreads']:
            add("Message Threads:")
            for thread in service['MessageThreads']:
                add(" --- Start Message Thread ---")
                add(" Thread: {0}".format(thread.get('Thread', 'N/A')))
                add(" Title: {0}".format(thread.get('Title', 'N/A')))
                add(" Category: {0}".format(", ".join(thread.get('Category', []))))
                add(" Forum: {0}".format(", ".join(thread.get('Forum', []))))
                add(" Type: {0}".format(thread.get('Type', 'N/A')))
                add(" State: {0}".format(thread.get('State', 'N/A')))

                if 'Messages' in thread and thread['Messages']:
                    for message in thread['Messages']:
                        add(" --- Start Message Post ---")
                        add(" Author: {0}".format(message.get('Author', 'N/A')))
                        add(" Time: {0}".format(message.get('Time', 'N/A')))
                        add(" Date: {0}".format(message.get('Date', 'N/A')))
                        add(" SubType: {0}".format(message.get('SubType', 'N/A')))
                        add(" Post: {0}".format(message.get('Post', 'N/A')))
                        add(" Nested: {0}".format(message.get('Nested', 'N/A')))

                        if 'Message' in message:
                            add(" Message:")
                            add(" {0}".format(message['Message']))

                        if 'Polls' in message and message['Polls']:
                            add(" Polls:")
                            add(" --- Start Poll List ---")
                            for poll in message['Polls']:
                                add(" --- Start Poll Body ---")
                                add(" Num: {0}".format(poll.get('Num', 'N/A')))
                                add(" Question: {0}".format(poll.get('Question', 'N/A')))
                                add(" Answers: {0}".format(", ".join(poll.get('Answers', []))))
                                add(" Results: {0}".format(", ".join(str(r) for r in poll.get('Results', []))))
                                add(" Percentage: {0}".format(", ".join("{:.2f}".format(float(p)) for p in poll.get('Percentage', []))))
                                add(" Votes: {0}".format(poll.get('Votes', 'N/A')))
                                add(" --- End Poll Body ---")
                            add(" --- End Poll List ---")
                        add(" --- End Message Post ---")
                add(" --- End Message Thread ---")

        if 'Users' in service and service['Users']:
            add("User List:")
            for user_id, user in service['Users'].items():
                add(" User ID: {0}".format(user_id))
                add(" Name: {0}".format(user.get('Name', 'N/A')))
                add(" Handle: {0}".format(user.get('Handle', 'N/A')))
                add(" Location: {0}".format(user.get('Location', 'N/A')))
                add(" Joined: {0}".format(user.get('Joined', 'N/A')))
                add(" Birthday: {0}".format(user.get('Birthday', 'N/A')))
                add(" Bio:")
                add(" {0}".format(user.get('Bio', '').replace("\n", "\n ")))

        add("--- End Archive Service ---")
        add("")

    # Join with the requested separator (previously this sat, unreachable and
    # referencing an undefined name, after an unconditional "\n" join).
    line_sep = {"lf": "\n", "cr": "\r", "crlf": "\r\n"}
    return line_sep.get(line_ending, "\n").join(output)
def save_services_to_file(services, filename, line_ending="lf"):
    """ Save the services data structure to a file in the original text format.

    services    -- list of service dictionaries to serialize.
    filename    -- destination path, passed to save_compressed_file().
    line_ending -- "lf" (default), "cr" or "crlf"; any other value falls
                   back to "lf".
    """
    line_sep = {"lf": "\n", "cr": "\r", "crlf": "\r\n"}
    separator = line_sep.get(line_ending, "\n")

    # Call services_to_string() with the services only and apply the line
    # ending here, so this works regardless of whether that function accepts
    # a line_ending argument.
    data = services_to_string(services)

    if separator != "\n":
        # Normalize first so newlines embedded in message/bio text do not end
        # up as "\r\r\n" when converting to CRLF.
        data = data.replace("\r\n", "\n").replace("\r", "\n")
        data = data.replace("\n", separator)

    save_compressed_file(data, filename)
def init_empty_service(entry, service_name, info=''):
    """ Build and return a new service dictionary with empty containers """
    service = {}
    service['Entry'] = entry
    service['Service'] = service_name
    service['Users'] = {}
    service['MessageThreads'] = []
    service['Categories'] = []
    service['Interactions'] = []
    service['Categorization'] = {}
    service['Info'] = info
    return service
def add_user(service, user_id, name, handle, location='', joined='', birthday='', bio=''):
    """ Store a user record under service['Users'][user_id], replacing any existing one """
    profile = {}
    profile['Name'] = name
    profile['Handle'] = handle
    profile['Location'] = location
    profile['Joined'] = joined
    profile['Birthday'] = birthday
    profile['Bio'] = bio
    service['Users'][user_id] = profile
def add_category(service, kind, category_type, category_level, category_id, insub, headline, description):
    """ Add a category to the service and record its level under its type.

    service        -- service dictionary with 'Categories' (list) and
                      'Categorization' (dict) entries.
    kind           -- combined with category_level into the 'Kind' field.
    category_type  -- stored as 'Type'; also the key used in
                      service['Categorization'].
    category_level -- stored as 'Level'; appended to the list for
                      category_type in service['Categorization'] if missing.
    category_id    -- stored as 'ID'.
    insub          -- stored as 'InSub'; 0 means "no parent", any other value
                      must match the ID of a category in the service.
    headline, description -- stored as 'Headline' and 'Description'.

    Raises ValueError when insub is non-zero and matches no category ID.  The
    service is left untouched in that case.
    """
    # Validate before mutating so a rejected category is never stored.
    # A category whose insub equals its own ID is still accepted, as before.
    if insub != 0 and insub != category_id:
        if not any(cat['ID'] == insub for cat in service['Categories']):
            raise ValueError("InSub value '{0}' does not match any existing ID in service.".format(insub))

    category = {
        'Kind': "{0}, {1}".format(kind, category_level),
        'Type': category_type,
        'Level': category_level,
        'ID': category_id,
        'InSub': insub,
        'Headline': headline,
        'Description': description
    }
    service['Categories'].append(category)

    levels = service['Categorization'].setdefault(category_type, [])
    if category_level not in levels:
        levels.append(category_level)
def add_message_thread(service, thread_id, title='', category='', forum='', thread_type='', state=''):
    """ Append a new, message-less thread to service['MessageThreads'].

    category and forum are comma-separated strings; each is split on ','
    into a list, and an empty value becomes an empty list.
    """
    categories = []
    if category:
        categories = category.split(',')

    forums = []
    if forum:
        forums = forum.split(',')

    service['MessageThreads'].append({
        'Thread': thread_id,
        'Title': title,
        'Category': categories,
        'Forum': forums,
        'Type': thread_type,
        'State': state,
        'Messages': []
    })
def add_message_post(service, thread_id, author, time, date, subtype, post_id, nested, message):
    """ Append a post to the first thread whose 'Thread' equals thread_id.

    Raises ValueError when the service has no such thread.
    """
    for candidate in service['MessageThreads']:
        if candidate['Thread'] == thread_id:
            target = candidate
            break
    else:
        raise ValueError("Thread ID {0} not found in service.".format(thread_id))

    target['Messages'].append({
        'Author': author,
        'Time': time,
        'Date': date,
        'SubType': subtype,
        'Post': post_id,
        'Nested': nested,
        'Message': message
    })
def add_poll(service, thread_id, post_id, poll_num, question, answers, results, percentages, votes):
    """ Attach a poll to the post post_id inside the thread thread_id.

    The poll is appended to the post's 'Polls' list, which is created when
    missing.  Raises ValueError when the thread, or the post within it,
    cannot be found.
    """
    target_thread = None
    for candidate in service['MessageThreads']:
        if candidate['Thread'] == thread_id:
            target_thread = candidate
            break
    if target_thread is None:
        raise ValueError("Thread ID {0} not found in service.".format(thread_id))

    target_post = None
    for candidate in target_thread['Messages']:
        if candidate['Post'] == post_id:
            target_post = candidate
            break
    if target_post is None:
        raise ValueError("Post ID {0} not found in thread {1}.".format(post_id, thread_id))

    target_post.setdefault('Polls', []).append({
        'Num': poll_num,
        'Question': question,
        'Answers': answers,
        'Results': results,
        'Percentage': percentages,
        'Votes': votes
    })
def remove_user(service, user_id):
    """ Delete the user stored under user_id; raise ValueError if there is none """
    users = service['Users']
    if user_id not in users:
        raise ValueError("User ID {0} not found in service.".format(user_id))
    del users[user_id]
def remove_category(service, category_id):
    """ Remove the first category whose 'ID' equals category_id.

    Raises ValueError when no category matches.  service['Categorization']
    is not modified.
    """
    found = None
    for candidate in service['Categories']:
        if candidate['ID'] == category_id:
            found = candidate
            break

    if not found:
        raise ValueError("Category ID {0} not found in service.".format(category_id))
    service['Categories'].remove(found)
def remove_message_thread(service, thread_id):
    """ Remove the first thread whose 'Thread' equals thread_id.

    Raises ValueError when no thread matches.
    """
    found = None
    for candidate in service['MessageThreads']:
        if candidate['Thread'] == thread_id:
            found = candidate
            break

    if not found:
        raise ValueError("Thread ID {0} not found in service.".format(thread_id))
    service['MessageThreads'].remove(found)
def remove_message_post(service, thread_id, post_id):
    """ Remove the post post_id from the thread thread_id.

    Raises ValueError when the thread, or the post within it, cannot be found.
    """
    target_thread = None
    for candidate in service['MessageThreads']:
        if candidate['Thread'] == thread_id:
            target_thread = candidate
            break
    if target_thread is None:
        raise ValueError("Thread ID {0} not found in service.".format(thread_id))

    target_post = None
    for candidate in target_thread['Messages']:
        if candidate['Post'] == post_id:
            target_post = candidate
            break
    if target_post is None:
        raise ValueError("Post ID {0} not found in thread {1}.".format(post_id, thread_id))

    target_thread['Messages'].remove(target_post)
def add_service(services, entry, service_name, info=None):
    """ Create an empty service, append it to services and return it.

    A falsy info (None, '') is stored as an empty string.
    """
    created = {}
    created['Entry'] = entry
    created['Service'] = service_name
    created['Info'] = info or ''
    created['Interactions'] = []
    created['Status'] = []
    created['Categorization'] = {'Categories': [], 'Forums': []}
    created['Categories'] = []
    created['Users'] = {}
    created['MessageThreads'] = []

    services.append(created)
    return created
def remove_service(services, entry):
    """ Remove the first service whose 'Entry' equals entry.

    Raises ValueError when no service matches.
    """
    found = None
    for candidate in services:
        if candidate['Entry'] == entry:
            found = candidate
            break

    if not found:
        raise ValueError("Service entry {0} not found.".format(entry))
    services.remove(found)