1 from __future__
import absolute_import
, division
, print_function
, unicode_literals
12 from backports
import lzma
17 from io
import StringIO
20 from cStringIO
import StringIO
22 from StringIO
import StringIO
24 PY2
= sys
.version_info
[0] == 2
26 def open_compressed_file(filename
):
27 """ Open a file, trying various compression methods if available. """
28 if filename
.endswith('.gz'):
29 return gzip
.open(filename
, 'rt', encoding
='utf-8')
30 elif filename
.endswith('.bz2'):
31 return bz2
.open(filename
, 'rt', encoding
='utf-8')
32 elif filename
.endswith('.xz') or filename
.endswith('.lzma'):
34 return lzma
.open(filename
, 'rt', encoding
='utf-8')
36 raise ImportError("lzma module is not available")
38 return io
.open(filename
, 'r', encoding
='utf-8')
40 def save_compressed_file(data
, filename
):
41 """ Save data to a file, using various compression methods if specified. """
42 if filename
.endswith('.gz'):
43 with gzip
.open(filename
, 'wt', encoding
='utf-8') as file:
45 elif filename
.endswith('.bz2'):
46 with bz2
.open(filename
, 'wt', encoding
='utf-8') as file:
48 elif filename
.endswith('.xz') or filename
.endswith('.lzma'):
50 with lzma
.open(filename
, 'wt', encoding
='utf-8') as file:
53 raise ImportError("lzma module is not available")
55 with io
.open(filename
, 'w', encoding
='utf-8') as file:
59 """ Parse a line in the format 'var: value' and return the key and value. """
60 parts
= line
.split(":", 1)
62 key
= parts
[0].strip()
63 value
= parts
[1].strip()
67 def validate_non_negative_integer(value
, variable_name
, line_number
):
68 """ Validate and convert a value to a non-negative integer. """
70 int_value
= int(value
)
75 raise ValueError("{0} on line {1} should be a non-negative integer, but got '{2}'.".format(variable_name
, line_number
, value
))
def parse_file(filename, validate_only=False, verbose=False):
    """ Read all lines of a (possibly compressed) file and parse them.

    The file is opened with ``open_compressed_file``; its lines are read in
    one go and handed to ``parse_lines`` together with ``validate_only`` and
    ``verbose``, which are forwarded unchanged. The result of ``parse_lines``
    is returned as-is.
    """
    with open_compressed_file(filename) as handle:
        raw_lines = handle.readlines()
    return parse_lines(raw_lines, validate_only, verbose)
def parse_string(data, validate_only=False, verbose=False):
    """ Parse archive text that is already held in memory.

    ``data`` is wrapped in a ``StringIO`` buffer so it can be split into
    lines exactly like a file would be; the lines are then passed to
    ``parse_lines`` along with ``validate_only`` and ``verbose``.
    """
    text_buffer = StringIO(data)
    raw_lines = text_buffer.readlines()
    return parse_lines(raw_lines, validate_only, verbose)
86 def parse_lines(lines
, validate_only
=False, verbose
=False):
88 current_service
= None
90 in_message_list
= False
91 in_message_thread
= False
93 in_message_post
= False
95 in_message_body
= False
96 in_comment_section
= False
97 in_include_service
= False
98 in_include_users
= False
99 in_include_messages
= False
100 in_category_list
= False
101 in_description_body
= False
102 in_include_categories
= False
103 in_categorization_list
= False
107 current_message
= None
108 current_thread
= None
109 current_category
= None
110 categorization_values
= []
111 category_ids
= {'Categories': set(), 'Forums': set()}
114 def parse_include_files(file_list
):
115 included_services
= []
116 for include_file
in file_list
:
117 included_services
.extend(parse_file(include_file
, validate_only
, verbose
))
118 return included_services
120 def parse_include_users(file_list
):
122 for include_file
in file_list
:
123 included_users
= parse_file(include_file
, validate_only
, verbose
)
124 for service
in included_users
:
125 users
.update(service
['Users'])
128 def parse_include_messages(file_list
):
130 for include_file
in file_list
:
131 included_messages
= parse_file(include_file
, validate_only
, verbose
)
132 for service
in included_messages
:
133 messages
.extend(service
['MessageThreads'])
136 def parse_include_categories(file_list
):
138 for include_file
in file_list
:
139 included_categories
= parse_file(include_file
, validate_only
, verbose
)
140 for service
in included_categories
:
141 categories
.extend(service
['Categories'])
145 for line_number
, line
in enumerate(lines
, 1):
147 if line
== "--- Include Service Start ---":
148 in_include_service
= True
151 print("Line {0}: {1} (Starting include service section)".format(line_number
, line
))
153 elif line
== "--- Include Service End ---":
154 in_include_service
= False
156 print("Line {0}: {1} (Ending include service section)".format(line_number
, line
))
157 services
.extend(parse_include_files(include_files
))
159 elif in_include_service
:
160 include_files
.append(line
)
162 print("Line {0}: {1} (Including file for service)".format(line_number
, line
))
164 elif line
== "--- Include Users Start ---":
165 in_include_users
= True
168 print("Line {0}: {1} (Starting include users section)".format(line_number
, line
))
170 elif line
== "--- Include Users End ---":
171 in_include_users
= False
173 print("Line {0}: {1} (Ending include users section)".format(line_number
, line
))
175 current_service
['Users'].update(parse_include_users(include_files
))
177 elif in_include_users
:
178 include_files
.append(line
)
180 print("Line {0}: {1} (Including file for users)".format(line_number
, line
))
182 elif line
== "--- Include Messages Start ---":
183 in_include_messages
= True
186 print("Line {0}: {1} (Starting include messages section)".format(line_number
, line
))
188 elif line
== "--- Include Messages End ---":
189 in_include_messages
= False
191 print("Line {0}: {1} (Ending include messages section)".format(line_number
, line
))
193 current_service
['MessageThreads'].extend(parse_include_messages(include_files
))
195 elif in_include_messages
:
196 include_files
.append(line
)
198 print("Line {0}: {1} (Including file for messages)".format(line_number
, line
))
200 elif line
== "--- Include Categories Start ---":
201 in_include_categories
= True
204 print("Line {0}: {1} (Starting include categories section)".format(line_number
, line
))
206 elif line
== "--- Include Categories End ---":
207 in_include_categories
= False
209 print("Line {0}: {1} (Ending include categories section)".format(line_number
, line
))
211 current_service
['Categories'].extend(parse_include_categories(include_files
))
212 for category
in current_service
['Categories']:
213 category_ids
[category
['Type']].add(category
['ID'])
215 elif in_include_categories
:
216 include_files
.append(line
)
218 print("Line {0}: {1} (Including file for categories)".format(line_number
, line
))
220 elif line
== "--- Start Archive Service ---":
221 current_service
= {'Users': {}, 'MessageThreads': [], 'Categories': [], 'Interactions': [], 'Categorization': []}
223 print("Line {0}: {1} (Starting new archive service)".format(line_number
, line
))
225 elif line
== "--- End Archive Service ---":
226 services
.append(current_service
)
227 current_service
= None
229 print("Line {0}: {1} (Ending archive service)".format(line_number
, line
))
231 elif line
== "--- Start Comment Section ---":
232 in_comment_section
= True
234 print("Line {0}: {1} (Starting comment section)".format(line_number
, line
))
236 elif line
== "--- End Comment Section ---":
237 in_comment_section
= False
239 print("Line {0}: {1} (Ending comment section)".format(line_number
, line
))
241 elif in_comment_section
:
243 print("Line {0}: {1} (Comment)".format(line_number
, line
))
245 elif line
== "--- Start Category List ---":
246 in_category_list
= True
247 current_category
= {}
249 print("Line {0}: {1} (Starting category list)".format(line_number
, line
))
251 elif line
== "--- End Category List ---":
252 in_category_list
= False
254 kind_split
= current_category
.get('Kind', '').split(",")
255 current_category
['Type'] = kind_split
[0].strip()
256 current_category
['Level'] = kind_split
[1].strip()
257 if current_category
['Type'] not in categorization_values
:
258 raise ValueError("Invalid 'Type' value '{0}' on line {1}. Expected one of {2}.".format(current_category
['Type'], line_number
, categorization_values
))
259 if current_category
['InSub'] != 0 and current_category
['InSub'] not in category_ids
[current_category
['Type']]:
260 raise ValueError("InSub value '{0}' on line {1} does not match any existing ID values.".format(current_category
['InSub'], line_number
))
261 current_service
['Categories'].append(current_category
)
262 category_ids
[current_category
['Type']].add(current_category
['ID'])
263 current_category
= None
265 print("Line {0}: {1} (Ending category list)".format(line_number
, line
))
267 elif line
== "--- Start Categorization List ---":
268 in_categorization_list
= True
269 current_service
['Categorization'] = {}
271 print("Line {0}: {1} (Starting categorization list)".format(line_number
, line
))
273 elif line
== "--- End Categorization List ---":
274 in_categorization_list
= False
276 print("Line {0}: {1} (Ending categorization list)".format(line_number
, line
))
277 categorization_values
= set(current_service
['Categorization'].keys())
279 elif current_service
is not None:
280 key
, value
= parse_line(line
)
282 current_service
['Entry'] = validate_non_negative_integer(value
, "Entry", line_number
)
283 elif key
== "Service":
284 current_service
['Service'] = value
285 elif key
== "Categories":
286 current_service
['Categorization']['Categories'] = [category
.strip() for category
in value
.split(",")]
288 print("Line {0}: Categories set to {1}".format(line_number
, current_service
['Categorization']['Categories']))
289 elif key
== "Forums":
290 current_service
['Categorization']['Forums'] = [forum
.strip() for forum
in value
.split(",")]
292 print("Line {0}: Forums set to {1}".format(line_number
, current_service
['Categorization']['Forums']))
293 elif in_category_list
:
295 current_category
['Kind'] = value
297 current_category
['ID'] = validate_non_negative_integer(value
, "ID", line_number
)
299 current_category
['InSub'] = validate_non_negative_integer(value
, "InSub", line_number
)
300 elif key
== "Headline":
301 current_category
['Headline'] = value
302 elif key
== "Description":
303 current_category
['Description'] = value
304 elif line
== "--- Start User List ---":
307 print("Line {0}: {1} (Starting user list)".format(line_number
, line
))
309 elif line
== "--- End User List ---":
312 print("Line {0}: {1} (Ending user list)".format(line_number
, line
))
314 elif line
== "--- Start User Info ---":
317 print("Line {0}: {1} (Starting user info)".format(line_number
, line
))
319 elif line
== "--- End User Info ---":
323 print("Line {0}: {1} (Ending user info)".format(line_number
, line
))
325 elif line
== "--- Start Message List ---":
326 in_message_list
= True
328 print("Line {0}: {1} (Starting message list)".format(line_number
, line
))
330 elif line
== "--- End Message List ---":
331 in_message_list
= False
333 print("Line {0}: {1} (Ending message list)".format(line_number
, line
))
335 elif line
== "--- Start Message Thread ---":
336 in_message_thread
= True
337 current_thread
= {'Title': '', 'Messages': []}
340 print("Line {0}: {1} (Starting message thread)".format(line_number
, line
))
342 elif line
== "--- End Message Thread ---":
343 in_message_thread
= False
344 current_service
['MessageThreads'].append(current_thread
)
345 current_thread
= None
347 print("Line {0}: {1} (Ending message thread)".format(line_number
, line
))
349 elif line
== "--- Start Message Post ---":
350 in_message_post
= True
353 print("Line {0}: {1} (Starting message post)".format(line_number
, line
))
355 elif line
== "--- End Message Post ---":
356 in_message_post
= False
358 current_thread
['Messages'].append(current_message
)
359 current_message
= None
361 print("Line {0}: {1} (Ending message post)".format(line_number
, line
))
363 elif in_message_list
and key
== "Interactions":
364 current_service
['Interactions'] = [interaction
.strip() for interaction
in value
.split(",")]
366 print("Line {0}: Interactions set to {1}".format(line_number
, current_service
['Interactions']))
368 if in_user_list
and in_user_info
:
370 user_id
= validate_non_negative_integer(value
, "User", line_number
)
371 current_service
['Users'][user_id
] = {'Bio': ""}
373 print("Line {0}: User ID set to {1}".format(line_number
, user_id
))
375 if user_id
is not None:
376 current_service
['Users'][user_id
]['Name'] = value
378 print("Line {0}: Name set to {1}".format(line_number
, value
))
379 elif key
== "Handle":
380 if user_id
is not None:
381 current_service
['Users'][user_id
]['Handle'] = value
383 print("Line {0}: Handle set to {1}".format(line_number
, value
))
384 elif key
== "Location":
385 if user_id
is not None:
386 current_service
['Users'][user_id
]['Location'] = value
388 print("Line {0}: Location set to {1}".format(line_number
, value
))
389 elif key
== "Joined":
390 if user_id
is not None:
391 current_service
['Users'][user_id
]['Joined'] = value
393 print("Line {0}: Joined date set to {1}".format(line_number
, value
))
394 elif key
== "Birthday":
395 if user_id
is not None:
396 current_service
['Users'][user_id
]['Birthday'] = value
398 print("Line {0}: Birthday set to {1}".format(line_number
, value
))
399 elif line
== "--- Start Bio Body ---":
400 if user_id
is not None:
404 print("Line {0}: Starting bio body".format(line_number
))
405 elif line
== "--- End Bio Body ---":
406 if user_id
is not None and current_bio
is not None:
407 current_service
['Users'][user_id
]['Bio'] = "\n".join(current_bio
)
411 print("Line {0}: Ending bio body".format(line_number
))
412 elif in_bio_body
and current_bio
is not None:
413 current_bio
.append(line
)
415 print("Line {0}: Adding to bio body: {1}".format(line_number
, line
))
416 elif in_message_list
and in_message_thread
:
418 current_thread
['Thread'] = validate_non_negative_integer(value
, "Thread", line_number
)
420 print("Line {0}: Thread ID set to {1}".format(line_number
, value
))
421 elif key
== "Category":
422 current_thread
['Category'] = [category
.strip() for category
in value
.split(",")]
424 print("Line {0}: Category set to {1}".format(line_number
, current_thread
['Category']))
426 current_thread
['Forum'] = [forum
.strip() for forum
in value
.split(",")]
428 print("Line {0}: Forum set to {1}".format(line_number
, current_thread
['Forum']))
430 current_thread
['Title'] = value
432 print("Line {0}: Title set to {1}".format(line_number
, value
))
433 elif key
== "Author":
434 current_message
['Author'] = value
436 print("Line {0}: Author set to {1}".format(line_number
, value
))
438 current_message
['Time'] = value
440 print("Line {0}: Time set to {1}".format(line_number
, value
))
442 current_message
['Date'] = value
444 print("Line {0}: Date set to {1}".format(line_number
, value
))
447 if message_type
not in current_service
['Interactions']:
448 raise ValueError("Unexpected message type '{0}' found on line {1}. Expected one of {2}".format(message_type
, line_number
, current_service
['Interactions']))
449 current_message
['Type'] = message_type
451 print("Line {0}: Type set to {1}".format(line_number
, message_type
))
453 post_value
= validate_non_negative_integer(value
, "Post", line_number
)
454 current_message
['Post'] = post_value
455 if 'post_ids' not in current_thread
:
456 current_thread
['post_ids'] = set()
457 current_thread
['post_ids'].add(post_value
)
459 print("Line {0}: Post ID set to {1}".format(line_number
, post_value
))
460 elif key
== "Nested":
461 nested_value
= validate_non_negative_integer(value
, "Nested", line_number
)
462 if nested_value
!= 0 and nested_value
not in current_thread
.get('post_ids', set()):
464 "Nested value '{0}' on line {1} does not match any existing Post values in the current thread. Existing Post IDs: {2}".format(
465 nested_value
, line_number
, list(current_thread
.get('post_ids', set())))
467 current_message
['Nested'] = nested_value
469 print("Line {0}: Nested set to {1}".format(line_number
, nested_value
))
470 elif line
== "--- Start Message Body ---":
471 if current_message
is not None:
472 current_message
['Message'] = []
473 in_message_body
= True
475 print("Line {0}: Starting message body".format(line_number
))
476 elif line
== "--- End Message Body ---":
477 if current_message
is not None and 'Message' in current_message
:
478 current_message
['Message'] = "\n".join(current_message
['Message'])
479 in_message_body
= False
481 print("Line {0}: Ending message body".format(line_number
))
482 elif in_message_body
and current_message
is not None and 'Message' in current_message
:
483 current_message
['Message'].append(line
)
485 print("Line {0}: Adding to message body: {1}".format(line_number
, line
))
486 except Exception as e
:
488 return False, "Error: {0}".format(str(e
)), lines
[line_number
- 1]
497 def display_services(services
):
498 for service
in services
:
499 print("Service Entry: {0}".format(service
['Entry']))
500 print("Service: {0}".format(service
['Service']))
501 print("Interactions: {0}".format(', '.join(service
['Interactions'])))
502 if 'Categorization' in service
and service
['Categorization']:
503 for category_type
, category_levels
in service
['Categorization'].items():
504 print("{0}: {0}".format(category_type
, ', '.join(category_levels
)))
505 print("Category List:")
506 for category
in service
['Categories']:
507 print(" Type: {0}, Level: {1}".format(category
['Type'], category
['Level']))
508 print(" ID: {0}".format(category
['ID']))
509 print(" InSub: {0}".format(category
['InSub']))
510 print(" Headline: {0}".format(category
['Headline']))
511 print(" Description: {0}".format(category
['Description'].strip()))
514 for user_id
, user_info
in service
['Users'].items():
515 print(" User ID: {0}".format(user_id
))
516 print(" Name: {0}".format(user_info
['Name']))
517 print(" Handle: {0}".format(user_info
['Handle']))
518 print(" Location: {0}".format(user_info
.get('Location', '')))
519 print(" Joined: {0}".format(user_info
.get('Joined', '')))
520 print(" Birthday: {0}".format(user_info
.get('Birthday', '')))
521 print(" Bio: {0}".format(user_info
.get('Bio', '').strip()))
523 print("Message Threads:")
524 for idx
, thread
in enumerate(service
['MessageThreads']):
525 print(" --- Message Thread {0} ---".format(idx
+1))
527 print(" Title: {0}".format(thread
['Title']))
528 if 'Category' in thread
:
529 print(" Category: {0}".format(', '.join(thread
['Category'])))
530 if 'Forum' in thread
:
531 print(" Forum: {0}".format(', '.join(thread
['Forum'])))
532 for message
in thread
['Messages']:
533 print(" {0} ({1} on {2}): [{3}] Post ID: {4} Nested: {5}".format(
534 message
['Author'], message
['Time'], message
['Date'], message
['Type'], message
['Post'], message
['Nested']))
535 print(" {0}".format(message
['Message'].strip()))
def _json_default(value):
    """ Fallback encoder for values the ``json`` module cannot serialize.

    Sets (and frozensets) are emitted as JSON arrays. The elements are
    sorted when they can be compared so the output is deterministic;
    otherwise they are emitted in iteration order. Any other type is
    rejected with the same kind of ``TypeError`` that ``json`` raises.
    """
    if isinstance(value, (set, frozenset)):
        try:
            return sorted(value)
        except TypeError:
            # Elements of mixed, non-comparable types: keep them unsorted.
            return list(value)
    raise TypeError(
        "Object of type {0} is not JSON serializable".format(type(value).__name__))


def to_json(services):
    """ Convert the services data structure to a JSON string.

    The output is indented by two spaces. Thread dictionaries built by
    ``parse_lines`` can carry a ``post_ids`` set, which plain ``json.dumps``
    refuses to encode; such sets are written as JSON arrays instead of
    raising ``TypeError``. Input that was already serializable produces
    exactly the same text as before.

    Raises ``TypeError`` for values that are neither JSON-native nor sets.
    """
    return json.dumps(services, indent=2, default=_json_default)
def from_json(json_str):
    """ Decode a JSON string into the services data structure.

    This is a plain ``json.loads`` call: the decoded object is returned
    without any further conversion.
    """
    services = json.loads(json_str)
    return services
def load_from_json_file(json_filename):
    """ Read a JSON file and return the decoded services data structure.

    The file is opened through ``open_compressed_file`` and decoded with
    ``json.load`` while the handle is still open.
    """
    with open_compressed_file(json_filename) as source:
        services = json.load(source)
    return services
def save_to_json_file(services, json_filename):
    """ Write the services data structure to a file as JSON.

    Serialization is delegated to ``to_json`` so the text written to disk is
    always the same as the in-memory JSON form; the resulting string is then
    handed to ``save_compressed_file`` together with ``json_filename``.
    """
    save_compressed_file(to_json(services), json_filename)
556 def services_to_string(services
, line_ending
="lf"):
557 """ Convert the services data structure back to the original text format """
559 for service
in services
:
560 lines
.append("--- Start Archive Service ---")
561 lines
.append("Entry: {0}".format(service
['Entry']))
562 lines
.append("Service: {0}".format(service
['Service']))
564 lines
.append("--- Start User List ---")
565 for user_id
, user_info
in service
['Users'].items():
566 lines
.append("--- Start User Info ---")
567 lines
.append("User: {0}".format(user_id
))
568 lines
.append("Name: {0}".format(user_info
['Name']))
569 lines
.append("Handle: {0}".format(user_info
['Handle']))
570 if 'Location' in user_info
:
571 lines
.append("Location: {0}".format(user_info
['Location']))
572 if 'Joined' in user_info
:
573 lines
.append("Joined: {0}".format(user_info
['Joined']))
574 if 'Birthday' in user_info
:
575 lines
.append("Birthday: {0}".format(user_info
['Birthday']))
576 if 'Bio' in user_info
:
578 lines
.append("--- Start Bio Body ---")
579 lines
.extend(user_info
['Bio'].split("\n"))
580 lines
.append("--- End Bio Body ---")
581 lines
.append("--- End User Info ---")
582 lines
.append("--- End User List ---")
584 if 'Categorization' in service
and service
['Categorization']:
585 lines
.append("--- Start Categorization List ---")
586 for category_type
, category_levels
in service
['Categorization'].items():
587 lines
.append("{0}: {1}".format(category_type
, ', '.join(category_levels
)))
588 lines
.append("--- End Categorization List ---")
590 if 'Categories' in service
and service
['Categories']:
591 for category
in service
['Categories']:
592 lines
.append("--- Start Category List ---")
593 lines
.append("Kind: {0}, {1}".format(category
['Type'], category
['Level']))
594 lines
.append("ID: {0}".format(category
['ID']))
595 lines
.append("InSub: {0}".format(category
['InSub']))
596 lines
.append("Headline: {0}".format(category
['Headline']))
597 lines
.append("Description: {0}".format(category
['Description']))
598 lines
.append("--- End Category List ---")
600 lines
.append("--- Start Message List ---")
601 lines
.append("Interactions: {0}".format(', '.join(service
['Interactions'])))
602 for thread
in service
['MessageThreads']:
603 lines
.append("--- Start Message Thread ---")
604 lines
.append("Thread: {0}".format(thread
['Thread']))
605 if 'Category' in thread
:
606 lines
.append("Category: {0}".format(', '.join(thread
['Category'])))
607 if 'Forum' in thread
:
608 lines
.append("Forum: {0}".format(', '.join(thread
['Forum'])))
609 if 'Title' in thread
:
610 lines
.append("Title: {0}".format(thread
['Title']))
611 for message
in thread
['Messages']:
612 lines
.append("--- Start Message Post ---")
613 lines
.append("Author: {0}".format(message
['Author']))
614 lines
.append("Time: {0}".format(message
['Time']))
615 lines
.append("Date: {0}".format(message
['Date']))
616 lines
.append("Type: {0}".format(message
['Type']))
617 lines
.append("Post: {0}".format(message
['Post']))
618 lines
.append("Nested: {0}".format(message
['Nested']))
619 lines
.append("Message:")
620 lines
.append("--- Start Message Body ---")
621 lines
.extend(message
['Message'].split("\n"))
622 lines
.append("--- End Message Body ---")
623 lines
.append("--- End Message Post ---")
624 lines
.append("--- End Message Thread ---")
625 lines
.append("--- End Message List ---")
627 lines
.append("--- End Archive Service ---")
629 line_sep
= {"lf": "\n", "cr": "\r", "crlf": "\r\n"}
630 return line_sep
.get(line_ending
, "\n").join(lines
)
def save_services_to_file(services, filename, line_ending="lf"):
    """ Write the services data structure to a file in the original text format.

    ``services_to_string`` renders the text using ``line_ending`` and the
    result is written to ``filename`` by ``save_compressed_file``.
    """
    save_compressed_file(services_to_string(services, line_ending), filename)