Re-write docstrings and tweak Readme file
[somegraph.git] / AptRdepends2JSON.py
blob0194492181a765d84ec7b5d8861fc8132a51a2e7
1 """Go from Debian apt-rdepends output to JSON for a D3.js chart."""
import json
import re


class AptRdepends2JSON:
    """Convert Debian apt-rdepends output to JSON for a D3.js chart.

    In its man page, we read that apt-rdepends "performs recursive dependency
    listings similar to apt-cache". For instance, in the following snippet
    apt-rdepends has printed out the recursive dependencies for the two Debian
    packages "tar" and "libustr-1.0-1":

        tar
          PreDepends: libacl1 (>= 2.2.51-8)
          PreDepends: libc6 (>= 2.17)
          PreDepends: libselinux1 (>= 1.32)
        libustr-1.0-1
          Depends: libc6 (>= 2.4)
          PreDepends: multiarch-support

    Based on this snippet, read_aptrdepends_output() writes the following
    JSON for the D3.js chart:

        {
            "name": "recursive_aptitude_dependencies",
            "children": [
                {
                    "name": "tar",
                    "imports": [
                        "libacl1",
                        "libc6",
                        "libselinux1"
                    ]
                },
                {
                    "name": "libustr-1.0-1",
                    "imports": [
                        "libc6",
                        "multiarch-support"
                    ]
                }
            ]
        }
    """

    # Name given to the root node that wraps the list of packages.
    _ROOT_NAME = 'recursive_aptitude_dependencies'

    # Matches an indented relation line such as "  Depends: ..." or
    # "  PreDepends: ...".
    _DEPENDENCY_LINE = re.compile(r'^\s+\S*Depends')

    # Same prefix, followed by one non-blank character (the colon), one
    # whitespace character, and the package name captured as group 1.
    _DEPENDENCY_NAME = re.compile(r'^\s+\S*Depends\S\s(\S+)')

    def __init__(self, aptrdepends_output, d3_json):
        """We set the following attributes:

        1. aptrdepends_output. The name of a file generated by something
           like...

               apt-rdepends aptitude > aptrdepends_output

        2. d3_json. The name of some JSON file that we will generate based on
           the contents of aptrdepends_output file.

        3. package_list. Starts out as an empty list. After
           read_aptrdepends_output() has run, it holds the dictionary that
           json.dump serialized into the d3_json file.
        """
        self.aptrdepends_output = aptrdepends_output
        self.d3_json = d3_json
        self.package_list = []

    def trim_dependency_line(self, my_string):
        """Return the package name found in a dependency line.

        apt-rdepends specifies "Depends", "PreDepends", "strictly earlier",
        "exactly equal", etc relations between packages.

        For now, we're only interested in package names, so the leading
        relation label and the trailing version constraint are dropped.

        Raises ValueError if my_string is not a dependency line with a
        package name in it.

        >>> my_object = AptRdepends2JSON(
        ...     'apt-rdepends_aptitude', 'apt-rdepends_aptitude.json')
        >>> with open(my_object.aptrdepends_output) as f:
        ...     lines = f.readlines()
        >>> print(lines[1].rstrip('\\n'))
          Depends: aptitude-common (= 0.8.11-7)
        >>> my_package_name = my_object.trim_dependency_line(lines[1])
        >>> print(my_package_name)
        aptitude-common
        >>> print(lines[43].rstrip('\\n'))
          PreDepends: libc6 (>= 2.28)
        >>> my_package_name = my_object.trim_dependency_line(lines[43])
        >>> print(my_package_name)
        libc6
        """
        my_match = self._DEPENDENCY_NAME.match(my_string)
        if my_match is None:
            # Fail with the offending text instead of an opaque
            # "'NoneType' object has no attribute 'group'".
            raise ValueError(
                'not an apt-rdepends dependency line: %r' % (my_string,))
        return my_match.group(1)

    def check_for_dependency_lines(self, my_string):
        """Return a match object if my_string matches ^\\s+\\S*Depends regex.

        None is returned when my_string does not match, so the result can be
        used directly as a truth value.

        Among other things, in the following snippet apt-rdepends is saying
        that the recursive dependencies of libgcc1 are gcc-4.9-base, libc6,
        and multiarch-support...

            libgcc1
              Depends: gcc-4.9-base (= 4.9.0-7)
              Depends: libc6 (>= 2.14)
              PreDepends: multiarch-support

        >>> my_object = AptRdepends2JSON(
        ...     'apt-rdepends_aptitude', 'apt-rdepends_aptitude.json')
        >>> with open(my_object.aptrdepends_output) as f:
        ...     lines = f.readlines()
        >>> print(lines[0].rstrip('\\n'))
        aptitude
        >>> my_object.check_for_dependency_lines(lines[0]) is None
        True
        >>> print(lines[1].rstrip('\\n'))
          Depends: aptitude-common (= 0.8.11-7)
        >>> bool(my_object.check_for_dependency_lines(lines[1])) == True
        True
        >>> print(lines[43].rstrip('\\n'))
          PreDepends: libc6 (>= 2.28)
        >>> bool(my_object.check_for_dependency_lines(lines[43])) == True
        True
        """
        return self._DEPENDENCY_LINE.match(my_string)

    def read_aptrdepends_output(self):
        """Loop over lines of self.aptrdepends_output file, and write the
        result to the self.d3_json file with json.dump.

        This method takes apt-rdepends idiosyncracies into account. We're
        doing the following...

        1. We start from a fresh, empty list of packages, so the method can
           safely be called more than once on the same object

        2. We loop over every line in self.aptrdepends_output file

        3. If line matches ^\\s+\\S*Depends regex, do the following...

           3A. Chop off extraneous " (Pre)Depends: " substring

           3B. Chop off extraneous " (...something...) " substring

           3C. Append resulting string to imports list of the most recently
               added dictionary

        4. If line is blank, skip it

        5. Otherwise, append a new dictionary to the list of packages. New
           dictionary is initialized like so...
           {'name':current_line_in_loop,'imports':[]}

        6. Wrap the list in a root dictionary, store it in self.package_list,
           and serialize it into the self.d3_json file

        Raises ValueError if a dependency line shows up before any package
        line, or if a dependency line carries no package name.

        >>> my_aptrdepends = 'apt-rdepends_aptitude'
        >>> my_json = 'apt-rdepends_aptitude.json'
        >>> my_object = AptRdepends2JSON(my_aptrdepends, my_json)
        >>> my_object.read_aptrdepends_output()
        >>> print(my_object.package_list['children'][0])
        {'name': 'aptitude', 'imports': ['aptitude-common', 'libapt-pkg5.0', 'libboost-iostreams1.67.0', 'libboost-system1.67.0', 'libc6', 'libcwidget3v5', 'libgcc1', 'libncursesw6', 'libsigc++-2.0-0v5', 'libsqlite3-0', 'libstdc++6', 'libtinfo6', 'libxapian30']}
        """
        # Accumulate locally: self.package_list ends up as a dictionary, so
        # appending to it directly would break a second call.
        packages = []

        with open(self.aptrdepends_output, 'r') as aptrdepends_file:
            for rdepends_line in aptrdepends_file:
                if self.check_for_dependency_lines(rdepends_line):
                    if not packages:
                        raise ValueError(
                            'dependency line found before any package '
                            'line: %r' % (rdepends_line,))
                    dependency = self.trim_dependency_line(rdepends_line)
                    packages[-1]['imports'].append(dependency)
                    continue

                chomp = rdepends_line.rstrip('\n')
                if not chomp.strip():
                    # A blank line is not a package; do not emit a node with
                    # an empty name.
                    continue
                packages.append({'name': chomp, 'imports': []})

        self.package_list = {'name': self._ROOT_NAME, 'children': packages}

        with open(self.d3_json, 'w') as json_file:
            json.dump(self.package_list, json_file, indent=4)
if __name__ == "__main__":
    # Executed as a script: bind the modules that the class methods refer to
    # by name (re, json), then run every doctest embedded in the docstrings
    # of this module.
    import doctest
    import json
    import re

    doctest.testmod()