Update git submodules
[LibreOffice.git] / bin / flat-odf-cleanup.py
blob1a1bf18024b78ec016a69d83198b7cc3d9f15c93
1 #!/usr/bin/python3
2 # -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
4 # This file is part of the LibreOffice project.
6 # This Source Code Form is subject to the terms of the Mozilla Public
7 # License, v. 2.0. If a copy of the MPL was not distributed with this
8 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
11 import sys
12 # sadly need lxml because the python one doesn't preserve namespace prefixes
13 # and type-detection looks for the string "office:document"
14 from lxml import etree as ET
15 #import xml.etree.ElementTree as ET
def get_used_p_styles(root):
    """Return the set of paragraph style names referenced below ``root``.

    The result combines:
    - text:style-name and text:class-names of paragraph-like elements
      (text:p, text:h and the index entry templates listed below),
    - paragraph style references stored in other attributes
      (draw:text-style-name, table:paragraph-style-name,
      style:register-truth-ref-style-name, form:text-style-name,
      text:default-style-name),
    - the style:apply-style-name of every style:map found in a paragraph
      style that is referenced through text:cond-style-name.

    A paragraph-like element without text:style-name contributes None.
    """
    text_ns = "{urn:oasis:names:tc:opendocument:xmlns:text:1.0}"
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    draw_ns = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}"
    table_ns = "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}"
    form_ns = "{urn:oasis:names:tc:opendocument:xmlns:form:1.0}"

    paragraph_like = (
        "p",
        "h",
        "alphabetical-index-entry-template",
        "bibliography-entry-template",
        "illustration-index-entry-template",
        "index-source-style",
        "object-index-entry-template",
        "table-index-entry-template",
        "table-of-content-entry-template",
        "user-index-entry-template",
    )
    # attributes that name a paragraph style no matter which element has them
    reference_attributes = (
        draw_ns + "text-style-name",
        table_ns + "paragraph-style-name",
        style_ns + "register-truth-ref-style-name",
        form_ns + "text-style-name",
        text_ns + "default-style-name",
    )

    used = set()
    conditional = set()

    # document content
    for local_name in paragraph_like:
        for node in root.findall(".//" + text_ns + local_name):
            used.add(node.get(text_ns + "style-name"))
            cond_name = node.get(text_ns + "cond-style-name")
            if cond_name:
                conditional.add(cond_name)
            class_names = node.get(text_ns + "class-names")
            if class_names:
                used.update(class_names.split(" "))

    # shapes, table templates, page layouts, forms, notes configuration
    for attribute in reference_attributes:
        for node in root.findall(".//*[@" + attribute + "]"):
            used.add(node.get(attribute))

    # conditional styles: keep whatever their maps may switch to
    for cond_name in conditional:
        query = (
            ".//" + style_ns + "style"
            + "[@" + style_ns + "family='paragraph']"
            + "[@" + style_ns + "name='" + cond_name + "']"
            + "/" + style_ns + "map"
        )
        for mapping in root.findall(query):
            used.add(mapping.get(style_ns + "apply-style-name"))

    return used
def add_parent_styles(usedstyles, styles):
    """Extend ``usedstyles`` in place with everything used styles depend on.

    For each element of ``styles`` whose style:name is already in
    ``usedstyles``, its style:parent-style-name and style:next-style-name
    (when present and non-empty) are added as well.  The pass is repeated
    until the set stops growing, so whole inheritance chains are kept.
    """
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    name_attr = style_ns + "name"
    # next-style-name only occurs on paragraph styles and master-pages
    link_attrs = (style_ns + "parent-style-name", style_ns + "next-style-name")

    while True:
        count_before = len(usedstyles)
        for candidate in styles:
            if candidate.get(name_attr) not in usedstyles:
                continue
            for link_attr in link_attrs:
                target = candidate.get(link_attr)
                if target:
                    usedstyles.add(target)
        if len(usedstyles) == count_before:
            break
def remove_unused_styles(root, usedstyles, styles, name):
    """Detach every element of ``styles`` whose style:name is not used.

    root       -- document root; the first office:automatic-styles and
                  office:styles elements below it are the containers the
                  styles are removed from
    usedstyles -- set of style names that must be kept
    styles     -- candidate style elements
    name       -- human readable kind of style, only used in the log output

    Every candidate name is printed, followed by a "removing unused ..."
    line for each style that is dropped.

    Raises ValueError when an unused style is a child of neither container.
    """
    office_ns = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}name"

    # Look the containers up once, and tolerate documents that lack one of
    # them instead of failing with AttributeError on None.
    containers = [
        container
        for container in (
            root.find(".//" + office_ns + "automatic-styles"),
            root.find(".//" + office_ns + "styles"),
        )
        if container is not None
    ]

    for style in styles:
        stylename = style.get(name_attr)
        print(stylename)
        if stylename in usedstyles:
            continue
        print("removing unused " + name + " " + stylename)
        # The element does not tell us (portably) which container owns it,
        # so try each one in turn.
        for container in containers:
            try:
                container.remove(style)
            except ValueError:
                continue
            break
        else:
            raise ValueError(
                "unused " + name + " " + stylename
                + " is in neither automatic-styles nor styles")
def remove_unused_drawings(root, useddrawings, drawings, name):
    """Remove the elements of ``drawings`` whose draw:name is not used.

    root         -- document root; elements are removed from the first
                    office:styles element found below it
    useddrawings -- set of draw:name values that must be kept
    drawings     -- candidate elements (gradients, hatches, markers, ...)
    name         -- human readable kind of element, only used in the output

    Every candidate name is printed, followed by a "removing unused ..."
    line for each element that is dropped.
    """
    name_attr = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}name"
    container_path = ".//{urn:oasis:names:tc:opendocument:xmlns:office:1.0}styles"

    for candidate in drawings:
        label = candidate.get(name_attr)
        print(label)
        if label in useddrawings:
            continue
        print("removing unused " + name + " " + label)
        root.find(container_path).remove(candidate)
def collect_all_attribute(usedstyles, attribute, tree=None):
    """Add the value of ``attribute`` from every element that carries it.

    usedstyles -- set that is updated in place
    attribute  -- attribute name in ``{namespace}local`` notation
    tree       -- element whose descendants are searched.  When omitted the
                  module-level ``root`` is used; that name is only assigned
                  when this file runs as a script, so pass ``tree``
                  explicitly when calling from anywhere else.
    """
    if tree is None:
        # backward compatible fallback for the two-argument call form
        tree = root
    for element in tree.findall(".//*[@" + attribute + "]"):
        usedstyles.add(element.get(attribute))
def collect_all_attribute_list(usedstyles, attribute, tree=None):
    """Add every name from a whitespace separated list attribute.

    usedstyles -- set that is updated in place
    attribute  -- attribute name in ``{namespace}local`` notation; its value
                  is treated as a list of names separated by whitespace
    tree       -- element whose descendants are searched.  When omitted the
                  module-level ``root`` is used; that name is only assigned
                  when this file runs as a script, so pass ``tree``
                  explicitly when calling from anywhere else.
    """
    if tree is None:
        # backward compatible fallback for the two-argument call form
        tree = root
    for element in tree.findall(".//*[@" + attribute + "]"):
        # split() without argument drops the empty strings that
        # split(" ") would produce for repeated or leading blanks
        usedstyles.update(element.get(attribute).split())
def remove_unused(root):
    """Strip unreferenced definitions from the document below ``root``.

    Works in place on the element tree.  In order, it removes unused
    master pages, paragraph / list / text / ruby / section / presentation /
    graphic / drawing-page styles, page layouts, presentation page layouts,
    table, column, row and cell styles, gradients, hatches, bitmaps, markers,
    stroke dashes, font-face declarations and user field declarations.  It
    also deletes the rsid attributes of style text properties and drops the
    office:settings, office:scripts and loext:theme elements.

    Progress and the names of removed items are printed to stdout.

    NOTE: the two-argument collect_all_attribute*() calls below search the
    module-level ``root`` rather than this parameter, so the function is
    expected to be called with that very element.
    """
    office_ns = "{urn:oasis:names:tc:opendocument:xmlns:office:1.0}"
    style_ns = "{urn:oasis:names:tc:opendocument:xmlns:style:1.0}"
    text_ns = "{urn:oasis:names:tc:opendocument:xmlns:text:1.0}"
    table_ns = "{urn:oasis:names:tc:opendocument:xmlns:table:1.0}"
    draw_ns = "{urn:oasis:names:tc:opendocument:xmlns:drawing:1.0}"
    presentation_ns = "{urn:oasis:names:tc:opendocument:xmlns:presentation:1.0}"
    db_ns = "{urn:oasis:names:tc:opendocument:xmlns:database:1.0}"
    officeooo_ns = "{http://openoffice.org/2009/office}"
    loext_ns = "{urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0}"
    style_name = style_ns + "name"

    def styles_of_family(family):
        # all style:style elements with the given style:family
        return root.findall(
            ".//" + style_ns + "style[@" + style_ns + "family='" + family + "']")

    # 1) find all elements that may reference master pages - this gets rid
    #    of some paragraphs
    usedpstyles = get_used_p_styles(root)
    print(usedpstyles)
    usedtstyles = set()
    tables = root.findall(".//" + table_ns + "table")
    print(tables)
    for table in tables:
        usedtstyles.add(table.get(table_ns + "style-name"))
    pstyles = styles_of_family("paragraph")
    tstyles = styles_of_family("table")
    usedmasterpages = {"Standard"}  # assume this is the default on page 1
    # only automatic styles may have page breaks in LO, so no need to chase
    # parents or nexts
    for pstyle in pstyles:
        print(pstyle.get(style_name))
        if pstyle.get(style_name) in usedpstyles:
            usedmasterpages.add(pstyle.get(style_ns + "master-page-name"))
    for tstyle in tstyles:
        if tstyle.get(style_name) in usedtstyles:
            usedmasterpages.add(tstyle.get(style_ns + "master-page-name"))
    for node in root.findall(".//*[@" + text_ns + "master-page-name]"):
        usedmasterpages.add(node.get(text_ns + "master-page-name"))
    for node in root.findall(".//*[@" + draw_ns + "master-page-name]"):
        usedmasterpages.add(node.get(draw_ns + "master-page-name"))
    print(usedmasterpages)
    # iterate parent/next until no more masterpage is added; this is the
    # same closure add_parent_styles() computes for styles
    masterpages = root.findall(".//" + style_ns + "master-page")
    add_parent_styles(usedmasterpages, masterpages)
    # remove unused masterpages
    for mp in masterpages:
        if mp.get(style_name) not in usedmasterpages:
            print("removing unused master page " + mp.get(style_name))
            root.find(".//" + office_ns + "master-styles").remove(mp)

    # 2) remove unused paragraph styles (recomputed, since removing master
    #    pages also removed the paragraphs they contained)
    usedpstyles = get_used_p_styles(root)
    add_parent_styles(usedpstyles, pstyles)
    remove_unused_styles(root, usedpstyles, pstyles, "paragraph style")

    # 3) unused list styles - keep referenced from still used paragraph styles
    usedliststyles = set()
    for style in root.findall(".//*[@" + style_ns + "list-style-name]"):
        usedliststyles.add(style.get(style_ns + "list-style-name"))
    for list_ in root.findall(
            ".//" + text_ns + "list[@" + text_ns + "style-name]"):
        usedliststyles.add(list_.get(text_ns + "style-name"))
    for listitem in root.findall(
            ".//" + text_ns + "list-item[@" + text_ns + "style-override]"):
        usedliststyles.add(listitem.get(text_ns + "style-override"))
    for numpara in root.findall(
            ".//" + text_ns + "numbered-paragraph[@" + text_ns + "style-name]"):
        # read the attribute from the numbered paragraph itself, not from the
        # loop variable left over from the text:list loop above
        usedliststyles.add(numpara.get(text_ns + "style-name"))
    # ignore ones that are children of style:graphic-properties, those must
    # be handled as the containing style
    # there is no inheritance for these
    liststyles = root.findall("./*/" + text_ns + "list-style")
    remove_unused_styles(root, usedliststyles, liststyles, "list style")

    # 4) unused text styles
    usedtextstyles = set()
    usedsectionstyles = set()
    usedrubystyles = set()

    sections = {text_ns + local for local in (
        "alphabetical-index",
        "bibliography",
        "illustration-index",
        "index-title",
        "object-index",
        "section",
        "table-of-content",
        "table-index",
        "user-index",
    )}
    texts = {text_ns + local for local in (
        "a",
        "index-entry-bibliography",
        "index-entry-chapter",
        "index-entry-link-end",
        "index-entry-link-start",
        "index-entry-page-number",
        "index-entry-span",
        "index-entry-tab-stop",
        "index-entry-text",
        "index-title-template",
        "linenumbering-configuration",
        "list-level-style-number",
        "list-level-style-bullet",
        "outline-level-style",
        "ruby-text",
        "span",
    )}
    # text:style-name means a different style family depending on the element
    for element in root.findall(".//*[@" + text_ns + "style-name]"):
        style = element.get(text_ns + "style-name")
        if element.tag == text_ns + "ruby":
            usedrubystyles.add(style)
        elif element.tag in sections:
            usedsectionstyles.add(style)
        elif element.tag in texts:
            usedtextstyles.add(style)

    collect_all_attribute(usedtextstyles, style_ns + "style-name")
    collect_all_attribute(usedtextstyles, style_ns + "leader-text-style")
    collect_all_attribute(usedtextstyles, style_ns + "text-line-through-text-style")
    collect_all_attribute(usedtextstyles, text_ns + "visited-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "main-entry-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "citation-style-name")
    collect_all_attribute(usedtextstyles, text_ns + "citation-body-style-name")
    for span in root.findall(
            ".//" + text_ns + "span[@" + text_ns + "class-names]"):
        for style in span.get(text_ns + "class-names").split(" "):
            usedtextstyles.add(style)
    textstyles = styles_of_family("text")
    add_parent_styles(usedtextstyles, textstyles)
    remove_unused_styles(root, usedtextstyles, textstyles, "text style")

    # 5) unused ruby styles - can't have parents?
    rubystyles = styles_of_family("ruby")
    remove_unused_styles(root, usedrubystyles, rubystyles, "ruby style")

    # 6) unused section styles - can't have parents?
    sectionstyles = styles_of_family("section")
    remove_unused_styles(root, usedsectionstyles, sectionstyles, "section style")

    # 7) presentation styles
    usedpresentationstyles = set()
    collect_all_attribute(usedpresentationstyles, presentation_ns + "style-name")
    collect_all_attribute_list(usedpresentationstyles, presentation_ns + "class-names")
    presentationstyles = styles_of_family("presentation")
    add_parent_styles(usedpresentationstyles, presentationstyles)
    remove_unused_styles(root, usedpresentationstyles, presentationstyles,
                         "presentation style")

    # 8) graphic styles
    pages = {
        draw_ns + "page",
        presentation_ns + "notes",
        style_ns + "handout-master",
        style_ns + "master-page",
    }
    usedgraphicstyles = set()
    useddrawingpagestyles = set()
    # draw:style-name on a page-like element names a drawing-page style,
    # anywhere else a graphic style
    for element in root.findall(".//*[@" + draw_ns + "style-name]"):
        style = element.get(draw_ns + "style-name")
        if element.tag in pages:
            useddrawingpagestyles.add(style)
        else:
            usedgraphicstyles.add(style)
    collect_all_attribute_list(usedgraphicstyles, draw_ns + "class-names")
    graphicstyles = styles_of_family("graphic")
    add_parent_styles(usedgraphicstyles, graphicstyles)
    remove_unused_styles(root, usedgraphicstyles, graphicstyles, "graphic style")

    # 9) drawing-page styles
    drawingpagestyles = styles_of_family("drawing-page")
    add_parent_styles(useddrawingpagestyles, drawingpagestyles)
    remove_unused_styles(root, useddrawingpagestyles, drawingpagestyles,
                         "drawing-page style")

    # 10) page layouts
    usedpagelayouts = set()
    collect_all_attribute(usedpagelayouts, style_ns + "page-layout-name")
    pagelayouts = root.findall(".//" + style_ns + "page-layout")
    remove_unused_styles(root, usedpagelayouts, pagelayouts, "page layout")

    # 11) presentation page layouts
    usedpresentationpagelayouts = set()
    collect_all_attribute(usedpresentationpagelayouts,
                          presentation_ns + "presentation-page-layout-name")
    presentationpagelayouts = root.findall(
        ".//" + style_ns + "presentation-page-layout")
    remove_unused_styles(root, usedpresentationpagelayouts,
                         presentationpagelayouts, "presentation page layout")

    # 12) table (column/row/cell) styles
    usedtablestyles = set()
    usedtablecolumnstyles = set()
    usedtablerowstyles = set()
    usedtablecellstyles = set()

    tabletags = {
        table_ns + "table",
        # element tags are "{namespace}local"; a "table:" prefix inside the
        # local part could never match
        table_ns + "background",
    }
    tablecells = {table_ns + local for local in (
        "covered-table-cell",
        "table-cell",
        "body",
        "even-columns",
        "even-rows",
        "first-column",
        "first-row",
        "last-column",
        "last-row",
        "odd-columns",
        "odd-rows",
    )}
    for element in root.findall(".//*[@" + table_ns + "style-name]"):
        style = element.get(table_ns + "style-name")
        if element.tag == table_ns + "table-column":
            usedtablecolumnstyles.add(style)
        elif element.tag == table_ns + "table-row":
            usedtablerowstyles.add(style)
        elif element.tag in tabletags:
            usedtablestyles.add(style)
        elif element.tag in tablecells:
            usedtablecellstyles.add(style)

    for element in root.findall(".//*[@" + db_ns + "style-name]"):
        style = element.get(db_ns + "style-name")
        if element.tag == db_ns + "column":
            usedtablecolumnstyles.add(style)
        else:  # db:query db:table-representation
            usedtablestyles.add(style)

    collect_all_attribute(usedtablerowstyles, db_ns + "default-row-style-name")
    collect_all_attribute(usedtablecellstyles, db_ns + "default-cell-style-name")
    collect_all_attribute(usedtablecellstyles, table_ns + "default-cell-style-name")

    tablecolumstyles = styles_of_family("table-column")
    tablerowstyles = styles_of_family("table-row")
    tablecellstyles = styles_of_family("table-cell")
    add_parent_styles(usedtablestyles, tstyles)
    add_parent_styles(usedtablecolumnstyles, tablecolumstyles)
    add_parent_styles(usedtablerowstyles, tablerowstyles)
    add_parent_styles(usedtablecellstyles, tablecellstyles)
    # prune against the set that had parents and database references added,
    # not the raw table:table names gathered in step 1
    remove_unused_styles(root, usedtablestyles, tstyles, "table style")
    remove_unused_styles(root, usedtablecolumnstyles, tablecolumstyles,
                         "table column style")
    remove_unused_styles(root, usedtablerowstyles, tablerowstyles,
                         "table row style")
    remove_unused_styles(root, usedtablecellstyles, tablecellstyles,
                         "table cell style")

    # 13) gradients
    usedgradients = set()
    collect_all_attribute(usedgradients, draw_ns + "fill-gradient-name")
    collect_all_attribute(usedgradients, draw_ns + "opacity-name")
    gradients = root.findall(".//" + draw_ns + "gradient")
    remove_unused_drawings(root, usedgradients, gradients, "gradient")

    # 14) hatchs
    usedhatchs = set()
    collect_all_attribute(usedhatchs, draw_ns + "fill-hatch-name")
    hatchs = root.findall(".//" + draw_ns + "hatch")
    remove_unused_drawings(root, usedhatchs, hatchs, "hatch")

    # 15) bitmaps
    # NOTE(review): ODF appears to store bitmap fills as draw:fill-image, so
    # this lookup may never match anything - confirm before changing it.
    usedbitmaps = set()
    collect_all_attribute(usedbitmaps, draw_ns + "fill-image-name")
    bitmaps = root.findall(".//" + draw_ns + "bitmap")
    remove_unused_drawings(root, usedbitmaps, bitmaps, "bitmap")

    # 16) markers
    usedmarkers = set()
    collect_all_attribute(usedmarkers, draw_ns + "marker-start")
    collect_all_attribute(usedmarkers, draw_ns + "marker-end")
    markers = root.findall(".//" + draw_ns + "marker")
    remove_unused_drawings(root, usedmarkers, markers, "marker")

    # 17) stroke-dash
    usedstrokedashs = set()
    collect_all_attribute(usedstrokedashs, draw_ns + "stroke-dash")
    collect_all_attribute_list(usedstrokedashs, draw_ns + "stroke-dash-names")
    strokedashs = root.findall(".//" + draw_ns + "stroke-dash")
    remove_unused_drawings(root, usedstrokedashs, strokedashs, "stroke-dash")

    # TODO 3 other styles

    # 18) unused font-face-decls
    usedfonts = set()
    collect_all_attribute(usedfonts, style_ns + "font-name")
    collect_all_attribute(usedfonts, style_ns + "font-name-asian")
    collect_all_attribute(usedfonts, style_ns + "font-name-complex")
    fonts = root.findall(".//" + style_ns + "font-face")
    for font in fonts:
        if font.get(style_name) not in usedfonts:
            print("removing unused font-face " + font.get(style_name))
            root.find(".//" + office_ns + "font-face-decls").remove(font)

    # 19) remove rsid attributes
    for style in root.findall(".//" + style_ns + "style"):
        tp = style.find(".//" + style_ns + "text-properties")
        if tp is None:
            continue
        for rsid in ("rsid", "paragraph-rsid"):
            if officeooo_ns + rsid in tp.attrib:
                print("removing " + rsid + " from " + style.get(style_name))
                del tp.attrib[officeooo_ns + rsid]

    # 20) unused user field decls
    useduserfields = set()
    for field in root.findall(".//" + text_ns + "user-field-get"):
        useduserfields.add(field.get(text_ns + "name"))
    for field in root.findall(".//" + text_ns + "user-field-input"):
        useduserfields.add(field.get(text_ns + "name"))
    for field in root.findall(".//" + text_ns + "user-field-decl"):
        if field.get(text_ns + "name") not in useduserfields:
            print("removing unused user-field-decl " + field.get(text_ns + "name"))
            root.find(".//" + text_ns + "user-field-decls").remove(field)

    # remove office:settings
    settings = root.find(".//" + office_ns + "settings")
    if settings is not None:
        root.remove(settings)

    # scripts are almost never needed
    scripts = root.find(".//" + office_ns + "scripts")
    if scripts is not None:
        root.remove(scripts)

    # remove theme
    theme = root.find(".//" + loext_ns + "theme")
    if theme is not None:
        theme.getparent().remove(theme)

    # TODO: replace embedded image with some tiny one
    # TODO: perhaps replace text with xxx (optionally)?
if __name__ == "__main__":
    # usage: flat-odf-cleanup.py INFILE OUTFILE
    # Reads the flat ODF file INFILE, strips unused content and writes the
    # result to OUTFILE.
    if len(sys.argv) < 3:
        # fail with a hint instead of an IndexError traceback
        sys.exit("usage: " + sys.argv[0] + " INFILE OUTFILE")
    infile = sys.argv[1]
    outfile = sys.argv[2]

    dom = ET.parse(infile)
    # NOTE: `root` has to stay a module-level name: the two-argument form of
    # collect_all_attribute() / collect_all_attribute_list() looks it up as
    # a global.
    root = dom.getroot()

    remove_unused(root)

    # write output
    dom.write(outfile, encoding='utf-8', xml_declaration=True)
# TODO: style references that are not handled yet:
#   chart:style-name
#       -> chart
#   style:data-style-name
#       -> data style
#   style:percentage-data-style-name
#       -> data style
440 # vim: set shiftwidth=4 softtabstop=4 expandtab: