* Filters/, *Queryable/, beagled/Filter.cs, beagled/LuceneCommon.cs: Allow keyword...
[beagle.git] / Filters / FilterPPT.cs
blobba6306a053bfffadb78b33e4f25f69b24b9217e8
1 //
2 // FilterPPT.cs
3 //
4 // Copyright (C) 2004 Novell, Inc.
5 //
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a
9 // copy of this software and associated documentation files (the "Software"),
10 // to deal in the Software without restriction, including without limitation
11 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 // and/or sell copies of the Software, and to permit persons to whom the
13 // Software is furnished to do so, subject to the following conditions:
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 // DEALINGS IN THE SOFTWARE.
28 using System;
29 using System.Collections;
30 using System.IO;
31 using System.Text;
32 using Gsf;
34 using Beagle.Daemon;
35 using Beagle.Util;
// Static description of one record kind found in a PowerPoint binary
// stream: its numeric type code, a printable name and whether the record
// is a container (its payload is a sequence of further records) or an atom.
internal class RecordType
{
	// Numeric record identifiers.  The Escher* members (0xf000 and up)
	// identify the drawing records, the others the PowerPoint records.
	//
	// ExObjRefAtom and OEShapeAtom share the value 3009, so the two names
	// cannot be told apart once a value has been read from a file.
	public enum TypeCode {
		Unknown                     = 0,
		Document                    = 1000,
		DocumentAtom                = 1001,
		EndDocument                 = 1002,
		Slide                       = 1006,
		SlideAtom                   = 1007,
		Notes                       = 1008,
		NotesAtom                   = 1009,
		Environment                 = 1010,
		SlidePersistAtom            = 1011,
		SSlideLayoutAtom            = 1015,
		MainMaster                  = 1016,
		SSSlideInfoAtom             = 1017,
		SlideViewInfo               = 1018,
		GuideAtom                   = 1019,
		ViewInfo                    = 1020,
		ViewInfoAtom                = 1021,
		SlideViewInfoAtom           = 1022,
		VBAInfo                     = 1023,
		VBAInfoAtom                 = 1024,
		SSDocInfoAtom               = 1025,
		Summary                     = 1026,
		DocRoutingSlip              = 1030,
		OutlineViewInfo             = 1031,
		SorterViewInfo              = 1032,
		ExObjList                   = 1033,
		ExObjListAtom               = 1034,
		PPDrawingGroup              = 1035, //FIXME: Office Art File Format Docu
		PPDrawing                   = 1036, //FIXME: Office Art File Format Docu
		NamedShows                  = 1040, // don't know if container
		NamedShow                   = 1041,
		NamedShowSlides             = 1042, // don't know if container
		List                        = 2000,
		FontCollection              = 2005,
		BookmarkCollection          = 2019,
		SoundCollAtom               = 2021,
		Sound                       = 2022,
		SoundData                   = 2023,
		BookmarkSeedAtom            = 2025,
		ColorSchemeAtom             = 2032,
		ExObjRefAtom                = 3009,
		OEShapeAtom                 = 3009,
		OEPlaceholderAtom           = 3011,
		GPointAtom                  = 3024,
		GRatioAtom                  = 3031,
		OutlineTextRefAtom          = 3998,
		TextHeaderAtom              = 3999,
		TextCharsAtom               = 4000,
		StyleTextPropAtom           = 4001,
		BaseTextPropAtom            = 4002,
		TxMasterStyleAtom           = 4003,
		TxCFStyleAtom               = 4004,
		TxPFStyleAtom               = 4005,
		TextRulerAtom               = 4006,
		TextBookmarkAtom            = 4007,
		TextBytesAtom               = 4008,
		TxSIStyleAtom               = 4009,
		TextSpecInfoAtom            = 4010,
		DefaultRulerAtom            = 4011,
		FontEntityAtom              = 4023,
		FontEmbeddedData            = 4024,
		CString                     = 4026,
		MetaFile                    = 4033,
		ExOleObjAtom                = 4035,
		SrKinsoku                   = 4040,
		HandOut                     = 4041,
		ExEmbed                     = 4044,
		ExEmbedAtom                 = 4045,
		ExLink                      = 4046,
		BookmarkEntityAtom          = 4048,
		ExLinkAtom                  = 4049,
		SrKinsokuAtom               = 4050,
		ExHyperlinkAtom             = 4051,
		ExHyperlink                 = 4055,
		SlideNumberMCAtom           = 4056,
		HeadersFooters              = 4057,
		HeadersFootersAtom          = 4058,
		TxInteractiveInfoAtom       = 4063,
		CharFormatAtom              = 4066,
		ParaFormatAtom              = 4067,
		RecolorInfoAtom             = 4071,
		ExQuickTimeMovie            = 4074,
		ExQuickTimeMovieData        = 4075,
		ExControl                   = 4078,
		SlideListWithText           = 4080,
		InteractiveInfo             = 4082,
		InteractiveInfoAtom         = 4083,
		UserEditAtom                = 4085,
		CurrentUserAtom             = 4086,
		DateTimeMCAtom              = 4087,
		GenericDateMCAtom           = 4088,
		FooterMCAtom                = 4090,
		ExControlAtom               = 4091,
		ExMediaAtom                 = 4100,
		ExVideo                     = 4101,
		ExAviMovie                  = 4102,
		ExMCIMovie                  = 4103,
		ExMIDIAudio                 = 4109,
		ExCDAudio                   = 4110,
		ExWAVAudioEmbedded          = 4111,
		ExWAVAudioLink              = 4112,
		ExOleObjStg                 = 4113,
		ExCDAudioAtom               = 4114,
		ExWAVAudioEmbeddedAtom      = 4115,
		AnimationInfoAtom           = 4116,
		RTFDateTimeMCAtom           = 4117,
		ProgTags                    = 5000, // don't know if container
		ProgStringTag               = 5001,
		ProgBinaryTag               = 5002,
		BinaryTagData               = 5003,
		PrintOptions                = 6000,
		PersistPtrFullBlock         = 6001, // don't know if container
		PersistPtrIncrementalBlock  = 6002, // don't know if container
		GScalingAtom                = 10001,
		GRColorAtom                 = 10002,

		EscherDggContainer          = 0xf000, /* Drawing Group Container */
		EscherDgg                   = 0xf006,
		EscherCLSID                 = 0xf016,
		EscherOPT                   = 0xf00b,
		EscherBStoreContainer       = 0xf001,
		EscherBSE                   = 0xf007,
		EscherBlip_START            = 0xf018, /* Blip types are between */
		EscherBlip_END              = 0xf117, /* these two values */
		EscherDgContainer           = 0xf002, /* Drawing Container */
		EscherDg                    = 0xf008,
		EscherRegroupItems          = 0xf118,
		EscherColorScheme           = 0xf120, /* bug in docs */
		EscherSpgrContainer         = 0xf003,
		EscherSpContainer           = 0xf004,
		EscherSpgr                  = 0xf009,
		EscherSp                    = 0xf00a,
		EscherTextbox               = 0xf00c,
		EscherClientTextbox         = 0xf00d,
		EscherAnchor                = 0xf00e,
		EscherChildAnchor           = 0xf00f,
		EscherClientAnchor          = 0xf010,
		EscherClientData            = 0xf011,
		EscherSolverContainer       = 0xf005,
		EscherConnectorRule         = 0xf012, /* bug in docs */
		EscherAlignRule             = 0xf013,
		EscherArcRule               = 0xf014,
		EscherClientRule            = 0xf015,
		EscherCalloutRule           = 0xf017,
		EscherSelection             = 0xf119,
		EscherColorMRU              = 0xf11a,
		EscherDeletedPspl           = 0xf11d, /* bug in docs */
		EscherSplitMenuColors       = 0xf11e,
		EscherOleObject             = 0xf11f,
		EscherUserDefined           = 0xf122,
	}

	public TypeCode typecode;
	public string name;
	public bool is_container;
	public bool do_read;
	public int min_record_size;
	public int max_record_size;

	RecordType (TypeCode typecode, string name, bool is_container, bool do_read, int min_record_size, int max_record_size)
	{
		this.typecode = typecode;
		this.name = name;
		this.is_container = is_container;
		this.do_read = do_read;
		this.min_record_size = min_record_size;
		this.max_record_size = max_record_size;
	}

	// Table-row factories.  Every row of the table below uses do_read = true
	// and -1 for both record-size bounds; only the container flag varies.
	static RecordType Atom (TypeCode code, string label)
	{
		return new RecordType (code, label, false, true, -1, -1);
	}

	static RecordType Container (TypeCode code, string label)
	{
		return new RecordType (code, label, true, true, -1, -1);
	}

	// Known record kinds.  Index 0 (Unknown) doubles as the fallback that
	// Find () hands out for type codes missing from the table.
	static RecordType[] types =
	{
		Atom      (TypeCode.Unknown, "Unknown"),
		Container (TypeCode.Document, "Document"),
		Atom      (TypeCode.DocumentAtom, "DocumentAtom"),
		Atom      (TypeCode.EndDocument, "EndDocument"),
		Container (TypeCode.Slide, "Slide"),
		Atom      (TypeCode.SlideAtom, "SlideAtom"),
		Container (TypeCode.Notes, "Notes"),
		Atom      (TypeCode.NotesAtom, "NotesAtom"),
		Container (TypeCode.Environment, "Environment"),
		Atom      (TypeCode.SlidePersistAtom, "SlidePersistAtom"),
		Atom      (TypeCode.SSlideLayoutAtom, "SSlideLayoutAtom"),
		Container (TypeCode.MainMaster, "MainMaster"),
		Atom      (TypeCode.SSSlideInfoAtom, "SSSlideInfoAtom"),
		Container (TypeCode.SlideViewInfo, "SlideViewInfo"),
		Atom      (TypeCode.GuideAtom, "GuideAtom"),
		Container (TypeCode.ViewInfo, "ViewInfo"),
		Atom      (TypeCode.ViewInfoAtom, "ViewInfoAtom"),
		Atom      (TypeCode.SlideViewInfoAtom, "SlideViewInfoAtom"),
		Container (TypeCode.VBAInfo, "VBAInfo"),
		Atom      (TypeCode.VBAInfoAtom, "VBAInfoAtom"),
		Atom      (TypeCode.SSDocInfoAtom, "SSDocInfoAtom"),
		Container (TypeCode.Summary, "Summary"),
		Atom      (TypeCode.DocRoutingSlip, "DocRoutingSlip"),
		Container (TypeCode.OutlineViewInfo, "OutlineViewInfo"),
		Container (TypeCode.SorterViewInfo, "SorterViewInfo"),
		Container (TypeCode.ExObjList, "ExObjList"),
		Atom      (TypeCode.ExObjListAtom, "ExObjListAtom"),
		Container (TypeCode.PPDrawingGroup, "PPDrawingGroup"), //FIXME: Office Art File Format Docu
		Container (TypeCode.PPDrawing, "PPDrawing"), //FIXME: Office Art File Format Docu
		Atom      (TypeCode.NamedShows, "NamedShows"), // don't know if container
		Container (TypeCode.NamedShow, "NamedShow"),
		Atom      (TypeCode.NamedShowSlides, "NamedShowSlides"), // don't know if container
		Container (TypeCode.List, "List"),
		Container (TypeCode.FontCollection, "FontCollection"),
		Container (TypeCode.BookmarkCollection, "BookmarkCollection"),
		Atom      (TypeCode.SoundCollAtom, "SoundCollAtom"),
		Container (TypeCode.Sound, "Sound"),
		Atom      (TypeCode.SoundData, "SoundData"),
		Atom      (TypeCode.BookmarkSeedAtom, "BookmarkSeedAtom"),
		Atom      (TypeCode.ColorSchemeAtom, "ColorSchemeAtom"),
		Atom      (TypeCode.ExObjRefAtom, "ExObjRefAtom"),
		Atom      (TypeCode.OEShapeAtom, "OEShapeAtom"),
		Atom      (TypeCode.OEPlaceholderAtom, "OEPlaceholderAtom"),
		Atom      (TypeCode.GPointAtom, "GPointAtom"),
		Atom      (TypeCode.GRatioAtom, "GRatioAtom"),
		Atom      (TypeCode.OutlineTextRefAtom, "OutlineTextRefAtom"),
		Atom      (TypeCode.TextHeaderAtom, "TextHeaderAtom"),
		Atom      (TypeCode.TextCharsAtom, "TextCharsAtom"),
		Atom      (TypeCode.StyleTextPropAtom, "StyleTextPropAtom"),
		Atom      (TypeCode.BaseTextPropAtom, "BaseTextPropAtom"),
		Atom      (TypeCode.TxMasterStyleAtom, "TxMasterStyleAtom"),
		Atom      (TypeCode.TxCFStyleAtom, "TxCFStyleAtom"),
		Atom      (TypeCode.TxPFStyleAtom, "TxPFStyleAtom"),
		Atom      (TypeCode.TextRulerAtom, "TextRulerAtom"),
		Atom      (TypeCode.TextBookmarkAtom, "TextBookmarkAtom"),
		Atom      (TypeCode.TextBytesAtom, "TextBytesAtom"),
		Atom      (TypeCode.TxSIStyleAtom, "TxSIStyleAtom"),
		Atom      (TypeCode.TextSpecInfoAtom, "TextSpecInfoAtom"),
		Atom      (TypeCode.DefaultRulerAtom, "DefaultRulerAtom"),
		Atom      (TypeCode.FontEntityAtom, "FontEntityAtom"),
		Atom      (TypeCode.FontEmbeddedData, "FontEmbeddedData"),
		Atom      (TypeCode.CString, "CString"),
		Atom      (TypeCode.MetaFile, "MetaFile"),
		Atom      (TypeCode.ExOleObjAtom, "ExOleObjAtom"),
		Container (TypeCode.SrKinsoku, "SrKinsoku"),
		Container (TypeCode.HandOut, "HandOut"),
		Container (TypeCode.ExEmbed, "ExEmbed"),
		Atom      (TypeCode.ExEmbedAtom, "ExEmbedAtom"),
		Container (TypeCode.ExLink, "ExLink"),
		Atom      (TypeCode.BookmarkEntityAtom, "BookmarkEntityAtom"),
		Atom      (TypeCode.ExLinkAtom, "ExLinkAtom"),
		Atom      (TypeCode.SrKinsokuAtom, "SrKinsokuAtom"),
		Atom      (TypeCode.ExHyperlinkAtom, "ExHyperlinkAtom"),
		Container (TypeCode.ExHyperlink, "ExHyperlink"),
		Atom      (TypeCode.SlideNumberMCAtom, "SlideNumberMCAtom"),
		Container (TypeCode.HeadersFooters, "HeadersFooters"),
		Atom      (TypeCode.HeadersFootersAtom, "HeadersFootersAtom"),
		Atom      (TypeCode.TxInteractiveInfoAtom, "TxInteractiveInfoAtom"),
		Atom      (TypeCode.CharFormatAtom, "CharFormatAtom"),
		Atom      (TypeCode.ParaFormatAtom, "ParaFormatAtom"),
		Atom      (TypeCode.RecolorInfoAtom, "RecolorInfoAtom"),
		Container (TypeCode.ExQuickTimeMovie, "ExQuickTimeMovie"),
		Atom      (TypeCode.ExQuickTimeMovieData, "ExQuickTimeMovieData"),
		Container (TypeCode.ExControl, "ExControl"),
		Container (TypeCode.SlideListWithText, "SlideListWithText"),
		Container (TypeCode.InteractiveInfo, "InteractiveInfo"),
		Atom      (TypeCode.InteractiveInfoAtom, "InteractiveInfoAtom"),
		Atom      (TypeCode.UserEditAtom, "UserEditAtom"),
		Atom      (TypeCode.CurrentUserAtom, "CurrentUserAtom"),
		Atom      (TypeCode.DateTimeMCAtom, "DateTimeMCAtom"),
		Atom      (TypeCode.GenericDateMCAtom, "GenericDateMCAtom"),
		Atom      (TypeCode.FooterMCAtom, "FooterMCAtom"),
		Atom      (TypeCode.ExControlAtom, "ExControlAtom"),
		Atom      (TypeCode.ExMediaAtom, "ExMediaAtom"),
		Container (TypeCode.ExVideo, "ExVideo"),
		Container (TypeCode.ExAviMovie, "ExAviMovie"),
		Container (TypeCode.ExMCIMovie, "ExMCIMovie"),
		Container (TypeCode.ExMIDIAudio, "ExMIDIAudio"),
		Container (TypeCode.ExCDAudio, "ExCDAudio"),
		Container (TypeCode.ExWAVAudioEmbedded, "ExWAVAudioEmbedded"),
		Container (TypeCode.ExWAVAudioLink, "ExWAVAudioLink"),
		Atom      (TypeCode.ExOleObjStg, "ExOleObjStg"),
		Atom      (TypeCode.ExCDAudioAtom, "ExCDAudioAtom"),
		Atom      (TypeCode.ExWAVAudioEmbeddedAtom, "ExWAVAudioEmbeddedAtom"),
		Atom      (TypeCode.AnimationInfoAtom, "AnimationInfoAtom"),
		Atom      (TypeCode.RTFDateTimeMCAtom, "RTFDateTimeMCAtom"),
		Atom      (TypeCode.ProgTags, "ProgTags"), // don't know if container
		Container (TypeCode.ProgStringTag, "ProgStringTag"),
		Container (TypeCode.ProgBinaryTag, "ProgBinaryTag"),
		Atom      (TypeCode.BinaryTagData, "BinaryTagData"),
		Atom      (TypeCode.PrintOptions, "PrintOptions"),
		Atom      (TypeCode.PersistPtrFullBlock, "PersistPtrFullBlock"), // don't know if container
		Atom      (TypeCode.PersistPtrIncrementalBlock, "PersistPtrIncrementalBlock"),
		Atom      (TypeCode.GScalingAtom, "GScalingAtom"),
		Atom      (TypeCode.GRColorAtom, "GRColorAtom"),

		Container (TypeCode.EscherDggContainer, "EscherDggContainer"),
		Atom      (TypeCode.EscherDgg, "EscherDgg"),
		Atom      (TypeCode.EscherCLSID, "EscherCLSID"),
		Atom      (TypeCode.EscherOPT, "EscherOPT"),
		Container (TypeCode.EscherBStoreContainer, "EscherBStoreContainer"),
		Atom      (TypeCode.EscherBSE, "EscherBSE"),
		Atom      (TypeCode.EscherBlip_START, "EscherBlip_START"),
		Atom      (TypeCode.EscherBlip_END, "EscherBlip_END"),
		Container (TypeCode.EscherDgContainer, "EscherDgContainer"),
		Atom      (TypeCode.EscherDg, "EscherDg"),
		Atom      (TypeCode.EscherRegroupItems, "EscherRegroupItems"),
		Atom      (TypeCode.EscherColorScheme, "EscherColorScheme"),
		Container (TypeCode.EscherSpgrContainer, "EscherSpgrContainer"),
		Container (TypeCode.EscherSpContainer, "EscherSpContainer"),
		Atom      (TypeCode.EscherSpgr, "EscherSpgr"),
		Atom      (TypeCode.EscherSp, "EscherSp"),
		Atom      (TypeCode.EscherTextbox, "EscherTextbox"),
		Container (TypeCode.EscherClientTextbox, "EscherClientTextbox"),
		Atom      (TypeCode.EscherAnchor, "EscherAnchor"),
		Atom      (TypeCode.EscherChildAnchor, "EscherChildAnchor"),
		Atom      (TypeCode.EscherClientAnchor, "EscherClientAnchor"),
		Container (TypeCode.EscherClientData, "EscherClientData"),
		Container (TypeCode.EscherSolverContainer, "EscherSolverContainer"),
		Atom      (TypeCode.EscherConnectorRule, "EscherConnectorRule"),
		Atom      (TypeCode.EscherAlignRule, "EscherAlignRule"),
		Atom      (TypeCode.EscherArcRule, "EscherArcRule"),
		Atom      (TypeCode.EscherClientRule, "EscherClientRule"),
		Atom      (TypeCode.EscherCalloutRule, "EscherCalloutRule"),
		Atom      (TypeCode.EscherSelection, "EscherSelection"),
		Atom      (TypeCode.EscherColorMRU, "EscherColorMRU"),
		Atom      (TypeCode.EscherDeletedPspl, "EscherDeletedPspl"),
		Atom      (TypeCode.EscherSplitMenuColors, "EscherSplitMenuColors"),
		Atom      (TypeCode.EscherOleObject, "EscherOleObject"),
		Atom      (TypeCode.EscherUserDefined, "EscherUserDefined")
	};

	// Returns the first table row whose type code equals the argument, or
	// the "Unknown" row (types[0]) when the code is not listed.  Because
	// ExObjRefAtom and OEShapeAtom share a value, a lookup of either one
	// yields the ExObjRefAtom row.
	public static RecordType Find (TypeCode typecode)
	{
		foreach (RecordType candidate in types) {
			if (candidate.typecode == typecode)
				return candidate;
		}

		return types[0];
	}
}
372 namespace Beagle.Filters {
374 public class FilterPPT : FilterOle {
// Kind of text that follows a TextHeaderAtom.  ParseElement casts the
// 4-byte integer payload of that atom straight to this enum, so the
// numeric values matter and are spelled out explicitly.
private enum TextType {
	Invalid     = -1,
	Title       = 0,
	Body        = 1,
	Notes       = 2,
	NotUsed     = 3,
	Other       = 4,
	CenterBody  = 5,
	CenterTitle = 6,
	HalfBody    = 7,
	QuarterBody = 8
}
// Text type taken from the most recently parsed TextHeaderAtom; it decides
// how the text atoms that follow are treated (see ParseElement).
TextType textType;

// Registers the PowerPoint MIME type with the filter framework and resets
// the per-file state.
public FilterPPT ()
{
	FilterFlavor ppt_flavor = FilterFlavor.NewFromMimeType ("application/vnd.ms-powerpoint");
	AddSupportedFlavor (ppt_flavor);

	// No TextHeaderAtom has been seen yet.
	this.textType = TextType.Invalid;

	this.file = null;
	this.FileName = null;
	this.SnippetMode = true;
}
// Parses one record at the current position of the stream: an 8-byte
// RecordHeader followed by its payload.  Containers are walked recursively
// (except MainMaster, which is skipped); the text of TextBytesAtom and
// TextCharsAtom records is appended to the text pool; a TextHeaderAtom
// updates textType; every other atom is skipped.
//
// Returns the number of bytes occupied by the record (payload length + 8),
// or 0 when the record could not be parsed (truncated or malformed data).
private int ParseElement (Input stream)
{
	int data_remaining = (int) stream.Remaining;

	// Weird!! Well, Its a M$ format ;-)
	// Fixes: 323312
	// When fewer than 8 bytes are left they are still consumed, then the
	// parse is reported as failed.
	byte [] data = stream.Read (data_remaining > 7 ? 8 : data_remaining);
	if (data == null || data_remaining < 8)
		return 0;

	RecordType.TypeCode opcode = (RecordType.TypeCode) GetInt16 (data, 2);
	int length = GetInt32 (data, 4);

	// A negative length can only come from a damaged header.  Seeking by it
	// would move the stream backwards, and returning length + 8 would
	// corrupt the byte accounting of the enclosing container, so treat it
	// as a parse failure.
	if (length < 0)
		return 0;

	RecordType type = RecordType.Find (opcode);

	if (type.is_container) {
		// Process the container tree
		if (opcode == RecordType.TypeCode.MainMaster) {
			// Ignore MainMaster container as it contains
			// just a master-slide view and no user data.
			stream.Seek (length, SeekOrigin.Current);
		} else {
			int length_remaining = length;
			while (length_remaining > 0) {
				int elem_length = ParseElement (stream);
				if (elem_length == 0)
					return 0;
				length_remaining -= elem_length;
			}
		}
	} else if (length != 0) {
		System.Text.Encoding encoding = null;

		if (opcode == RecordType.TypeCode.TextBytesAtom) {
			// NOTE(review): the PowerPoint binary format documentation
			// appears to describe this atom as one byte per character
			// (ISO-8859-1, code page 28591) rather than UTF-8 -- confirm
			// before changing; an earlier revision used 28591 here.
			encoding = System.Text.Encoding.UTF8;
		} else if (opcode == RecordType.TypeCode.TextCharsAtom) {
			encoding = System.Text.Encoding.Unicode;
		}

		if (encoding != null && textType != TextType.NotUsed) {
			data = stream.Read (length);
			if (data == null)
				return 0;

			// Replace all ^M and ^K with whitespace; with them in place
			// the contents were not properly appended to the text pool.
			StringBuilder strData = new StringBuilder ();
			strData.Append (encoding.GetString (data).Replace ('\r', ' '));
			strData.Replace ((char) 0x0B, (char) 0x20);

			if (textType == TextType.Title ||
			    textType == TextType.CenterBody ||
			    textType == TextType.CenterTitle)
				HotUp ();

			AppendText (strData.ToString ());

			if (IsHot)
				HotDown ();
			else
				AppendStructuralBreak ();
		} else if (opcode == RecordType.TypeCode.TextHeaderAtom) {
			if (length < 4) {
				// Too short to hold the 4-byte text type: skip the
				// payload and keep the previous textType.
				stream.Seek (length, SeekOrigin.Current);
			} else {
				data = stream.Read (4);
				if (data == null)
					return 0;
				textType = (TextType) GetInt32 (data, 0);

				// Skip any payload beyond the text type so the stream
				// stays aligned with the length returned below.
				if (length > 4)
					stream.Seek (length - 4, SeekOrigin.Current);
			}
		} else {
			stream.Seek (length, SeekOrigin.Current);
		}
	}

	// length = RecordHeader.recLen
	// 8 = sizeof (RecordHeader)
	// Every Atom/container is preceded by a RecordHeader
	return length + 8;
}
// Publishes the slide count from the document-summary metadata as the
// unsearched property "fixme:slide-count".  Nothing is added when the
// metadata is unavailable, the "gsf:slide-count" entry is missing, or the
// count is not positive.  Neither stream argument is read here.
override protected void ExtractMetaData (Input sumStream, Input docSumStream)
{
	if (docSumMeta == null)
		return;

	DocProp count_prop = docSumMeta.Lookup ("gsf:slide-count");
	int slides = (count_prop != null) ? (int) count_prop.Val : 0;

	if (slides > 0)
		AddProperty (Beagle.Property.NewUnsearched ("fixme:slide-count", slides));
}
// Extracts the text of the "PowerPoint Document" stream by parsing its
// records one after another.  Calls Finished () when done (also when there
// is no storage or no such stream) and Error () when an exception escapes
// the parser.
override protected void DoPull ()
{
	if (file == null) {
		Finished ();
		return;
	}

	try {
		Input stream = file.ChildByName ("PowerPoint Document");

		if (stream == null) {
			Logger.Log.Error ("Ole stream not found in {0}. Content extraction skipped.", FileName);
		} else {
			// The parsing was getting terminated when "EndDocument"
			// container was parsed.  We need to continue parsing till
			// the end of the stream, since some slides persist after
			// the actual "Document" container.  PPTs exported from
			// OO.o write almost all the slides after it.
			while (!stream.Eof) {
				// ParseElement returns 0 only when a record could not be
				// read.  The stream is then no longer known to sit on a
				// record boundary, and a failed read may not have moved
				// it at all, so stop instead of looping on.
				if (ParseElement (stream) == 0)
					break;
			}
		}

		Finished ();
	} catch (Exception e) {
		Logger.Log.Error ("Exception {0} occurred during DoPull.", e.Message);
		Error ();
	}
}
// Remembers the full path of the file being filtered and selects the OLE
// storage that later parsing reads from:
//  - if the storage has a "PP97_DUALSTORAGE" child with a non-zero handle,
//    `file` is replaced by that child;
//  - otherwise, if it has a "Header" child or lacks a "PowerPoint Document"
//    child, the file is logged as PPT 95/4.0 and rejected with Error ().
// Any exception raised along the way is logged and also ends in Error ().
536 override protected void OpenStorage (FileInfo info)
538 FileName = info.FullName;
540 // PPT 95/97-2000 format contains a "PP97_DUALSTORAGE", which is required
541 // to index PPT 97-2000 files.
542 // We don't support PPT 95 files, however, we happily accept patches ;-)
544 Input dualStorTemp = null;
545 try {
546 if ((dualStorTemp = file.ChildByName ("PP97_DUALSTORAGE")) != null) {
547 // "PP97_DUALSTORAGE" is a storage containing some streams
// A zero handle leaves `file` untouched; no error is raised in that case.
548 if (dualStorTemp.Handle != IntPtr.Zero)
549 file = (Gsf.Infile) GLib.Object.GetObject (dualStorTemp.Handle);
// dualStorTemp only carries the lookup results for the null tests below;
// it is not used after the condition.
550 } else if (((dualStorTemp = file.ChildByName ("Header")) != null) ||
551 ((dualStorTemp = file.ChildByName ("PowerPoint Document")) == null)) {
552 Logger.Log.Error ("{0} is a PPT 95/4.0 file. Beagle does not support PPT 95 files. Skipping...", FileName);
553 Error ();
// The caught exception `e` is not included in the log message below.
555 } catch (Exception e) {
557 Logger.Log.Error ("Unable to open OleFile stream of "+info.FullName);
558 Error ();