1 /* -*- Mode: csharp; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
5 // Copyright (C) 2004 Novell, Inc.
9 // Permission is hereby granted, free of charge, to any person obtaining a
10 // copy of this software and associated documentation files (the "Software"),
11 // to deal in the Software without restriction, including without limitation
12 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 // and/or sell copies of the Software, and to permit persons to whom the
14 // Software is furnished to do so, subject to the following conditions:
16 // The above copyright notice and this permission notice shall be included in
17 // all copies or substantial portions of the Software.
19 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 // DEALINGS IN THE SOFTWARE.
30 using System
.Collections
;
38 internal class RecordType
41 public enum TypeCode
{
51 SlidePersistAtom
= 1011,
52 SSlideLayoutAtom
= 1015,
54 SSSlideInfoAtom
= 1017,
59 SlideViewInfoAtom
= 1022,
64 DocRoutingSlip
= 1030,
65 OutlineViewInfo
= 1031,
66 SorterViewInfo
= 1032,
69 PPDrawingGroup
= 1035, //FIXME: Office Art File Format Docu
70 PPDrawing
= 1036, //FIXME: Office Art File Format Docu
71 NamedShows
= 1040, // don't know if container
73 NamedShowSlides
= 1042, // don't know if container
75 FontCollection
= 2005,
76 BookmarkCollection
= 2019,
80 BookmarkSeedAtom
= 2025,
81 ColorSchemeAtom
= 2032,
84 OEPlaceholderAtom
= 3011,
87 OutlineTextRefAtom
= 3998,
88 TextHeaderAtom
= 3999,
90 StyleTextPropAtom
= 4001,
91 BaseTextPropAtom
= 4002,
92 TxMasterStyleAtom
= 4003,
96 TextBookmarkAtom
= 4007,
99 TextSpecInfoAtom
= 4010,
100 DefaultRulerAtom
= 4011,
101 FontEntityAtom
= 4023,
102 FontEmbeddedData
= 4024,
111 BookmarkEntityAtom
= 4048,
113 SrKinsokuAtom
= 4050,
114 ExHyperlinkAtom
= 4051,
116 SlideNumberMCAtom
= 4056,
117 HeadersFooters
= 4057,
118 HeadersFootersAtom
= 4058,
119 TxInteractiveInfoAtom
= 4063,
120 CharFormatAtom
= 4066,
121 ParaFormatAtom
= 4067,
122 RecolorInfoAtom
= 4071,
123 ExQuickTimeMovie
= 4074,
124 ExQuickTimeMovieData
= 4075,
126 SlideListWithText
= 4080,
127 InteractiveInfo
= 4082,
128 InteractiveInfoAtom
= 4083,
130 CurrentUserAtom
= 4086,
131 DateTimeMCAtom
= 4087,
132 GenericDateMCAtom
= 4088,
134 ExControlAtom
= 4091,
141 ExWAVAudioEmbedded
= 4111,
142 ExWAVAudioLink
= 4112,
144 ExCDAudioAtom
= 4114,
145 ExWAVAudioEmbeddedAtom
= 4115,
146 AnimationInfoAtom
= 4116,
147 RTFDateTimeMCAtom
= 4117,
148 ProgTags
= 5000, // don't know if container
149 ProgStringTag
= 5001,
150 ProgBinaryTag
= 5002,
151 BinaryTagData
= 5003,
153 PersistPtrFullBlock
= 6001, // don't know if container
154 PersistPtrIncrementalBlock
= 6002, // don't know if container
155 GScalingAtom
= 10001,
157 EscherDggContainer
= 0xf000, /* Drawing Group Container */
159 EscherCLSID
= 0xf016,
161 EscherBStoreContainer
= 0xf001,
163 EscherBlip_START
= 0xf018, /* Blip types are between */
164 EscherBlip_END
= 0xf117, /* these two values */
165 EscherDgContainer
= 0xf002, /* Drawing Container */
167 EscherRegroupItems
= 0xf118,
168 EscherColorScheme
= 0xf120, /* bug in docs */
169 EscherSpgrContainer
= 0xf003,
170 EscherSpContainer
= 0xf004,
173 EscherTextbox
= 0xf00c,
174 EscherClientTextbox
= 0xf00d,
175 EscherAnchor
= 0xf00e,
176 EscherChildAnchor
= 0xf00f,
177 EscherClientAnchor
= 0xf010,
178 EscherClientData
= 0xf011,
179 EscherSolverContainer
= 0xf005,
180 EscherConnectorRule
= 0xf012, /* bug in docs */
181 EscherAlignRule
= 0xf013,
182 EscherArcRule
= 0xf014,
183 EscherClientRule
= 0xf015,
184 EscherCalloutRule
= 0xf017,
185 EscherSelection
= 0xf119,
186 EscherColorMRU
= 0xf11a,
187 EscherDeletedPspl
= 0xf11d, /* bug in docs */
188 EscherSplitMenuColors
= 0xf11e,
189 EscherOleObject
= 0xf11f,
190 EscherUserDefined
= 0xf122,
// Record type code that this table entry describes.
193 public TypeCode typecode
;
// Container flag supplied to the constructor. FilterPPT.ParseElement
// recurses into the payload of records whose entry has this set.
195 public bool is_container
;
// Size bounds supplied to the constructor. Every entry visible in the
// `types` table passes -1 for both; presumably that means "no limit" --
// TODO confirm.
197 public int min_record_size
;
198 public int max_record_size
;
// Builds one entry of the static `types` table. No access modifier is
// given, so the constructor is private to RecordType.
// The visible assignments copy typecode, is_container, do_read,
// min_record_size and max_record_size into the same-named fields.
// NOTE(review): no assignment of the `name` parameter is visible in this
// excerpt -- confirm that it is stored.
199 RecordType (TypeCode typecode
, string name
, bool is_container
, bool do_read
, int min_record_size
, int max_record_size
)
201 this.typecode
= typecode
;
203 this.is_container
= is_container
;
204 this.do_read
= do_read
;
205 this.min_record_size
= min_record_size
;
206 this.max_record_size
= max_record_size
;
209 static RecordType
[] types
=
211 new RecordType ( TypeCode
.Unknown
, "Unknown", false, true, -1, -1 ),
212 new RecordType ( TypeCode
.Document
, "Document", true, true, -1, -1 ),
213 new RecordType ( TypeCode
.DocumentAtom
, "DocumentAtom", false, true, -1, -1 ),
214 new RecordType ( TypeCode
.EndDocument
, "EndDocument", false, true, -1, -1 ),
215 new RecordType ( TypeCode
.Slide
, "Slide", true, true, -1, -1 ),
216 new RecordType ( TypeCode
.SlideAtom
, "SlideAtom", false, true, -1, -1 ),
217 new RecordType ( TypeCode
.Notes
, "Notes", true, true, -1, -1 ),
218 new RecordType ( TypeCode
.NotesAtom
, "NotesAtom", false, true, -1, -1 ),
219 new RecordType ( TypeCode
.Environment
, "Environment", true, true, -1, -1 ),
220 new RecordType ( TypeCode
.SlidePersistAtom
, "SlidePersistAtom", false, true, -1, -1 ),
221 new RecordType ( TypeCode
.SSlideLayoutAtom
, "SSlideLayoutAtom", false, true, -1, -1 ),
222 new RecordType ( TypeCode
.MainMaster
, "MainMaster", true, true, -1, -1 ),
223 new RecordType ( TypeCode
.SSSlideInfoAtom
, "SSSlideInfoAtom", false, true, -1, -1 ),
224 new RecordType ( TypeCode
.SlideViewInfo
, "SlideViewInfo", true, true, -1, -1 ),
225 new RecordType ( TypeCode
.GuideAtom
, "GuideAtom", false, true, -1, -1 ),
226 new RecordType ( TypeCode
.ViewInfo
, "ViewInfo", true, true, -1, -1 ),
227 new RecordType ( TypeCode
.ViewInfoAtom
, "ViewInfoAtom", false, true, -1, -1 ),
228 new RecordType ( TypeCode
.SlideViewInfoAtom
, "SlideViewInfoAtom", false, true, -1, -1 ),
229 new RecordType ( TypeCode
.VBAInfo
, "VBAInfo", true, true, -1, -1 ),
230 new RecordType ( TypeCode
.VBAInfoAtom
, "VBAInfoAtom", false, true, -1, -1 ),
231 new RecordType ( TypeCode
.SSDocInfoAtom
, "SSDocInfoAtom", false, true, -1, -1 ),
232 new RecordType ( TypeCode
.Summary
, "Summary", true, true, -1, -1 ),
233 new RecordType ( TypeCode
.DocRoutingSlip
, "DocRoutingSlip", false, true, -1, -1 ),
234 new RecordType ( TypeCode
.OutlineViewInfo
, "OutlineViewInfo", true, true, -1, -1 ),
235 new RecordType ( TypeCode
.SorterViewInfo
, "SorterViewInfo", true, true, -1, -1 ),
236 new RecordType ( TypeCode
.ExObjList
, "ExObjList", true, true, -1, -1 ),
237 new RecordType ( TypeCode
.ExObjListAtom
, "ExObjListAtom", false, true, -1, -1 ),
238 new RecordType ( TypeCode
.PPDrawingGroup
, "PPDrawingGroup", true, true, -1, -1 ), //FIXME: Office Art File Format Docu
239 new RecordType ( TypeCode
.PPDrawing
, "PPDrawing", true, true, -1, -1 ), //FIXME: Office Art File Format Docu
240 new RecordType ( TypeCode
.NamedShows
, "NamedShows", false, true, -1, -1 ), // don't know if container
241 new RecordType ( TypeCode
.NamedShow
, "NamedShow", true, true, -1, -1 ),
242 new RecordType ( TypeCode
.NamedShowSlides
, "NamedShowSlides", false, true, -1, -1 ), // don't know if container
243 new RecordType ( TypeCode
.List
, "List", true, true, -1, -1 ),
244 new RecordType ( TypeCode
.FontCollection
, "FontCollection", true, true, -1, -1 ),
245 new RecordType ( TypeCode
.BookmarkCollection
, "BookmarkCollection", true, true, -1, -1 ),
246 new RecordType ( TypeCode
.SoundCollAtom
, "SoundCollAtom", false, true, -1, -1 ),
247 new RecordType ( TypeCode
.Sound
, "Sound", true, true, -1, -1 ),
248 new RecordType ( TypeCode
.SoundData
, "SoundData", false, true, -1, -1 ),
249 new RecordType ( TypeCode
.BookmarkSeedAtom
, "BookmarkSeedAtom", false, true, -1, -1 ),
250 new RecordType ( TypeCode
.ColorSchemeAtom
, "ColorSchemeAtom", false, true, -1, -1 ),
251 new RecordType ( TypeCode
.ExObjRefAtom
, "ExObjRefAtom", false, true, -1, -1 ),
252 new RecordType ( TypeCode
.OEShapeAtom
, "OEShapeAtom", false, true, -1, -1 ),
253 new RecordType ( TypeCode
.OEPlaceholderAtom
, "OEPlaceholderAtom", false, true, -1, -1 ),
254 new RecordType ( TypeCode
.GPointAtom
, "GPointAtom", false, true, -1, -1 ),
255 new RecordType ( TypeCode
.GRatioAtom
, "GRatioAtom", false, true, -1, -1 ),
256 new RecordType ( TypeCode
.OutlineTextRefAtom
, "OutlineTextRefAtom", false, true, -1, -1 ),
257 new RecordType ( TypeCode
.TextHeaderAtom
, "TextHeaderAtom", false, true, -1, -1 ),
258 new RecordType ( TypeCode
.TextCharsAtom
, "TextCharsAtom", false, true, -1, -1 ),
259 new RecordType ( TypeCode
.StyleTextPropAtom
, "StyleTextPropAtom", false, true, -1, -1 ),
260 new RecordType ( TypeCode
.BaseTextPropAtom
, "BaseTextPropAtom", false, true, -1, -1 ),
261 new RecordType ( TypeCode
.TxMasterStyleAtom
, "TxMasterStyleAtom", false, true, -1, -1 ),
262 new RecordType ( TypeCode
.TxCFStyleAtom
, "TxCFStyleAtom", false, true, -1, -1 ),
263 new RecordType ( TypeCode
.TxPFStyleAtom
, "TxPFStyleAtom", false, true, -1, -1 ),
264 new RecordType ( TypeCode
.TextRulerAtom
, "TextRulerAtom", false, true, -1, -1 ),
265 new RecordType ( TypeCode
.TextBookmarkAtom
, "TextBookmarkAtom", false, true, -1, -1 ),
266 new RecordType ( TypeCode
.TextBytesAtom
, "TextBytesAtom", false, true, -1, -1 ),
267 new RecordType ( TypeCode
.TxSIStyleAtom
, "TxSIStyleAtom", false, true, -1, -1 ),
268 new RecordType ( TypeCode
.TextSpecInfoAtom
, "TextSpecInfoAtom", false, true, -1, -1 ),
269 new RecordType ( TypeCode
.DefaultRulerAtom
, "DefaultRulerAtom", false, true, -1, -1 ),
270 new RecordType ( TypeCode
.FontEntityAtom
, "FontEntityAtom", false, true, -1, -1 ),
271 new RecordType ( TypeCode
.FontEmbeddedData
, "FontEmbeddedData", false, true, -1, -1 ),
272 new RecordType ( TypeCode
.CString
, "CString", false, true, -1, -1 ),
273 new RecordType ( TypeCode
.MetaFile
, "MetaFile", false, true, -1, -1 ),
274 new RecordType ( TypeCode
.ExOleObjAtom
, "ExOleObjAtom", false, true, -1, -1 ),
275 new RecordType ( TypeCode
.SrKinsoku
, "SrKinsoku", true, true, -1, -1 ),
276 new RecordType ( TypeCode
.HandOut
, "HandOut", true, true, -1, -1 ),
277 new RecordType ( TypeCode
.ExEmbed
, "ExEmbed", true, true, -1, -1 ),
278 new RecordType ( TypeCode
.ExEmbedAtom
, "ExEmbedAtom", false, true, -1, -1 ),
279 new RecordType ( TypeCode
.ExLink
, "ExLink", true, true, -1, -1 ),
280 new RecordType ( TypeCode
.BookmarkEntityAtom
, "BookmarkEntityAtom", false, true, -1, -1 ),
281 new RecordType ( TypeCode
.ExLinkAtom
, "ExLinkAtom", false, true, -1, -1 ),
282 new RecordType ( TypeCode
.SrKinsokuAtom
, "SrKinsokuAtom", false, true, -1, -1 ),
283 new RecordType ( TypeCode
.ExHyperlinkAtom
, "ExHyperlinkAtom", false, true, -1, -1 ),
284 new RecordType ( TypeCode
.ExHyperlink
, "ExHyperlink", true, true, -1, -1 ),
285 new RecordType ( TypeCode
.SlideNumberMCAtom
, "SlideNumberMCAtom", false, true, -1, -1 ),
286 new RecordType ( TypeCode
.HeadersFooters
, "HeadersFooters", true, true, -1, -1 ),
287 new RecordType ( TypeCode
.HeadersFootersAtom
, "HeadersFootersAtom", false, true, -1, -1 ),
288 new RecordType ( TypeCode
.TxInteractiveInfoAtom
, "TxInteractiveInfoAtom", false, true, -1, -1 ),
289 new RecordType ( TypeCode
.CharFormatAtom
, "CharFormatAtom", false, true, -1, -1 ),
290 new RecordType ( TypeCode
.ParaFormatAtom
, "ParaFormatAtom", false, true, -1, -1 ),
291 new RecordType ( TypeCode
.RecolorInfoAtom
, "RecolorInfoAtom", false, true, -1, -1 ),
292 new RecordType ( TypeCode
.ExQuickTimeMovie
, "ExQuickTimeMovie", true, true, -1, -1 ),
293 new RecordType ( TypeCode
.ExQuickTimeMovieData
, "ExQuickTimeMovieData", false, true, -1, -1 ),
294 new RecordType ( TypeCode
.ExControl
, "ExControl", true, true, -1, -1 ),
295 new RecordType ( TypeCode
.SlideListWithText
, "SlideListWithText", true, true, -1, -1 ),
296 new RecordType ( TypeCode
.InteractiveInfo
, "InteractiveInfo", true, true, -1, -1 ),
297 new RecordType ( TypeCode
.InteractiveInfoAtom
, "InteractiveInfoAtom", false, true, -1, -1 ),
298 new RecordType ( TypeCode
.UserEditAtom
, "UserEditAtom", false, true, -1, -1 ),
299 new RecordType ( TypeCode
.CurrentUserAtom
, "CurrentUserAtom", false, true, -1, -1 ),
300 new RecordType ( TypeCode
.DateTimeMCAtom
, "DateTimeMCAtom", false, true, -1, -1 ),
301 new RecordType ( TypeCode
.GenericDateMCAtom
, "GenericDateMCAtom", false, true, -1, -1 ),
302 new RecordType ( TypeCode
.FooterMCAtom
, "FooterMCAtom", false, true, -1, -1 ),
303 new RecordType ( TypeCode
.ExControlAtom
, "ExControlAtom", false, true, -1, -1 ),
304 new RecordType ( TypeCode
.ExMediaAtom
, "ExMediaAtom", false, true, -1, -1 ),
305 new RecordType ( TypeCode
.ExVideo
, "ExVideo", true, true, -1, -1 ),
306 new RecordType ( TypeCode
.ExAviMovie
, "ExAviMovie", true, true, -1, -1 ),
307 new RecordType ( TypeCode
.ExMCIMovie
, "ExMCIMovie", true, true, -1, -1 ),
308 new RecordType ( TypeCode
.ExMIDIAudio
, "ExMIDIAudio", true, true, -1, -1 ),
309 new RecordType ( TypeCode
.ExCDAudio
, "ExCDAudio", true, true, -1, -1 ),
310 new RecordType ( TypeCode
.ExWAVAudioEmbedded
, "ExWAVAudioEmbedded", true, true, -1, -1 ),
311 new RecordType ( TypeCode
.ExWAVAudioLink
, "ExWAVAudioLink", true, true, -1, -1 ),
312 new RecordType ( TypeCode
.ExOleObjStg
, "ExOleObjStg", false, true, -1, -1 ),
313 new RecordType ( TypeCode
.ExCDAudioAtom
, "ExCDAudioAtom", false, true, -1, -1 ),
314 new RecordType ( TypeCode
.ExWAVAudioEmbeddedAtom
, "ExWAVAudioEmbeddedAtom", false, true, -1, -1 ),
315 new RecordType ( TypeCode
.AnimationInfoAtom
, "AnimationInfoAtom", false, true, -1, -1 ),
316 new RecordType ( TypeCode
.RTFDateTimeMCAtom
, "RTFDateTimeMCAtom", false, true, -1, -1 ),
317 new RecordType ( TypeCode
.ProgTags
, "ProgTags", false, true, -1, -1 ), // don't know if container
318 new RecordType ( TypeCode
.ProgStringTag
, "ProgStringTag", true, true, -1, -1 ),
319 new RecordType ( TypeCode
.ProgBinaryTag
, "ProgBinaryTag", true, true, -1, -1 ),
320 new RecordType ( TypeCode
.BinaryTagData
, "BinaryTagData", false, true, -1, -1 ),
321 new RecordType ( TypeCode
.PrintOptions
, "PrintOptions", false, true, -1, -1 ),
322 new RecordType ( TypeCode
.PersistPtrFullBlock
, "PersistPtrFullBlock", false, true, -1, -1 ), // don't know if container
323 new RecordType ( TypeCode
.PersistPtrIncrementalBlock
, "PersistPtrIncrementalBlock", false, true, -1, -1 ),
324 new RecordType ( TypeCode
.GScalingAtom
, "GScalingAtom", false, true, -1, -1 ),
325 new RecordType ( TypeCode
.GRColorAtom
, "GRColorAtom", false, true, -1, -1 ),
327 new RecordType ( TypeCode
.EscherDggContainer
, "EscherDggContainer", true, true, -1, -1 ),
328 new RecordType ( TypeCode
.EscherDgg
, "EscherDgg", false, true, -1, -1 ),
329 new RecordType ( TypeCode
.EscherCLSID
, "EscherCLSID", false, true, -1, -1 ),
330 new RecordType ( TypeCode
.EscherOPT
, "EscherOPT", false, true, -1, -1 ),
331 new RecordType ( TypeCode
.EscherBStoreContainer
, "EscherBStoreContainer", true, true, -1, -1 ),
332 new RecordType ( TypeCode
.EscherBSE
, "EscherBSE", false, true, -1, -1 ),
333 new RecordType ( TypeCode
.EscherBlip_START
, "EscherBlip_START", false, true, -1, -1 ),
334 new RecordType ( TypeCode
.EscherBlip_END
, "EscherBlip_END", false, true, -1, -1 ),
335 new RecordType ( TypeCode
.EscherDgContainer
, "EscherDgContainer", true, true, -1, -1 ),
336 new RecordType ( TypeCode
.EscherDg
, "EscherDg", false, true, -1, -1 ),
337 new RecordType ( TypeCode
.EscherRegroupItems
, "EscherRegroupItems", false, true, -1, -1 ),
338 new RecordType ( TypeCode
.EscherColorScheme
, "EscherColorScheme", false, true, -1, -1 ),
339 new RecordType ( TypeCode
.EscherSpgrContainer
, "EscherSpgrContainer", true, true, -1, -1 ),
340 new RecordType ( TypeCode
.EscherSpContainer
, "EscherSpContainer", true, true, -1, -1 ),
341 new RecordType ( TypeCode
.EscherSpgr
, "EscherSpgr", false, true, -1, -1 ),
342 new RecordType ( TypeCode
.EscherSp
, "EscherSp", false, true, -1, -1 ),
343 new RecordType ( TypeCode
.EscherTextbox
, "EscherTextbox", false, true, -1, -1 ),
344 new RecordType ( TypeCode
.EscherClientTextbox
, "EscherClientTextbox", true, true, -1, -1 ),
345 new RecordType ( TypeCode
.EscherAnchor
, "EscherAnchor", false, true, -1, -1 ),
346 new RecordType ( TypeCode
.EscherChildAnchor
, "EscherChildAnchor", false, true, -1, -1 ),
347 new RecordType ( TypeCode
.EscherClientAnchor
, "EscherClientAnchor", false, true, -1, -1 ),
348 new RecordType ( TypeCode
.EscherClientData
, "EscherClientData", true, true, -1, -1 ),
349 new RecordType ( TypeCode
.EscherSolverContainer
, "EscherSolverContainer", true, true, -1, -1 ),
350 new RecordType ( TypeCode
.EscherConnectorRule
, "EscherConnectorRule", false, true, -1, -1 ),
351 new RecordType ( TypeCode
.EscherAlignRule
, "EscherAlignRule", false, true, -1, -1 ),
352 new RecordType ( TypeCode
.EscherArcRule
, "EscherArcRule", false, true, -1, -1 ),
353 new RecordType ( TypeCode
.EscherClientRule
, "EscherClientRule", false, true, -1, -1 ),
354 new RecordType ( TypeCode
.EscherCalloutRule
, "EscherCalloutRule", false, true, -1, -1 ),
355 new RecordType ( TypeCode
.EscherSelection
, "EscherSelection", false, true, -1, -1 ),
356 new RecordType ( TypeCode
.EscherColorMRU
, "EscherColorMRU", false, true, -1, -1 ),
357 new RecordType ( TypeCode
.EscherDeletedPspl
, "EscherDeletedPspl", false, true, -1, -1 ),
358 new RecordType ( TypeCode
.EscherSplitMenuColors
, "EscherSplitMenuColors", false, true, -1, -1 ),
359 new RecordType ( TypeCode
.EscherOleObject
, "EscherOleObject", false, true, -1, -1 ),
360 new RecordType ( TypeCode
.EscherUserDefined
, "EscherUserDefined", false, true, -1, -1 )
// Looks up `typecode` in the static `types` table with a linear scan,
// comparing each entry's typecode field against the argument.
// NOTE(review): the statements that return the match and the fallback
// result are not visible in this excerpt. FilterPPT.ParseElement reads
// `.is_container` on the result without a null check, so confirm what is
// returned for a type code that is not in the table.
363 public static RecordType
Find (TypeCode typecode
)
365 for (int i
= 0; i
< types
.Length
; i
++) {
366 if (types
[i
].typecode
== typecode
)
373 namespace Beagle
.Filters
{
375 public class FilterPPT
: Beagle
.Daemon
.Filter
{
377 private enum TextType
{
395 AddSupportedFlavor (FilterFlavor
.NewFromMimeType ("application/vnd.ms-powerpoint"));
396 textType
= TextType
.Invalid
;
// Reads one record from `stream` and handles it according to its type code.
//
// The first 8 bytes read are the record header: the type code is the 16-bit
// value at byte offset 2 and the payload length is the 32-bit value at byte
// offset 4 (both decoded by GetInt16 / GetInt32).
//
//  - Records whose RecordType entry is flagged is_container: a MainMaster
//    payload is skipped with Seek; otherwise the payload is walked by
//    calling ParseElement recursively and subtracting each returned length
//    from the remaining byte count.
//  - TextBytesAtom / TextCharsAtom: unless the current textType is NotUsed,
//    the payload is decoded to a string, cleaned up and handed to AppendText.
//  - TextHeaderAtom: the first 4 payload bytes are stored as the current
//    textType.
//
// NOTE(review): several lines (else branches, the reaction to a zero
// elem_length, the return statement) are not visible in this excerpt. The
// trailing comments suggest the return value is length + 8, i.e. payload
// plus header -- confirm.
402 private int ParseElement (Input stream
)
// Record header: 8 bytes.
404 byte [] data
= stream
.Read(8);
407 RecordType
.TypeCode opcode
= (RecordType
.TypeCode
) GetInt16(data
, 2);
408 int length
= GetInt32(data
, 4);
409 RecordType type
= RecordType
.Find (opcode
);
411 // Process the container tree
412 if (type
.is_container
) {
413 int length_remaining
= length
;
415 if (opcode
== RecordType
.TypeCode
.MainMaster
) {
416 // Ignore MainMaster container as it contains
417 // just a master-slide view and no user data.
418 stream
.Seek (length_remaining
, SeekOrigin
.Current
);
// Consume child records until the container's payload is used up.
420 while (length_remaining
> 0) {
421 int elem_length
= ParseElement(stream
);
422 if (elem_length
== 0)
424 length_remaining
-= elem_length
;
// Stays null for every record type that does not carry text.
429 System
.Text
.Encoding encoding
= null;
// NOTE(review): the commented-out line below used code page 28591
// (ISO-8859-1). The MS-PPT description of TextBytesAtom says each byte is
// the low byte of a UTF-16 character, which UTF-8 decoding would not
// reproduce for bytes >= 0x80 -- confirm which decoding is intended.
431 if (opcode
== RecordType
.TypeCode
.TextBytesAtom
) {
432 //encoding = System.Text.Encoding.GetEncoding (28591);
433 encoding
= System
.Text
.Encoding
.UTF8
;
434 } else if (opcode
== RecordType
.TypeCode
.TextCharsAtom
) {
435 encoding
= System
.Text
.Encoding
.Unicode
;
438 if (encoding
!= null && textType
!= TextType
.NotUsed
) {
439 StringBuilder strData
= new StringBuilder () ;
440 data
= stream
.Read(length
);
443 // Replace every ^M (carriage return) with a space;
444 // with ^M left in, the contents were not
445 // appended properly to the text pool.
446 strData
.Append (encoding
.GetString (data
).Replace ('\r', ' '));
448 // Replace every ^K (0x0B) with a space;
449 // with ^K left in, the contents were not
450 // appended properly to the text pool.
451 strData
.Replace ((char)0x0B, (char)0x20);
// Title, CenterBody and CenterTitle text is singled out here; the
// statement guarded by this test is not visible in this excerpt.
453 if (textType
== TextType
.Title
||
454 textType
== TextType
.CenterBody
||
455 textType
== TextType
.CenterTitle
)
457 AppendText (strData
.ToString());
461 AppendStructuralBreak ();
462 } else if (opcode
== RecordType
.TypeCode
.TextHeaderAtom
) {
// The header atom's 32-bit payload selects the TextType that applies to
// the text atoms handled above.
463 data
= stream
.Read (4);
464 textType
= (TextType
) GetInt32 (data
, 0);
// Skips `length` payload bytes; presumably the branch for all other
// record types -- the enclosing else is not visible here, confirm.
466 stream
.Seek(length
, SeekOrigin
.Current
);
471 // length = RecordHeader.recLen
472 // 8 = sizeof (RecordHeader)
473 // Every Atom/container is preceded by a RecordHeader
// Reads document metadata from the two OLE property streams and publishes
// selected values as Beagle properties.
//
// sumStream    -- summary-information stream, may be null.
// docSumStream -- document-summary-information stream, may be null.
//
// Each non-null stream is parsed with Msole.MetadataReadReal. A value is
// only added when its string form is non-null and non-empty.
//
//   from sumMeta:    dc:title        -> dc:title
//                    dc:subject      -> dc:subject
//                    dc:description  -> dc:description
//                    gsf:keywords    -> fixme:keywords
//                    gsf:creator     -> fixme:author
//   from docSumMeta: gsf:company     -> fixme:company
//                    gsf:slide-count -> fixme:slide-count
//
// NOTE(review): the declarations of `prop` and `str`, and any else / null
// guards between the visible statements, are missing from this excerpt.
478 private void ExtractMetaData (Input sumStream
, Input docSumStream
)
480 DocMetaData sumMeta
= null;
481 if (sumStream
!= null)
482 sumMeta
= Msole
.MetadataReadReal (sumStream
);
// Presumably the else branch of the test above -- confirm.
484 Logger
.Log
.Error ("SummaryInformationStream not found in {0}", FileName
);
486 DocMetaData docSumMeta
= null;
487 if (docSumStream
!= null)
488 docSumMeta
= Msole
.MetadataReadReal (docSumStream
);
// Presumably the else branch of the test above -- confirm.
490 Logger
.Log
.Error ("DocumentSummaryInformationStream not found in {0}", FileName
);
// Properties taken from the summary-information metadata.
494 if (sumMeta
!= null) {
495 prop
= sumMeta
.GetProp ("dc:title");
497 str
= Gsf
.Global
.GetPropValStr (prop
);
498 if (str
!= null && str
.Length
> 0)
499 AddProperty (Beagle
.Property
.New ("dc:title", str
));
502 prop
= sumMeta
.GetProp ("dc:subject");
504 str
= Gsf
.Global
.GetPropValStr (prop
);
505 if (str
!= null && str
.Length
> 0)
506 AddProperty (Beagle
.Property
.New ("dc:subject", str
));
509 prop
= sumMeta
.GetProp ("dc:description");
511 str
= Gsf
.Global
.GetPropValStr (prop
);
512 if (str
!= null && str
.Length
> 0)
513 AddProperty (Beagle
.Property
.New ("dc:description", str
));
516 prop
= sumMeta
.GetProp ("gsf:keywords");
518 str
= Gsf
.Global
.GetPropValStr (prop
);
519 if (str
!= null && str
.Length
> 0)
520 AddProperty (Beagle
.Property
.New ("fixme:keywords", str
));
523 prop
= sumMeta
.GetProp ("gsf:creator");
525 str
= Gsf
.Global
.GetPropValStr (prop
);
526 if (str
!= null && str
.Length
> 0)
527 AddProperty (Beagle
.Property
.New ("fixme:author", str
));
// Properties taken from the document-summary-information metadata.
530 if (docSumMeta
!= null) {
532 prop
= docSumMeta
.GetProp ("gsf:company");
534 str
= Gsf
.Global
.GetPropValStr (prop
);
535 if (str
!= null && str
.Length
> 0)
536 AddProperty (Beagle
.Property
.New ("fixme:company", str
));
539 prop
= docSumMeta
.GetProp ("gsf:slide-count");
541 str
= Gsf
.Global
.GetPropValStr (prop
);
542 if (str
!= null && str
.Length
> 0)
543 AddProperty (Beagle
.Property
.New ("fixme:slide-count", str
));
// Locates the two summary property streams among the children of `file`
// by name and passes them to ExtractMetaData. Exceptions are caught and
// logged with their message.
//
// NOTE(review): the name "DocumentSummaryInformation" itself contains
// "SummaryInformation", so the first test below matches both stream names
// and only its `found < 1` guard separates them. Which child ends up in
// sumStream may therefore depend on child order. The updates to `found`
// are not visible in this excerpt -- confirm.
546 override protected void DoPullProperties ()
548 Input sumStream
= null;
549 Input docSumStream
= null;
560 // FIXME: Should try to use Encoding instead of
561 // string.IndexOf ()... Hacky stuff ;-)
562 childCount
= file
.NumChildren();
// The scan stops early once `found` reaches 2.
563 for (int i
= 0; i
< childCount
&& found
!= 2; i
++) {
564 str
= file
.NameByIndex (i
);
565 if (str
.IndexOf ("SummaryInformation") > -1 && found
< 1) {
566 sumStream
= file
.ChildByIndex (i
);
569 else if (str
.IndexOf ("DocumentSummaryInformation") > -1) {
570 docSumStream
= file
.ChildByIndex (i
);
// Either stream may still be null here; ExtractMetaData tests both for null.
574 ExtractMetaData (sumStream
, docSumStream
);
575 } catch (Exception e
) {
// NOTE(review): the message below misspells "during" as "duing".
576 Logger
.Log
.Error ("Exception {0} occurred duing DoPullProperties.", e
.Message
);
// Extracts text by opening the "PowerPoint Document" child stream of
// `file` and feeding it to ParseElement. An error is logged when the
// stream is missing, and any exception is caught and logged with its
// message.
// NOTE(review): only a single ParseElement call is visible in this
// excerpt; the loop that the comments below describe is presumably on
// lines that are missing here -- confirm.
581 override protected void DoPull ()
590 stream
= file
.ChildByName ("PowerPoint Document");
592 if (stream
!= null) {
594 // Parsing used to stop as soon as the "EndDocument"
595 // record was parsed. We need to continue
596 // parsing until the end of the file, because some
597 // slides are stored after the actual "Document" container.
599 // PPTs exported from OO.o write almost all of their slides
600 // after the "Document" container,
601 // and certain other PPTs also have some slides after
602 // the "Document" container.
604 ParseElement (stream
);
// Presumably the else branch of the null test above -- confirm.
606 Logger
.Log
.Error ("Ole stream not found in {0}. Content extraction skipped.", FileName
);
609 } catch (Exception e
) {
610 Logger
.Log
.Error ("Exception {0} occurred during DoPull.", e
.Message
);
// Opens `info` as an OLE compound file and selects the storage that later
// stages read from.
//
// Steps visible here:
//   1. Remember the full path in FileName.
//   2. Map the file (Input.MmapNew), uncompress it and wrap it in an
//      InfileMSOle stored in `file`; failures are logged.
//   3. If a "PP97_DUALSTORAGE" child exists and has a non-zero handle,
//      `file` is replaced by that storage. Otherwise, a "Header" child or a
//      missing "PowerPoint Document" child is reported as an unsupported
//      PPT 95/4.0 file.
//
// NOTE(review): what happens after each error is logged (return, Error (),
// ...) is on lines that are not visible in this excerpt.
615 override protected void DoOpen (FileInfo info
)
617 FileName
= info
.FullName
;
621 Input input
= Input
.MmapNew (info
.FullName
);
623 input
= input
.Uncompress();
624 file
= new InfileMSOle (input
);
// NOTE(review): a `new` expression does not evaluate to null, so the
// `file == null` half of this test looks redundant -- confirm.
626 if (input
== null || file
== null) {
627 Logger
.Log
.Error ("Unable to open [{0}] ",info
.FullName
);
// The caught exception's details are not part of the logged message.
632 } catch (Exception e
) {
633 Logger
.Log
.Error ("Unable to open "+info
.FullName
);
638 // The PPT 95/97-2000 format contains a "PP97_DUALSTORAGE", which is required
639 // to index PPT 97-2000 files.
640 // We don't support PPT 95 files; however, we happily accept patches ;-)
642 Input dualStorTemp
= null;
644 if ((dualStorTemp
= file
.ChildByName ("PP97_DUALSTORAGE")) != null) {
645 // "PP97_DUALSTORAGE" is a storage containing some streams
646 if (dualStorTemp
.Handle
!= IntPtr
.Zero
)
647 file
= (Gsf
.Infile
) GLib
.Object
.GetObject (dualStorTemp
.Handle
);
// A "Header" child, or no "PowerPoint Document" child, is treated as the
// signature of a PPT 95/4.0 file.
648 } else if (((dualStorTemp
= file
.ChildByName ("Header")) != null) ||
649 ((dualStorTemp
= file
.ChildByName ("PowerPoint Document")) == null)) {
650 Logger
.Log
.Error ("{0} is a PPT 95/4.0 file. Beagle does not support PPT 95 files. Skipping...", FileName
);
// The caught exception's details are not part of the logged message.
653 } catch (Exception e
) {
655 Logger
.Log
.Error ("Unable to open OleFile stream of "+info
.FullName
);
660 // FIXME: These are utility functions and can be useful
661 // outside this filter as well.
// Decodes the four bytes of `data` starting at `offset` as a little-endian
// 32-bit integer: data[offset] is the least significant byte and
// data[offset + 3] the most significant one.
//
// data   -- buffer to read from.
// offset -- index of the first (lowest) byte.
//
// Returns the assembled value. It is negative when the top bit of
// data[offset + 3] is set. No bounds checking is done here, so indexing
// past the end of `data` raises IndexOutOfRangeException.
public static int GetInt32 (byte [] data, int offset)
{
	int b0 = data [offset];
	int b1 = data [offset + 1];
	int b2 = data [offset + 2];
	int b3 = data [offset + 3];

	// The shifted bytes occupy disjoint bit ranges, so combining them
	// with OR yields exactly the same value as adding them.
	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
// Decodes the two bytes of `data` starting at `offset` as a little-endian
// 16-bit quantity: data[offset] is the low byte, data[offset + 1] the high
// byte.
//
// data   -- buffer to read from.
// offset -- index of the low byte.
//
// Returns the value as a non-negative int in the range 0..65535 (it is not
// sign-extended). No bounds checking is done here, so indexing past the end
// of `data` raises IndexOutOfRangeException.
public static int GetInt16 (byte [] data, int offset)
{
	int low = data [offset];
	int high = data [offset + 1];

	// The two bytes do not overlap once `high` is shifted, so OR gives
	// the same value as addition.
	return low | (high << 8);
}