* Filters/FilterPackage.cs, Filters/FilterRPM.cs,
[beagle.git] / Util / ChmFile.cs
blob6760cca4ec23ff71145351956fb958743dc24676
1 //
2 // CHMFile.cs: Basic chmlib Wrapper, CHM file format reader.
3 //
4 // Author:
5 // Miguel Fernando Cabrera <mfcabrer@unalmed.edu.co>
6 //
7 // Based on Razvan Cojocaru's X-CHM::CHMFile.
8 // Uses Jed Wing's CHMLib.
9 // For more information about CHM file format
10 // check out Pabs' CHM spec at http://bonedaddy.net/pabs3/hhm
11 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
21 // The above copyright notice and this permission notice shall be included in all
22 // copies or substantial portions of the Software.
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 // SOFTWARE.
33 using System;
34 using System.Runtime.InteropServices;
35 using System.Collections;
36 using System.IO;
37 using System.Text;
40 #if true
41 namespace Beagle.Util {
42 #endif
47 /*just to make it 'right'*/
// Status returned by chm_resolve_object.
// The member spelling "Sucess" is kept on purpose: renaming it would break
// any existing reference to the enum.
enum ChmResolve {
    Sucess = 0,
    Failure = 1
}
// Bit mask passed to chm_enumerate to select which kinds of archive entries
// are reported. Values are individual bits, so the enum is marked [Flags] to
// make combinations format and parse correctly.
[Flags]
enum ChmEnumerate {
    Normal  = 1,
    Meta    = 1 << 1,
    Special = 1 << 2,
    Files   = 1 << 3,
    Dirs    = 1 << 4,
    // Union of all five bits above (0x1F).
    All     = (1 << 5) - 1
}
// Callback used by ChmFile.ParseContents: it is invoked once per collected
// file with a reader over that file's extracted text.
public delegate void ChmHtmlParseFunc(TextReader stream);
67 public class ChmFile : IDisposable {
// Archive title; stays empty until WindowsInfo or SystemInfo finds one.
private string title = "";
//private ArrayList tocStrings;

// Paths collected by the GetHtmlFiles enumeration callback.
private ArrayList htmlFiles;

// Path of the table-of-contents file and whether one was found (set by WindowsInfo).
private string topicsFile;
private bool hasTopics = false;

// Handle returned by chm_open; IntPtr.Zero while no archive is open.
private IntPtr chmfile = IntPtr.Zero;

// True between a successful Load and CleanUp.
private bool loaded = false;

// Size in bytes of the scratch buffers handed to chm_retrieve_object.
private const int bufSize = 4096;

// Path of the archive's default page (set by WindowsInfo); empty if unknown.
private string defaultFile = "";

// Title of the loaded archive, or "" when none was found.
public string Title
{
    get { return title; }
}

// Whether a topics (table of contents) file was found in the archive.
public bool HasTopics
{
    get { return hasTopics; }
}
95 /* ChmLib Glue */
// Managed counterpart of the native unit-info record that chm_resolve_object
// fills in and chm_retrieve_object / the enumeration callback consume.
// It is a class (not a struct) so it is passed to native code by reference.
//
// NOTE(review): this layout (two 64-bit fields, an int, then a 257-byte
// inline path) presumably matches the chmlib version Beagle was built
// against; later chmlib headers appear to add a field and enlarge the path.
// Confirm against the libchm actually installed.
[StructLayout(LayoutKind.Sequential)]
private class chmUnitInfo
{
    public UInt64 start;
    public UInt64 length;
    public int space;

    // Fixed-size inline character buffer, marshalled as a string.
    [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
    public string path;

    /*[MarshalAs (UnmanagedType.ByValArray, SizeConst=257)]
    public char[] path;*/
}
// Signature of the callback chm_enumerate invokes for each archive entry.
private delegate int ChmEnumerator(IntPtr chmFile,
                                   chmUnitInfo info,
                                   IntPtr context);

// Opens the archive at 'filename'; Load treats an IntPtr.Zero result as failure.
[DllImport("libchm")]
private static extern IntPtr chm_open(string filename);

// Looks up 'objPath' inside the archive and fills 'ui' with its description.
[DllImport("libchm")]
private static extern ChmResolve chm_resolve_object(IntPtr raw,
                                                    string objPath,
                                                    [Out] chmUnitInfo ui);

// Copies up to 'len' bytes of the object described by 'ui', starting at
// 'addr', into 'buf'. Callers in this class treat a 0 result as "nothing read".
[DllImport("libchm")]
private static extern UInt64 chm_retrieve_object(IntPtr raw,
                                                 [In, Out] chmUnitInfo ui,
                                                 IntPtr buf,
                                                 UInt64 addr,
                                                 UInt64 len);

// Calls 'e' for every archive entry selected by the 'what' bit mask.
[DllImport("libchm")]
private static extern int chm_enumerate(IntPtr raw,
                                        ChmEnumerate what,
                                        ChmEnumerator e,
                                        IntPtr context);
// Enumeration callback handed to chm_enumerate: remembers the path of every
// entry that looks like an HTML page.
//
// Compared with a plain EndsWith(".html") this also accepts ".htm" and any
// letter case, and it tolerates a missing path so that no exception escapes
// into the native caller.
//
// Always returns 1 (CHM_ENUMERATOR_CONTINUE in chm_lib.h) so the enumeration
// keeps going.
private int GetHtmlFiles(IntPtr chmFile,
                         chmUnitInfo info,
                         IntPtr context)
{
    if (info != null && info.path != null) {
        string path = info.path.Trim();

        if (PathHasSuffix(path, ".html") || PathHasSuffix(path, ".htm"))
            htmlFiles.Add(path);
    }

    return 1;
}

// Case-insensitive, culture-invariant "ends with" test.
private static bool PathHasSuffix(string value, string suffix)
{
    if (value.Length < suffix.Length)
        return false;

    return String.Compare(value, value.Length - suffix.Length,
                          suffix, 0, suffix.Length,
                          true,
                          System.Globalization.CultureInfo.InvariantCulture) == 0;
}
// Reads a NUL-terminated, single-byte-per-character string from unmanaged
// memory.
//
//   ptr    - base address of the unmanaged buffer
//   offset - byte offset of the first character
//   len    - maximum number of bytes to examine
//
// Each byte is widened directly to a char. Reading stops at the first zero
// byte or after 'len' bytes, whichever comes first; the terminator is not
// part of the result.
private string ChmGetString(IntPtr ptr,
                            int offset,
                            int len)
{
    char[] chars = new char[len];
    int count = 0;

    while (count < len) {
        byte current = Marshal.ReadByte(ptr, offset + count);
        if (current == 0)
            break;

        chars[count] = (char) current;
        count++;
    }

    return new string(chars, 0, count);
}
// Extracts the whole object described by 'ui' from the open archive and
// returns it as one string.
//
// The object is pulled in chunks of (tmpBufSize - 1) bytes; a chunk shorter
// than that marks the end of the object. The scratch buffer is released in a
// finally block so it cannot leak if retrieval or string conversion throws.
//
// NOTE(review): Marshal.PtrToStringAuto is platform dependent and converts
// each chunk on its own, so a multi-byte character that straddles a chunk
// boundary is presumably decoded incorrectly -- confirm and consider
// decoding the accumulated bytes in one go.
private string ChmFileToString(chmUnitInfo ui)
{
    const ulong tmpBufSize = 1025;
    const ulong chunkSize = tmpBufSize - 1;

    StringBuilder strb = new StringBuilder();
    ulong cur = 0;
    ulong size;

    IntPtr raw = Marshal.AllocCoTaskMem((int) tmpBufSize);

    try {
        do {
            size = chm_retrieve_object(chmfile, ui, raw, cur, chunkSize);

            // Nothing (more) to read, or a count that cannot be valid for
            // this buffer: stop instead of converting garbage.
            if (size == 0 || size > chunkSize)
                break;

            // PtrToStringAuto produces an independent managed copy, so the
            // builder's content survives freeing 'raw' below.
            strb.Append(Marshal.PtrToStringAuto(raw, (int) size));
            cur += size;
        }
        while (size == chunkSize);
    }
    finally {
        Marshal.FreeCoTaskMem(raw);
    }

    return strb.ToString();
}
// Closes an archive handle previously returned by chm_open.
[DllImport("libchm")]
private static extern void chm_close(IntPtr raw);

// Releases the native archive handle, if any, and marks the object as not
// loaded.
//
// The handle comes from chm_open, so it has to be released with chm_close;
// handing it to Marshal.FreeCoTaskMem is the wrong deallocator for it. The
// field is reset to IntPtr.Zero so that calling CleanUp again (Dispose
// followed by another Dispose, or a finalizer run) is a harmless no-op.
private void CleanUp()
{
    if (chmfile != IntPtr.Zero) {
        chm_close(chmfile);
        chmfile = IntPtr.Zero;
    }

    this.loaded = false;
}

// Releases the archive deterministically and suppresses finalization.
public void Dispose()
{
    CleanUp();
    GC.SuppressFinalize(this);
}

// Safety net for callers that never call Dispose.
~ChmFile()
{
    CleanUp();
}
// Creates a reader with an empty file list; no archive is opened until Load
// is called.
public ChmFile()
{
    //this.tocStrings = new ArrayList();
    htmlFiles = new ArrayList();
}
// Opens the CHM archive at 'path', reads its title / topics / default-page
// information and collects the paths of its HTML files.
//
// Returns true when archive information was found in #WINDOWS or #SYSTEM.
// Throws ArgumentNullException when 'path' is null, and System.Exception
// when chm_open cannot open the file.
//
// Calling Load again on the same instance first releases the previous
// archive and resets everything derived from it, so handles do not leak and
// file lists or titles from different archives are never mixed.
public bool Load(string path)
{
    if (path == null)
        throw new ArgumentNullException("path");

    // Drop state left over from an earlier Load.
    if (this.loaded)
        CleanUp();

    htmlFiles.Clear();
    this.title = "";
    this.topicsFile = null;
    this.hasTopics = false;
    this.defaultFile = "";

    this.chmfile = chm_open(path);

    if (this.chmfile == IntPtr.Zero) {
        throw new System.Exception ("Invalid file Type, not a CHM file");
    }

    this.loaded = true;

    bool info = GetArchiveInfo();

    /*if(info && HasTopics)
        BuildTopicsList();*/

    // GetHtmlFiles is called back for every entry and fills htmlFiles.
    chm_enumerate(this.chmfile,
                  ChmEnumerate.All,
                  new ChmEnumerator(GetHtmlFiles),
                  IntPtr.Zero);

    /* Debug aid:
    foreach(string str in htmlFiles)
        Console.WriteLine(str); */

    return info;
}
// From the #SYSTEM file we are only interested in the title (for now).
// Scans the archive's "/#SYSTEM" object for the title entry (code 3).
//
// Returns true when a title entry was found. The title field itself is only
// overwritten when it is still empty, so a title obtained earlier (for
// example by WindowsInfo) wins.
//
// The data read starts 4 bytes into the object and is treated as a sequence
// of entries, each made of a 16-bit code, a 16-bit data length and the data.
// At most bufSize bytes are examined. All reads are clamped to the number of
// bytes actually retrieved, and the scratch buffer is released on every
// path, including the early exits.
private bool SystemInfo()
{
    if (!loaded)
        return false;

    chmUnitInfo ui = new chmUnitInfo();

    if (chm_resolve_object(this.chmfile, "/#SYSTEM", ui) == ChmResolve.Failure)
        return false;

    bool gottitle = false;
    IntPtr buf = Marshal.AllocCoTaskMem(bufSize);

    try {
        long size = (long) chm_retrieve_object(this.chmfile, ui, buf, 4, (ulong) bufSize);

        // Never trust a count larger than the buffer we supplied.
        if (size > bufSize)
            size = bufSize;

        int index = 0;

        // An entry needs a complete 4-byte header (code + length).
        while (index + 4 <= size) {
            ushort code = (ushort) Marshal.ReadInt16(buf, index);
            ushort len = (ushort) Marshal.ReadInt16(buf, index + 2);
            int dataStart = index + 4;

            if (code == 3) {
                //Console.WriteLine ("We got it");
                // The declared length may run past what was retrieved.
                int readable = Math.Min((int) len, (int) size - dataStart);

                if (this.title == "")
                    this.title = ChmGetString(buf, dataStart, readable);

                gottitle = true;
                break;
            }

            // Skip this entry's data to reach the next header.
            index = dataStart + len;
        }
    }
    finally {
        Marshal.FreeCoTaskMem(buf);
    }

    return gottitle;
}
// TODO:
// We should throw something like a FileNotFoundException.
// Returns a reader over the text of the archive object at 'path'.
// When the object cannot be resolved an empty reader is returned instead of
// raising an error.
public TextReader GetFile(string path)
{
    chmUnitInfo ui = new chmUnitInfo();
    ChmResolve outcome = chm_resolve_object(chmfile, path, ui);

    //Console.WriteLine("Fails to Open: {0}",path);
    string contents = (outcome == ChmResolve.Failure) ? "" : ChmFileToString(ui);

    return new StringReader(contents);
}
// Returns a reader over the archive's topics (table of contents) file, or an
// empty reader when the archive has none.
public TextReader GetTopicsFile()
{
    if (!HasTopics)
        return new StringReader("");

    return GetFile(topicsFile);
}
// Returns a reader over the archive's default page, as resolved by GetFile
// from the stored default-file path.
public TextReader GetDefaultFile()
{
    string target = defaultFile;
    return GetFile(target);
}
// Reads the "/#WINDOWS" object and, through the offsets it contains, the
// title, topics file and default file names stored in "/#STRINGS".
//
// Returns false when #WINDOWS or #STRINGS cannot be resolved, when the
// #WINDOWS header or entries cannot be read, or when the header is
// implausible; returns true otherwise (even if no string could be read).
//
// Differences from a naive implementation, all of them defensive:
//  * both unmanaged buffers are released on every exit path;
//  * the entry count / entry size taken from the file are validated before
//    they are used for allocation and indexing;
//  * every string, including the title, is addressed by its offset inside
//    the 4096-byte #STRINGS block that holds it, and the block currently in
//    the buffer is tracked explicitly;
//  * string reads never go past the bytes actually retrieved.
//
// From Pabs' CHM Spec:
// "(STRINGS)This file is a list of ANSI/UTF-8 NT strings.
//  The first is just a NIL character so that offsets to this file can specify
//  zero & get a valid string.
//  The strings are sliced up into blocks that are 4096 bytes in length."
private bool WindowsInfo()
{
    const int headerLen = 0x8;
    // The last field read from an entry is the 4-byte value at 0x68.
    const int minEntrySize = 0x68 + 4;

    chmUnitInfo ui = new chmUnitInfo();

    if (chm_resolve_object(chmfile, "/#WINDOWS", ui) == ChmResolve.Failure)
        return false;

    IntPtr buf = Marshal.AllocCoTaskMem(bufSize);
    IntPtr windowsData = IntPtr.Zero;

    try {
        if (chm_retrieve_object(chmfile, ui, buf, 0, headerLen) < (ulong) headerLen)
            return false;

        int entries = Marshal.ReadInt32(buf);
        int entrySize = Marshal.ReadInt32(buf, 0x4);

        //Console.WriteLine ("entries -> {0}\nsize = {1}",entries,entrySize);

        if (entries <= 0 || entrySize < minEntrySize)
            return false;

        // Reject sizes that overflow an int or exceed the object itself
        // (ui.length is taken to be the size of the resolved object).
        long total = (long) entries * entrySize;
        if (total > Int32.MaxValue || (ulong) total > ui.length)
            return false;

        windowsData = Marshal.AllocCoTaskMem((int) total);

        long got = (long) chm_retrieve_object(chmfile,
                                              ui,
                                              windowsData,
                                              headerLen,
                                              (ulong) total);
        if (got <= 0)
            return false;

        // Only walk entries that were completely retrieved.
        int usableEntries = (int) Math.Min((long) entries, got / entrySize);

        if (chm_resolve_object(chmfile, "/#STRINGS", ui) == ChmResolve.Failure)
            return false;

        long cachedBlock = -1;   // no #STRINGS block in 'buf' yet
        long cachedSize = 0;

        for (int i = 0; i < usableEntries; i++) {
            int offset = i * entrySize;

            uint offTitle = (uint) Marshal.ReadInt32(windowsData, offset + 0x14);
            uint offTocFile = (uint) Marshal.ReadInt32(windowsData, offset + 0x60);
            uint offDefaultFile = (uint) Marshal.ReadInt32(windowsData, offset + 0x68);

            //Console.WriteLine("offTocFile = {0}",offTocFile);

            string text = ReadStringsEntry(ui, buf, offTitle, ref cachedBlock, ref cachedSize);
            if (text != null)
                this.title = text;

            text = ReadStringsEntry(ui, buf, offTocFile, ref cachedBlock, ref cachedSize);
            if (text != null) {
                topicsFile = "/" + text;
                hasTopics = true;
            }

            text = ReadStringsEntry(ui, buf, offDefaultFile, ref cachedBlock, ref cachedSize);
            if (text != null)
                defaultFile = "/" + text;
        }

        return true;
    }
    finally {
        Marshal.FreeCoTaskMem(buf);
        if (windowsData != IntPtr.Zero)
            Marshal.FreeCoTaskMem(windowsData);
    }
}

// Returns the #STRINGS string that starts at 'stringOffset', or null when the
// offset is 0 (meaning "no value") or the string cannot be read.
// 'cachedBlock' / 'cachedSize' describe the block currently held in
// 'blockBuf', so consecutive lookups in the same block reuse it.
private string ReadStringsEntry(chmUnitInfo stringsUi,
                                IntPtr blockBuf,
                                uint stringOffset,
                                ref long cachedBlock,
                                ref long cachedSize)
{
    const int blockSize = 4096;

    if (stringOffset == 0)
        return null;

    long block = stringOffset / blockSize;
    int within = (int) (stringOffset % blockSize);

    if (block != cachedBlock) {
        cachedSize = (long) chm_retrieve_object(chmfile,
                                                stringsUi,
                                                blockBuf,
                                                (ulong) block * blockSize,
                                                (ulong) bufSize);
        cachedBlock = block;
    }

    // Bytes of this block that are really present in the buffer.
    long valid = Math.Min(cachedSize, (long) bufSize);

    if (valid <= within)
        return null;

    return ChmGetString(blockBuf, within, (int) valid - within);
}
// Gathers what is known about the archive: the title and the topics /
// default file names from #WINDOWS, plus the title from #SYSTEM.
// Returns true when at least one of the two sources yielded information, and
// false when no archive is loaded.
private bool GetArchiveInfo()
{
    if (!this.loaded)
        return false;

    // Both sources are always queried; #WINDOWS goes first, and SystemInfo
    // only fills in the title when it is still empty.
    bool fromWindows = WindowsInfo();
    bool fromSystem = SystemInfo();

    return fromSystem || fromWindows;
}
// Feeds every HTML file collected by Load to 'Parse', one reader per file,
// with surrounding whitespace trimmed from the extracted text.
// Does nothing when no archive is loaded.
//
// Entries that can no longer be resolved are skipped; extracting from a
// unit-info record that chm_resolve_object did not fill in would be
// meaningless.
public void ParseContents(ChmHtmlParseFunc Parse)
{
    if (!this.loaded)
        return;

    foreach (string fileName in htmlFiles) {
        chmUnitInfo ui = new chmUnitInfo();

        if (chm_resolve_object(this.chmfile, fileName, ui) == ChmResolve.Failure)
            continue;

        //Console.WriteLine("Parsing....{0}",ui.path);
        ///Logger.Log.Debug("CHMFile: Parsing {0}....",ui.path);
        Parse(new StringReader(ChmFileToString(ui).Trim()));
    }
}
568 #if false
// Debug helper: prints a marker line followed by the first line of 'text'.
public static void ParseTest(TextReader text)
{
    Console.WriteLine("in ParseText");

    string firstLine = text.ReadLine();
    Console.WriteLine(firstLine);
}
576 #endif
579 #if false
// Manual smoke test (only compiled when the surrounding #if is enabled):
// loads "olib.chm", prints the load result and title, then passes the topics
// file to ParseTest.
//
// ParseTest is a static method, so it is called directly; the earlier
// "new ParseTest(r)" form could not compile. The reader is disposed when
// done so the archive handle is released.
static void Main()
{
    ChmFile reader = new ChmFile();

    try {
        bool res = reader.Load("olib.chm");
        //reader.GetArchiveInfo();
        Console.WriteLine("The response {0}",res);
        Console.WriteLine("El titulo es " + reader.Title);

        TextReader r = reader.GetTopicsFile();

        /*string line;
        while((line = r.ReadLine()) != null)
            Console.WriteLine(line);*/

        Console.WriteLine("Waza???");

        //Console.WriteLine(r.ReadToEnd());

        ParseTest(r);

        r.Close();

        //reader.ParseContents(new ChmHtmlParseFunc(ChmFile.ParseTest));

        /*
        reader.Load("/home/ceruno/Documentacion/Biblioteca/WebDev/JavaScript/Java Script Pocket Reference 2nd Edition.chm");

        //reader.GetArchiveInfo();

        Console.WriteLine("El titulo es " + reader.Title);*/
    }
    finally {
        reader.Dispose();
    }
}
617 #endif
626 #if true
628 #endif