QueryResponses.cs, DumpIndex.cs, IQueryResult.cs, QueryExecutor.cs, QueryResult.cs...
[beagle.git] / Util / ChmFile.cs
blob3c2ecc89cc5e11cbe1e3e71efe2e75b829250ead
1 //
2 // CHMFile.cs: Basic chmlib Wrapper, CHM file format reader.
3 //
4 //
5 // Copyright (C) 2005,2006 Miguel Fernando Cabrera <mfcabrera@gmail.com>
6 //
7 // Based on Razvan Cojocaru's X-CHM::CHMFile.
8 // Uses Jed Wing's CHMLib.
9 // For more information about CHM file format
10 // check out Pabs' CHM spec at http://bonedaddy.net/pabs3/hhm
11 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
21 // The above copyright notice and this permission notice shall be included in all
22 // copies or substantial portions of the Software.
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 // SOFTWARE.
33 using System;
34 using System.Runtime.InteropServices;
35 using System.Collections;
36 using System.IO;
37 using System.Text;
40 #if true
41 namespace Beagle.Util {
42 #endif
47 /*just to make it 'right'*/
// Status values used as the return type of chm_resolve_object.
// Callers in this file only ever compare against Failure.
enum ChmResolve {
    Sucess = 0,
    Failure = 1
}
// Bit mask passed as the "what" argument of chm_enumerate.
enum ChmEnumerate {
    Normal = 0x01,
    Meta = 0x02,
    Special = 0x04,
    Files = 0x08,
    Dirs = 0x10,
    // Every bit above set (numerically 31).
    All = Normal | Meta | Special | Files | Dirs
}
// Callback handed to ChmFile.ParseContents; it is invoked once per
// collected HTML file with a reader over that file's text.
public delegate void ChmHtmlParseFunc (TextReader stream);
67 public class ChmFile : IDisposable {
// Size in bytes of the scratch buffers used when reading archive metadata.
private const int bufSize = 4096;

// Handle returned by chm_open; IntPtr.Zero while no archive is open.
private IntPtr chmfile = IntPtr.Zero;
// True once Load has opened an archive successfully.
private bool loaded = false;

// Archive title, filled in by WindowsInfo / SystemInfo.
private string title = "";
// Path of the table-of-contents file; only meaningful when hasTopics is true.
private string topicsFile;
private bool hasTopics = false;
// Path of the archive's default page, filled in by WindowsInfo.
private string defaultFile = "";

// Paths collected by the GetHtmlFiles enumeration callback.
private ArrayList htmlFiles;
// Title of the loaded archive, or "" when none was found.
public string Title {
    get {
        return this.title;
    }
}
// True when WindowsInfo found a table-of-contents file name.
public bool HasTopics {
    get {
        return this.hasTopics;
    }
}
95 /* ChmLib Glue */
// Managed counterpart of the unit-info record that libchm fills in when
// an object is resolved or enumerated. Field order matters: the layout
// is sequential and is marshalled to native code as-is.
// NOTE(review): the path field is marshalled as a 512-byte inline string;
// confirm this matches the path array size in the installed chm_lib.h.
[StructLayout (LayoutKind.Sequential)]
private class chmUnitInfo {
    public ulong start;
    public ulong length;
    public int space;
    public int flags;

    [MarshalAs (UnmanagedType.ByValTStr, SizeConst = 512)]
    public string path;
}
// Signature of the callback given to chm_enumerate; GetHtmlFiles is the
// only implementation in this class.
private delegate int ChmEnumerator (IntPtr chmFile, chmUnitInfo info, IntPtr context);
// Opens the archive at filename and returns its native handle.
// Load treats an IntPtr.Zero result as "not a CHM file".
[DllImport ("libchm.so.0")]
private static extern IntPtr chm_open (string filename);
// Looks up objPath inside the open archive and fills ui with its
// location. Callers compare the result against ChmResolve.Failure.
[DllImport ("libchm.so.0")]
private static extern ChmResolve chm_resolve_object (IntPtr raw, string objPath, [Out] chmUnitInfo ui);
// Reads from the object described by ui into buf, starting at byte
// offset addr and asking for len bytes. Callers in this file use the
// return value as the number of bytes placed in buf, and treat 0 as
// "nothing read".
[DllImport ("libchm.so.0")]
private static extern ulong chm_retrieve_object (IntPtr raw, [In, Out] chmUnitInfo ui, IntPtr buf, ulong addr, ulong len);
// Walks the archive's entries selected by the "what" mask, invoking e
// for each of them; context is passed through to the callback.
[DllImport ("libchm.so.0")]
private static extern int chm_enumerate (IntPtr raw, ChmEnumerate what, ChmEnumerator e, IntPtr context);
// Enumeration callback for chm_enumerate: records the path of every
// entry that looks like an HTML page in htmlFiles.
//
// Both ".html" and ".htm" are accepted, in any letter case; the path is
// trimmed before the suffix test so the stored value and the tested
// value are the same string.
//
// Always returns 1, which Load's enumeration relies on to keep going
// (CHM_ENUMERATOR_CONTINUE in chmlib's header -- confirm against chm_lib.h).
private int GetHtmlFiles(IntPtr chmFile,
                         chmUnitInfo info,
                         IntPtr context)
{
    if (info != null && info.path != null) {
        string path = info.path.Trim ();
        // Invariant lower-casing keeps the comparison independent of the
        // current culture (e.g. the Turkish dotless i).
        string lowered = path.ToLower (System.Globalization.CultureInfo.InvariantCulture);

        if (lowered.EndsWith (".html") || lowered.EndsWith (".htm"))
            htmlFiles.Add (path);
    }

    return 1;
}
// Builds a string from at most len bytes found at ptr + offset, stopping
// early at the first zero byte. Each byte becomes one char with the same
// numeric value; no other decoding is applied.
private string ChmGetString(IntPtr ptr,
                            int offset,
                            int len)
{
    char[] chars = new char[len];
    int count = 0;

    while (count < len) {
        char current = (char) Marshal.ReadByte (ptr, offset + count);
        chars[count] = current;

        if (current == '\0')
            break;

        count++;
    }

    // count is the number of characters stored before the terminator.
    return new string (chars, 0, count);
}
// Reads the whole object described by ui and returns it as one string.
//
// The object is pulled in 1024-byte chunks through a temporary unmanaged
// buffer; each chunk is converted with Marshal.PtrToStringAuto and
// appended. Reading stops at the first chunk that comes back shorter
// than requested.
//
// The buffer is released in a finally block so that it is not leaked
// when a conversion or append throws.
//
// NOTE(review): converting chunk by chunk can split a multi-byte
// character that straddles a chunk boundary, and PtrToStringAuto is
// platform dependent -- confirm the intended encoding before changing it.
private string ChmFileToString(chmUnitInfo ui)
{
    const ulong tmpBufSize = 1025;
    const ulong chunkSize = tmpBufSize - 1;

    StringBuilder text = new StringBuilder ();
    ulong position = 0;
    ulong got;

    IntPtr raw = Marshal.AllocCoTaskMem ((int) tmpBufSize);

    try {
        do {
            got = chm_retrieve_object (chmfile, ui, raw, position, chunkSize);

            // 0 means nothing was read; anything above the request is
            // not a valid byte count and must not be used as a length.
            if (got == 0 || got > chunkSize)
                break;

            // PtrToStringAuto copies the bytes into a managed string, so
            // the text stays valid after raw is freed.
            text.Append (Marshal.PtrToStringAuto (raw, (int) got));
            position += got;
        } while (got == chunkSize);
    } finally {
        Marshal.FreeCoTaskMem (raw);
    }

    return text.ToString ();
}
// Native counterpart of chm_open: closes the archive and releases
// everything libchm allocated for the handle.
[DllImport ("libchm.so.0")]
private static extern void chm_close (IntPtr raw);

// Releases the native archive handle, if any, and marks the object as
// not loaded.
//
// The handle comes from chm_open, so it is closed with chm_close rather
// than freed as CoTaskMem memory. The field is cleared before the native
// call, which makes repeated calls (Dispose followed by another Dispose)
// harmless.
private void CleanUp()
{
    IntPtr handle = this.chmfile;

    this.chmfile = IntPtr.Zero;
    this.loaded = false;

    if (handle != IntPtr.Zero)
        chm_close (handle);
}
// Releases the archive through CleanUp and tells the GC that the
// finalizer no longer needs to run for this instance.
public void Dispose()
{
    this.CleanUp ();
    GC.SuppressFinalize (this);
}
// Finalizer: runs CleanUp for instances that were never disposed.
~ChmFile()
{
    this.CleanUp ();
}
// Creates an empty, not-yet-loaded instance; call Load to open an archive.
public ChmFile()
{
    htmlFiles = new ArrayList ();
}
// Opens the CHM archive at path, reads its metadata and collects the
// paths of its HTML files.
//
// Returns the result of GetArchiveInfo, i.e. whether any metadata could
// be read from the archive.
// Throws System.Exception when chm_open cannot open the file.
//
// Calling Load on an instance that already holds an archive first
// releases that archive and resets everything derived from it, so the
// old handle is not leaked and htmlFiles does not accumulate entries
// from several archives.
public bool Load(string path)
{
    if (this.loaded)
        CleanUp ();

    this.htmlFiles.Clear ();
    this.title = "";
    this.topicsFile = null;
    this.hasTopics = false;
    this.defaultFile = "";

    this.chmfile = chm_open (path);

    if (this.chmfile == IntPtr.Zero) {
        throw new System.Exception ("Invalid file Type, not a CHM file");
    }

    this.loaded = true;

    bool info = GetArchiveInfo ();

    // GetHtmlFiles is called back for each enumerated entry and fills
    // htmlFiles.
    chm_enumerate (this.chmfile,
                   ChmEnumerate.All,
                   new ChmEnumerator (GetHtmlFiles),
                   IntPtr.Zero);

    return info;
}
// From the #SYSTEM file we are interested in the title (for now).
// Looks for the archive title in the "/#SYSTEM" object.
//
// The object is read starting at byte offset 4 and walked as a list of
// entries, each made of a 16-bit code, a 16-bit data length and the
// data itself. The entry with code 3 holds the title; it is stored in
// this.title only when no title has been set yet.
//
// Returns true when a code-3 entry was found, false otherwise
// (including when no archive is loaded or "/#SYSTEM" cannot be resolved).
//
// The scratch buffer is freed on every path, and every read is kept
// inside the bytes that chm_retrieve_object actually returned.
private bool SystemInfo()
{
    if (!loaded)
        return false;

    chmUnitInfo ui = new chmUnitInfo ();

    if (chm_resolve_object (this.chmfile, "/#SYSTEM", ui) == ChmResolve.Failure)
        return false;

    IntPtr buf = Marshal.AllocCoTaskMem (bufSize);

    try {
        long size = (long) chm_retrieve_object (this.chmfile, ui, buf, 4, (ulong) bufSize);
        int index = 0;

        // An entry header (code + length) needs four readable bytes.
        while (index + 4 <= size) {
            ushort code = (ushort) Marshal.ReadInt16 (buf, index);
            ushort len = (ushort) Marshal.ReadInt16 (buf, index + 2);
            int data = index + 4;

            if (code == 3) {
                // Never read more than what is left in the buffer, even
                // if the entry claims to be longer.
                int available = (int) Math.Min ((long) len, size - data);

                if (this.title == "")
                    this.title = ChmGetString (buf, data, available);

                return true;
            }

            index = data + len;
        }

        return false;
    } finally {
        Marshal.FreeCoTaskMem (buf);
    }
}
// TODO: We should throw something like a FileNotFoundException.
// Returns a reader over the text of the archive object at path.
// When the path cannot be resolved, the reader is over an empty string.
public TextReader GetFile(string path)
{
    chmUnitInfo ui = new chmUnitInfo ();
    ChmResolve status = chm_resolve_object (chmfile, path, ui);

    string contents = (status == ChmResolve.Failure) ? "" : ChmFileToString (ui);

    return new StringReader (contents);
}
// Returns a reader over the table-of-contents file, or over an empty
// string when the archive has none.
public TextReader GetTopicsFile()
{
    if (!HasTopics)
        return new StringReader ("");

    return GetFile (topicsFile);
}
// Returns a reader over the archive's default page, as resolved by
// GetFile from the defaultFile path.
public TextReader GetDefaultFile()
{
    TextReader reader = GetFile (defaultFile);
    return reader;
}
// Reads the title, table-of-contents file and default page from the
// "/#WINDOWS" and "/#STRINGS" objects.
//
// "/#WINDOWS" starts with an 8-byte header holding the number of entries
// and the size of one entry. Each entry stores offsets into "/#STRINGS"
// at 0x14 (title), 0x60 (contents file) and 0x68 (default file).
//
// From Pabs' CHM spec: "(STRINGS) This file is a list of ANSI/UTF-8 NT
// strings. The first is just a NIL character so that offsets to this
// file can specify zero & get a valid string. The strings are sliced up
// into blocks that are 4096 bytes in length."
//
// Returns false when either object cannot be resolved or read, or when
// the header is implausible; true once the entries have been processed.
//
// Both unmanaged buffers are freed on every path. All three offsets are
// handled the same way: the block that contains the offset is loaded,
// and the string is read at offset % block size, bounded by the bytes
// actually read.
private bool WindowsInfo()
{
    const int headerLen = 0x8;
    // The last field read from an entry is the 32-bit value at 0x68.
    const int minEntrySize = 0x68 + 4;

    IntPtr buf = IntPtr.Zero;
    IntPtr windowsData = IntPtr.Zero;

    try {
        chmUnitInfo ui = new chmUnitInfo ();

        if (chm_resolve_object (chmfile, "/#WINDOWS", ui) == ChmResolve.Failure)
            return false;

        buf = Marshal.AllocCoTaskMem (bufSize);

        if (chm_retrieve_object (chmfile, ui, buf, 0, headerLen) < headerLen)
            return false;

        int entries = Marshal.ReadInt32 (buf);
        int entrySize = Marshal.ReadInt32 (buf, 0x4);

        if (entries <= 0 || entrySize < minEntrySize)
            return false;

        // Computed as long so a hostile header cannot overflow the size.
        long total = (long) entries * entrySize;
        if (total > Int32.MaxValue)
            return false;

        windowsData = Marshal.AllocCoTaskMem ((int) total);

        long got = (long) chm_retrieve_object (chmfile,
                                               ui,
                                               windowsData,
                                               headerLen,
                                               (ulong) total);
        if (got <= 0)
            return false;

        // Only look at entries that were read completely.
        int usable = (int) (got / entrySize);

        if (chm_resolve_object (chmfile, "/#STRINGS", ui) == ChmResolve.Failure)
            return false;

        long cachedBlock = -1;
        long cachedSize = 0;

        for (int i = 0; i < usable; i++) {
            int entry = i * entrySize;

            uint offTitle = (uint) Marshal.ReadInt32 (windowsData, entry + 0x14);
            uint offTocFile = (uint) Marshal.ReadInt32 (windowsData, entry + 0x60);
            uint offDefaultFile = (uint) Marshal.ReadInt32 (windowsData, entry + 0x68);

            string text = ReadStringsEntry (ui, buf, offTitle, ref cachedBlock, ref cachedSize);
            if (text != null)
                this.title = text;

            text = ReadStringsEntry (ui, buf, offTocFile, ref cachedBlock, ref cachedSize);
            if (text != null) {
                topicsFile = "/" + text;
                hasTopics = true;
            }

            text = ReadStringsEntry (ui, buf, offDefaultFile, ref cachedBlock, ref cachedSize);
            if (text != null)
                defaultFile = "/" + text;
        }

        return true;
    } finally {
        if (windowsData != IntPtr.Zero)
            Marshal.FreeCoTaskMem (windowsData);
        if (buf != IntPtr.Zero)
            Marshal.FreeCoTaskMem (buf);
    }
}

// Returns the zero-terminated string stored at offset in the "/#STRINGS"
// object described by stringsUi, or null when the offset is 0 (not set)
// or lies outside the data that could be read.
// block is a bufSize-byte scratch buffer; cachedBlock / cachedSize
// remember which block it currently holds so consecutive lookups in the
// same block do not read it again. A string that continues into the
// next block is cut at the end of the current one.
private string ReadStringsEntry(chmUnitInfo stringsUi,
                                IntPtr block,
                                uint offset,
                                ref long cachedBlock,
                                ref long cachedSize)
{
    if (offset == 0)
        return null;

    long wanted = offset / bufSize;

    if (wanted != cachedBlock) {
        cachedSize = (long) chm_retrieve_object (chmfile,
                                                 stringsUi,
                                                 block,
                                                 (ulong) wanted * bufSize,
                                                 (ulong) bufSize);
        cachedBlock = wanted;
    }

    int within = (int) (offset % bufSize);

    if (cachedSize <= within)
        return null;

    return ChmGetString (block, within, (int) (cachedSize - within));
}
// Collects the archive metadata: WindowsInfo runs first, then
// SystemInfo, and both always run when an archive is loaded.
// Returns true when at least one of them succeeded.
private bool GetArchiveInfo()
{
    if (!this.loaded)
        return false;

    bool fromWindows = WindowsInfo ();
    bool fromSystem = SystemInfo ();

    return fromSystem || fromWindows;
}
// Hands every collected HTML file to Parse, one reader per file, with
// leading and trailing whitespace removed from the file's text.
// Does nothing when no archive is loaded.
//
// Entries that can no longer be resolved in the archive are skipped
// instead of being passed to Parse as empty documents.
public void ParseContents(ChmHtmlParseFunc Parse)
{
    if (!this.loaded)
        return;

    foreach (string fileName in htmlFiles) {
        chmUnitInfo ui = new chmUnitInfo ();

        if (chm_resolve_object (this.chmfile, fileName, ui) == ChmResolve.Failure)
            continue;

        Parse (new StringReader (ChmFileToString (ui).Trim ()));
    }
}
578 #if true
580 #endif