2 // CHMFile.cs: Basic chmlib Wrapper, CHM file format reader.
5 // Miguel Fernando Cabrera <mfcabrer@unalmed.edu.co>
7 // Based on Razvan Cojocaru's X-CHM::CHMFile.
8 // Uses Jed Wing's CHMLib.
9 // For more information about CHM file format
10 // check out Pabs' CHM spec at http://bonedaddy.net/pabs3/hhm
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
21 // The above copyright notice and this permission notice shall be included in all
22 // copies or substantial portions of the Software.
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 using System
.Runtime
.InteropServices
;
35 using System
.Collections
;
41 namespace Beagle
.Util
{
47 /*just to make it 'right'*/
// Callback signature for consumers of CHM content: receives a TextReader
// and returns nothing. ChmFile.ParseContents takes a delegate of this type
// and invokes it with a StringReader for each entry in its html file list.
65 public delegate void ChmHtmlParseFunc(TextReader stream
);
67 public class ChmFile
: IDisposable
{
70 private string title
= "";
71 //private ArrayList tocStrings;
72 private ArrayList htmlFiles
;
73 private string topicsFile
;
74 private bool hasTopics
= false;
75 private IntPtr chmfile
= IntPtr
.Zero
;
77 private bool loaded
= false;
78 private const int bufSize
= 4096;
79 private string defaultFile
= "";
86 public bool HasTopics
{
87 get { return hasTopics; }
97 [ StructLayout (LayoutKind
.Sequential
) ]
98 private class chmUnitInfo
{
101 public UInt64 length
;
103 [MarshalAs (UnmanagedType
.ByValTStr
, SizeConst
=257)]
105 /*[MarshalAs (UnmanagedType.ByValArray, SizeConst=257)]
106 public char[] path;*/
111 private delegate int ChmEnumerator (IntPtr chmFile
,
// P/Invoke binding for chm_open in the native "libchm" library.
// Takes the file name and returns an opaque handle as an IntPtr;
// Load() treats a returned IntPtr.Zero as failure and throws.
116 [ DllImportAttribute("libchm") ]
117 private static extern IntPtr
chm_open(string filename
);
119 [ DllImportAttribute("libchm") ]
120 private static extern ChmResolve
chm_resolve_object(IntPtr raw
,
122 [Out
] chmUnitInfo ui
);
124 [ DllImportAttribute("libchm") ]
125 private static extern UInt64
chm_retrieve_object(IntPtr raw
,
126 [In
, Out
] chmUnitInfo ui
,
134 [ DllImportAttribute("libchm") ]
135 private static extern int chm_enumerate(IntPtr raw
,
143 private int GetHtmlFiles(IntPtr chmFile
,
147 if(info
.path
.EndsWith(".html"))
148 htmlFiles
.Add(info
.path
.Trim());
157 private string ChmGetString(IntPtr ptr
,
165 char[] cadena
= new char[len
];
167 for(i
=0; i
< len
; i
++){
169 cadena
[i
] = (char) Marshal
.ReadByte(ptr
,offset
+ i
);
170 if(cadena
[i
] == '\0')
175 str
= new string (cadena
,0,i
);
182 private string ChmFileToString(chmUnitInfo ui
)
186 const ulong tmpBufSize
= 1025;
188 StringBuilder strb
= new StringBuilder();
191 ulong size
= tmpBufSize
-1;
195 IntPtr raw
= Marshal
.AllocCoTaskMem ((int)tmpBufSize
);
198 size
= chm_retrieve_object(chmfile
,ui
,raw
,cur
,tmpBufSize
-1);
199 // If I don't create a copy of the string, then when I free 'raw' the builder data disappears:
200 // the last chunk read disappears (Mono bug or my endless stupidity).
201 // I'll have to check it out.
202 strb
.Append(Marshal
.PtrToStringAuto(raw
,(int)size
));
205 while(size
== tmpBufSize
-1);
207 Marshal
.FreeCoTaskMem (raw
);
208 return strb
.ToString();
213 private void CleanUp()
215 Marshal
.FreeCoTaskMem (chmfile
);
219 public void Dispose()
222 GC
.SuppressFinalize (this);
235 //this.tocStrings = new ArrayList();
236 this.htmlFiles
= new ArrayList();
241 public bool Load(string path
)
244 //chmUnitInfo ui = new chmUnitInfo() ;
246 this.chmfile
= chm_open(path
);
248 if(this.chmfile
== IntPtr
.Zero
) {
249 throw new System
.Exception ("Invalid file Type, not a CHM file");
253 bool info
= GetArchiveInfo();
255 /*if(info && HasTopics)
259 chm_enumerate(this.chmfile
,
261 new ChmEnumerator(GetHtmlFiles
),
266 foreach(string str in htmlFiles)
267 Console.WriteLine(str); */
275 From the #SYSTEM File we are interested in the title (for now).
279 private bool SystemInfo()
283 bool gottitle
= false;
291 IntPtr buf
= Marshal
.AllocCoTaskMem (bufSize
);
293 chmUnitInfo ui
= new chmUnitInfo() ;
296 res
= chm_resolve_object (this.chmfile
,"/#SYSTEM", ui
);
299 if(res
== ChmResolve
.Failure
)
302 size
= chm_retrieve_object (this.chmfile
, ui
,buf
,4,(ulong)bufSize
);
306 long tol
= (long)size
-2;
311 value = (ushort)Marshal
.ReadInt16(buf
,index
);
315 //Console.WriteLine ("We got it");
317 ushort len
= (ushort)Marshal
.ReadInt16 (buf
,(int)index
);
320 this.title
= ChmGetString (buf
,index
+2,(int)len
);
329 value = (ushort) Marshal
.ReadInt16(buf
,(int)index
);
331 index
+= (int)value +2;
335 Marshal
.FreeCoTaskMem (buf
);
342 We should throw something like a FileNotFoundException
344 public TextReader
GetFile(string path
)
347 chmUnitInfo ui
= new chmUnitInfo();
349 if(chm_resolve_object (chmfile
,path
,ui
) == ChmResolve
.Failure
) {
350 //Console.WriteLine("Fails to Open: {0}",path);
351 return new StringReader("");
356 return (new StringReader(ChmFileToString(ui
)));
361 public TextReader
GetTopicsFile()
366 return GetFile(topicsFile
);
370 /*Oh Lina, why don't you love me? :P*/
371 return new StringReader("");
376 public TextReader
GetDefaultFile()
381 return GetFile(defaultFile
);
389 private bool WindowsInfo()
398 const int headerLen
= 0x8;
401 IntPtr buf
= Marshal
.AllocCoTaskMem (bufSize
);
402 chmUnitInfo ui
= new chmUnitInfo ();
405 if(chm_resolve_object (chmfile
,"/#WINDOWS",ui
) == ChmResolve
.Failure
)
408 if(chm_retrieve_object (chmfile
,ui
,buf
,0,headerLen
) == 0)
411 entries
= Marshal
.ReadInt32 (buf
);
412 entrySize
= Marshal
.ReadInt32 (buf
,0x4);
414 //Console.WriteLine ("entries -> {0}\nsize = {1}",entries,entrySize);
416 windowsData
= Marshal
.AllocCoTaskMem(entries
*entrySize
);
419 size
= (long)chm_retrieve_object (chmfile
,
423 (ulong)( entries
* entrySize
) );
430 if(chm_resolve_object (chmfile
,"/#STRINGS",ui
) == ChmResolve
.Failure
)
436 "(STRINGS)This file is a list of ANSI/UTF-8 NT strings.
437 The first is just a NIL character so that offsets to this file can specify
438 zero & get a valid string.
439 The strings are sliced up into blocks that are 4096 bytes in length."
444 for(int i
= 0; i
< entries
; i
++) {
446 int offset
= i
* entrySize
;
449 uint offTitle
= (uint)Marshal
.ReadInt32(windowsData
,
452 uint offTocFile
= (uint)Marshal
.ReadInt32(windowsData
,
454 uint offDefaultFile
= (uint)Marshal
.ReadInt32(windowsData
,
458 //Console.WriteLine("offTocFile = {0}",offTocFile);
461 block
= offTitle
/ 4096;
465 size
= (long)chm_retrieve_object(chmfile
,
472 if(size
> 0 && offTitle
> 0)
473 this.title
= ChmGetString(buf
,(int)offTitle
,4096);
476 if(block
!= offTocFile
/ 4096) {
477 block
= offTocFile
/ 4096;
478 size
= (long)chm_retrieve_object(chmfile
,
487 if(size
> 0 && offTocFile
> 0){
488 topicsFile
= "/" + ChmGetString(buf
,
489 (int)offTocFile
% 4096 ,
495 if(block
!= offDefaultFile
/ 4096) {
496 block
= offDefaultFile
/ 4096;
497 size
= (long)chm_retrieve_object(chmfile
,
506 if(size
> 0 && offDefaultFile
> 0)
507 defaultFile
= ("/" + ChmGetString(buf
,
508 (int)offDefaultFile
% 4096 ,
515 Marshal
.FreeCoTaskMem (buf
);
516 Marshal
.FreeCoTaskMem (windowsData
);
523 private bool GetArchiveInfo()
526 We only get the chm title (if any) from the #SYSTEM file
527 and the Toc strings for now.
528 I'm sure Razvan will feel some kind of Deja Vu.
536 resw
= WindowsInfo();
540 return (rest
|| resw
);
544 public void ParseContents(ChmHtmlParseFunc Parse
)
549 foreach(string fileName
in htmlFiles
) {
550 chmUnitInfo ui
= new chmUnitInfo();
552 chm_resolve_object(this.chmfile
,
557 //Console.WriteLine("Parsing....{0}",ui.path);
558 ///Logger.Log.Debug("CHMFile: Parsing {0}....",ui.path);
559 Parse( new StringReader(ChmFileToString(ui
).Trim()) );
569 public static void ParseTest(TextReader text
)
571 Console
.WriteLine("in ParseText");
572 Console
.WriteLine(text
.ReadLine());
583 ChmFile reader
= new ChmFile();
585 bool res
= reader
.Load("olib.chm");
586 //reader.GetArchiveInfo();
587 Console
.WriteLine("The response {0}",res
);
589 Console
.WriteLine("El titulo es " + reader
.Title
);
591 TextReader r
= reader
.GetTopicsFile();
593 /*while((line = r.ReadLine()) != null)
594 Console.WriteLine(line);*/
596 Console
.WriteLine("Waza???");
598 //Console.WriteLine(r.ReadToEnd());
600 ParseTest t
= new ParseTest(r
);
604 //reader.ParseContents(new ChmHtmlParseFunc(CHMFile.ParseTest));
607 reader.Load("/home/ceruno/Documentacion/Biblioteca/WebDev/JavaScript/Java Script Pocket Reference 2nd Edition.chm");
609 //reader.GetArchiveInfo();
611 Console.WriteLine("El titulo es " + reader.Title);*/