2 // CHMFile.cs: Basic chmlib Wrapper, CHM file format reader.
5 // Copyright (C) 2005,2006 Miguel Fernando Cabrera <mfcabrera@gmail.com>
7 // Based on Razvan Cojocaru's X-CHM::CHMFile.
8 // Uses Jed Wing's CHMLib.
9 // For more information about CHM file format
10 // check out Pabs' CHM spec at http://bonedaddy.net/pabs3/hhm
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to deal
16 // in the Software without restriction, including without limitation the rights
17 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 // copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
21 // The above copyright notice and this permission notice shall be included in all
22 // copies or substantial portions of the Software.
24 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 using System
.Runtime
.InteropServices
;
35 using System
.Collections
;
41 namespace Beagle
.Util
{
47 /*just to make it 'right'*/
/// <summary>
/// Callback type accepted by <c>ChmFile.ParseContents</c>, which invokes it
/// with a reader over the trimmed text of each file in its HTML file list.
/// </summary>
/// <param name="stream">Reader positioned at the start of the file's text.</param>
65 public delegate void ChmHtmlParseFunc(TextReader stream
);
67 public class ChmFile
: IDisposable
{
// Archive title; assigned by SystemInfo and WindowsInfo when one is found.
70 private string title
= "";
71 //private ArrayList tocStrings;
// Paths collected by GetHtmlFiles (entries ending in ".html"); iterated by ParseContents.
72 private ArrayList htmlFiles
;
// Path of the topics file, built in WindowsInfo; passed to GetFile by GetTopicsFile.
73 private string topicsFile
;
// Backing field for the HasTopics property.
74 private bool hasTopics
= false;
// Native handle returned by chm_open; stays IntPtr.Zero until Load assigns it.
75 private IntPtr chmfile
= IntPtr
.Zero
;
// NOTE(review): no read or write of this flag is visible in this excerpt - confirm its use.
77 private bool loaded
= false;
// Size in bytes of the scratch buffers that SystemInfo and WindowsInfo allocate.
78 private const int bufSize
= 4096;
// Path of the default page, built in WindowsInfo; passed to GetFile by GetDefaultFile.
79 private string defaultFile
= "";
86 public bool HasTopics
{
87 get { return hasTopics; }
97 [ StructLayout (LayoutKind
.Sequential
) ]
98 private class chmUnitInfo
{
101 public UInt64 length
;
104 [MarshalAs (UnmanagedType
.ByValTStr
, SizeConst
=512)]
106 /*[MarshalAs (UnmanagedType.ByValArray, SizeConst=257)]
107 public char[] path;*/
112 private delegate int ChmEnumerator (IntPtr chmFile
,
// P/Invoke binding for chm_open in the native "libchm" library.
// Load stores the returned handle and treats IntPtr.Zero as a failed open.
117 [ DllImportAttribute("libchm") ]
118 private static extern IntPtr
chm_open(string filename
);
120 [ DllImportAttribute("libchm") ]
121 private static extern ChmResolve
chm_resolve_object(IntPtr raw
,
123 [Out
] chmUnitInfo ui
);
125 [ DllImportAttribute("libchm") ]
126 private static extern UInt64
chm_retrieve_object(IntPtr raw
,
127 [In
, Out
] chmUnitInfo ui
,
135 [ DllImportAttribute("libchm") ]
136 private static extern int chm_enumerate(IntPtr raw
,
144 private int GetHtmlFiles(IntPtr chmFile
,
148 if(info
.path
.EndsWith(".html"))
149 htmlFiles
.Add(info
.path
.Trim());
158 private string ChmGetString(IntPtr ptr
,
166 char[] cadena
= new char[len
];
168 for(i
=0; i
< len
; i
++){
170 cadena
[i
] = (char) Marshal
.ReadByte(ptr
,offset
+ i
);
171 if(cadena
[i
] == '\0')
176 str
= new string (cadena
,0,i
);
183 private string ChmFileToString(chmUnitInfo ui
)
187 const ulong tmpBufSize
= 1025;
189 StringBuilder strb
= new StringBuilder();
192 ulong size
= tmpBufSize
-1;
196 IntPtr raw
= Marshal
.AllocCoTaskMem ((int)tmpBufSize
);
199 size
= chm_retrieve_object(chmfile
,ui
,raw
,cur
,tmpBufSize
-1);
200 // If I don't create a copy of the string, then when I free 'raw' the builder data disappears:
201 // the last chunk read disappears (Mono bug or my endless stupidity).
202 // I'll have to check it out.
203 strb
.Append(Marshal
.PtrToStringAuto(raw
,(int)size
));
206 while(size
== tmpBufSize
-1);
208 Marshal
.FreeCoTaskMem (raw
);
209 return strb
.ToString();
214 private void CleanUp()
216 Marshal
.FreeCoTaskMem (chmfile
);
220 public void Dispose()
223 GC
.SuppressFinalize (this);
236 //this.tocStrings = new ArrayList();
237 this.htmlFiles
= new ArrayList();
242 public bool Load(string path
)
245 //chmUnitInfo ui = new chmUnitInfo() ;
247 this.chmfile
= chm_open(path
);
249 if(this.chmfile
== IntPtr
.Zero
) {
250 throw new System
.Exception ("Invalid file Type, not a CHM file");
254 bool info
= GetArchiveInfo();
256 /*if(info && HasTopics)
260 chm_enumerate(this.chmfile
,
262 new ChmEnumerator(GetHtmlFiles
),
267 foreach(string str in htmlFiles)
268 Console.WriteLine(str); */
276 From the #SYSTEM File we are interested in the title (for now).
280 private bool SystemInfo()
284 bool gottitle
= false;
292 IntPtr buf
= Marshal
.AllocCoTaskMem (bufSize
);
294 chmUnitInfo ui
= new chmUnitInfo() ;
297 res
= chm_resolve_object (this.chmfile
,"/#SYSTEM", ui
);
300 if(res
== ChmResolve
.Failure
)
303 size
= chm_retrieve_object (this.chmfile
, ui
,buf
, 4, (ulong)bufSize
);
307 long tol
= (long)size
- 2;
312 value = (ushort)Marshal
.ReadInt16 (buf
, index
);
318 ushort len
= (ushort)Marshal
.ReadInt16 (buf
, (int)index
);
321 this.title
= ChmGetString (buf
,index
+2, (int)len
);
331 value = (ushort) Marshal
.ReadInt16(buf
,(int)index
);
333 index
+= (int)value + 2;
337 Marshal
.FreeCoTaskMem (buf
);
344 We should throw something like a FileNotFoundException
346 public TextReader
GetFile(string path
)
349 chmUnitInfo ui
= new chmUnitInfo();
351 if(chm_resolve_object (chmfile
,path
,ui
) == ChmResolve
.Failure
) {
352 //Console.WriteLine("Fails to Open: {0}",path);
353 return new StringReader("");
358 return (new StringReader(ChmFileToString(ui
)));
363 public TextReader
GetTopicsFile()
368 return GetFile(topicsFile
);
372 /*Oh Lina, why don't you love me? :P*/
373 return new StringReader("");
378 public TextReader
GetDefaultFile()
383 return GetFile(defaultFile
);
391 private bool WindowsInfo()
400 const int headerLen
= 0x8;
403 IntPtr buf
= Marshal
.AllocCoTaskMem (bufSize
);
404 chmUnitInfo ui
= new chmUnitInfo ();
407 if(chm_resolve_object (chmfile
,"/#WINDOWS",ui
) == ChmResolve
.Failure
)
410 if(chm_retrieve_object (chmfile
,ui
,buf
,0,headerLen
) == 0)
413 entries
= Marshal
.ReadInt32 (buf
);
414 entrySize
= Marshal
.ReadInt32 (buf
,0x4);
416 //Console.WriteLine ("entries -> {0}\nsize = {1}",entries,entrySize);
418 windowsData
= Marshal
.AllocCoTaskMem(entries
* entrySize
);
421 size
= (long)chm_retrieve_object (chmfile
,
425 (ulong)(entries
* entrySize
));
432 if(chm_resolve_object (chmfile
,"/#STRINGS",ui
) == ChmResolve
.Failure
)
438 "(STRINGS)This file is a list of ANSI/UTF-8 NT strings.
439 The first is just a NIL character so that offsets to this file can specify
440 zero & get a valid string.
441 The strings are sliced up into blocks that are 4096 bytes in length."
446 for(int i
= 0; i
< entries
; i
++) {
448 int offset
= i
* entrySize
;
451 uint offTitle
= (uint)Marshal
.ReadInt32(windowsData
,
454 uint offTocFile
= (uint)Marshal
.ReadInt32(windowsData
,
456 uint offDefaultFile
= (uint)Marshal
.ReadInt32(windowsData
,
460 //Console.WriteLine("offTocFile = {0}",offTocFile);
463 block
= offTitle
/ 4096;
467 size
= (long)chm_retrieve_object(chmfile
,
474 if(size
> 0 && offTitle
> 0)
475 this.title
= ChmGetString(buf
,(int)offTitle
,4096);
478 if(block
!= offTocFile
/ 4096) {
479 block
= offTocFile
/ 4096;
480 size
= (long)chm_retrieve_object(chmfile
,
489 if(size
> 0 && offTocFile
> 0){
490 topicsFile
= "/" + ChmGetString(buf
,
491 (int)offTocFile
% 4096 ,
497 if(block
!= offDefaultFile
/ 4096) {
498 block
= offDefaultFile
/ 4096;
499 size
= (long)chm_retrieve_object(chmfile
,
508 if(size
> 0 && offDefaultFile
> 0)
509 defaultFile
= ("/" + ChmGetString(buf
,
510 (int)offDefaultFile
% 4096 ,
517 Marshal
.FreeCoTaskMem (buf
);
518 Marshal
.FreeCoTaskMem (windowsData
);
525 private bool GetArchiveInfo()
528 We only get the chm title (if any) from the #SYSTEM file
529 and the Toc strings for now.
530 I'm sure Razvan will feel some kind of Deja Vu.
538 resw
= WindowsInfo();
542 return (rest
|| resw
);
546 public void ParseContents(ChmHtmlParseFunc Parse
)
551 foreach(string fileName
in htmlFiles
) {
552 chmUnitInfo ui
= new chmUnitInfo();
554 chm_resolve_object(this.chmfile
,
559 //Console.WriteLine("Parsing....{0}",ui.path);
560 ///Logger.Log.Debug("CHMFile: Parsing {0}....",ui.path);
561 Parse( new StringReader(ChmFileToString(ui
).Trim()) );