1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: HelpIndexer.java,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 package com
.sun
.star
.help
;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.zip.CRC32;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
51 public class HelpIndexerTool
/** Public no-argument constructor; every method visible in this file is static and none of them calls it. */
53 public HelpIndexerTool()
59 * @param args the command line arguments
61 public static void main( String
[] args
)
63 boolean bExtensionMode
= false;
64 mainImpl( args
, bExtensionMode
);
/**
 * Parses the command line, indexes the "caption" and "content" directories
 * into a Lucene index directory named aModule + ".idxl" below the zip
 * directory, packs the zip directory into the target archive and deletes
 * the zip directory afterwards.
 *
 * Recognised options, each followed by a value: -lang, -mod, -zipdir,
 * -srcdir, -o and -checkcfsname.
 *
 * NOTE(review): this listing has no braces and appears to be missing
 * statements - the declarations of aModule, bMod and aCfsName, any
 * assignment that sets the b* flags to true, and the try that belongs to
 * the catch at the end are not visible. Confirm the exact control flow
 * against the complete file.
 *
 * @param args the command line arguments
 * @param bExtensionMode when true, the console messages guarded by
 *        !bExtensionMode are not printed and -o is not required
 */
67 public static void mainImpl( String
[] args
, boolean bExtensionMode
)
// Option values; each stays empty until its option is read from args.
69 String aDirToZipStr
= "";
70 String aSrcDirStr
= "";
71 String aLanguageStr
= "";
73 String aTargetZipFileStr
= "";
// Flags tested by the mandatory-option check below; all start out false.
77 boolean bLang
= false;
79 boolean bZipDir
= false;
80 boolean bSrcDir
= false;
81 boolean bOutput
= false;
82 boolean bCfsName
= false;
// Scan the arguments; an option's value is read only when another argument follows it.
84 int nArgCount
= args
.length
;
85 for( int i
= 0 ; i
< nArgCount
; i
++ )
87 if( "-lang".equals(args
[i
]) )
89 if( i
+ 1 < nArgCount
)
91 aLanguageStr
= args
[i
+ 1];
96 else if( "-mod".equals(args
[i
]) )
98 if( i
+ 1 < nArgCount
)
100 aModule
= args
[i
+ 1];
105 else if( "-zipdir".equals(args
[i
]) )
107 if( i
+ 1 < nArgCount
)
109 aDirToZipStr
= args
[i
+ 1];
114 else if( "-srcdir".equals(args
[i
]) )
116 if( i
+ 1 < nArgCount
)
118 aSrcDirStr
= args
[i
+ 1];
123 else if( "-o".equals(args
[i
]) )
125 if( i
+ 1 < nArgCount
)
127 aTargetZipFileStr
= args
[i
+ 1];
// "-checkcfsname" supplies a base name; ".cfs" is appended to form the file name checked later.
132 else if( "-checkcfsname".equals(args
[i
]) )
134 if( i
+ 1 < nArgCount
)
136 aCfsName
= args
[i
+ 1] + ".cfs";
// Mandatory-option check: -lang, -mod and -zipdir always, -o only outside extension mode.
143 if( !bLang
|| !bMod
|| !bZipDir
|| (!bOutput
&& !bExtensionMode
) )
148 System
.out
.println("Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -zipdir TempZipDir -o OutputZipFile");
// The index directory is the module name plus ".idxl", located inside the zip directory.
152 String aIndexDirName
= aModule
+ ".idxl";
153 File aIndexDir
= new File( aDirToZipStr
+ File
.separator
+ aIndexDirName
);
// NOTE(review): as listed, this overwrites any value given with -srcdir; confirm whether a guard is missing here.
155 aSrcDirStr
= aDirToZipStr
;
156 File aCaptionFilesDir
= new File( aSrcDirStr
+ File
.separator
+ "caption" );
157 File aContentFilesDir
= new File( aSrcDirStr
+ File
.separator
+ "content" );
// start is subtracted from end below to report the elapsed milliseconds.
161 Date start
= new Date();
// CJKAnalyzer for language "ja", StandardAnalyzer for every other language.
162 Analyzer analyzer
= aLanguageStr
.equals("ja") ?
(Analyzer
)new CJKAnalyzer() : (Analyzer
)new StandardAnalyzer();
// NOTE(review): no call that closes the writer is visible in this listing - confirm.
163 IndexWriter writer
= new IndexWriter( aIndexDir
, analyzer
, true );
164 if( !bExtensionMode
)
165 System
.out
.println( "Lucene: Indexing to directory '" + aIndexDir
+ "'..." );
166 int nRet
= indexDocs( writer
, aModule
, bExtensionMode
, aCaptionFilesDir
, aContentFilesDir
);
169 if( !bExtensionMode
)
171 System
.out
.println();
172 System
.out
.println( "Optimizing ..." );
// Check that the expected .cfs file exists in the index directory; the check is
// evaluated only when bCfsName is set, outside extension mode, and nRet is not -1.
178 boolean bCfsFileOk
= true;
179 if( bCfsName
&& !bExtensionMode
&& nRet
!= -1 )
181 String aCompleteCfsFileName
= aDirToZipStr
+ File
.separator
+ aIndexDirName
+ File
.separator
+ aCfsName
;
182 File aCfsFile
= new File( aCompleteCfsFileName
);
183 bCfsFileOk
= aCfsFile
.exists();
184 System
.out
.println( "Checking cfs file " + aCfsName
+ ": " + (bCfsFileOk ?
"Found" : "Not found") );
// Clean-up of the caption/content input directories and of the index directory.
// NOTE(review): the conditions selecting these deletions are not visible in this listing.
191 deleteRecursively( aCaptionFilesDir
);
192 deleteRecursively( aContentFilesDir
);
198 deleteRecursively( aIndexDir
);
// Pack the zip directory into the target archive, then delete the directory itself.
201 System
.out
.println( "Zipping ..." );
202 File aDirToZipFile
= new File( aDirToZipStr
);
203 createZipFile( aDirToZipFile
, aTargetZipFileStr
);
204 deleteRecursively( aDirToZipFile
);
209 System
.out
.println( "cfs file check failed, terminating..." );
213 Date end
= new Date();
214 if( !bExtensionMode
)
215 System
.out
.println(end
.getTime() - start
.getTime() + " total milliseconds");
// An IOException is reported on standard output with its class and message.
217 catch (IOException e
)
222 System
.out
.println(" caught a " + e
.getClass() +
223 "\n with message: " + e
.getMessage());
/**
 * Adds one document per help file to the index: first every file of the
 * caption directory, paired with the content file of the same name when
 * one exists, then every content file that has no caption file of the
 * same name.
 *
 * NOTE(review): the method is declared to return int, but no return
 * statement is visible in this listing; the call site in mainImpl compares
 * the result against -1. Confirm the return values against the complete
 * file.
 *
 * @param writer index writer that receives the documents
 * @param aModule module name handed to HelpFileDocument.Document
 * @param bExtensionMode when true, the "Not found" and progress output is not printed
 * @param aCaptionFilesDir directory whose entries are treated as caption files
 * @param aContentFilesDir directory whose entries are treated as content files
 * @throws IOException declared by the signature; presumably raised while adding documents - confirm
 */
228 private static int indexDocs(IndexWriter writer
, String aModule
, boolean bExtensionMode
,
229 File aCaptionFilesDir
, File aContentFilesDir
) throws IOException
// Both inputs must be readable directories; otherwise "Not found" is reported.
231 if( !aCaptionFilesDir
.canRead() || !aCaptionFilesDir
.isDirectory() )
233 if( !bExtensionMode
)
234 System
.out
.println( "Not found: " + aCaptionFilesDir
);
237 if( !aContentFilesDir
.canRead() || !aContentFilesDir
.isDirectory() )
239 if( !bExtensionMode
)
240 System
.out
.println( "Not found: " + aContentFilesDir
);
// Collect the entry names of each directory into a set for the membership tests below.
244 String
[] aCaptionFiles
= aCaptionFilesDir
.list();
245 List aCaptionFilesList
= Arrays
.asList( aCaptionFiles
);
246 HashSet aCaptionFilesHashSet
= new HashSet( aCaptionFilesList
);
248 String
[] aContentFiles
= aContentFilesDir
.list();
249 List aContentFilesList
= Arrays
.asList( aContentFiles
);
250 HashSet aContentFilesHashSet
= new HashSet( aContentFilesList
);
252 // Loop over caption files and find corresponding content file
253 if( !bExtensionMode
)
254 System
.out
.println( "Indexing, adding files" );
255 int nCaptionFilesLen
= aCaptionFiles
.length
;
256 for( int i
= 0 ; i
< nCaptionFilesLen
; i
++ )
258 String aCaptionFileStr
= aCaptionFiles
[i
];
259 File aCaptionFile
= new File( aCaptionFilesDir
, aCaptionFileStr
);
// aContentFile stays null when no content file shares the caption file's name.
260 File aContentFile
= null;
261 if( aContentFilesHashSet
.contains( aCaptionFileStr
) )
262 aContentFile
= new File( aContentFilesDir
, aCaptionFileStr
);
// One "." is printed per added document as a progress indicator.
264 if( !bExtensionMode
)
265 System
.out
.print( "." );
266 writer
.addDocument( HelpFileDocument
.Document( aModule
, aCaptionFile
, aContentFile
) );
269 // Loop over content files to find remaining files not mapped to caption files
270 int nContentFilesLen
= aContentFiles
.length
;
271 for( int i
= 0 ; i
< nContentFilesLen
; i
++ )
273 String aContentFileStr
= aContentFiles
[i
];
274 if( !aCaptionFilesHashSet
.contains( aContentFileStr
) )
276 // Not already handled in caption files loop
277 File aCaptionFile
= null;
278 File aContentFile
= new File( aContentFilesDir
, aContentFileStr
);
279 if( !bExtensionMode
)
280 System
.out
.print( "." );
281 writer
.addDocument( HelpFileDocument
.Document( aModule
, aCaptionFile
, aContentFile
) );
287 public static void createZipFile( File aDirToZip
, String aTargetZipFileStr
)
288 throws FileNotFoundException
, IOException
290 FileOutputStream fos
= new FileOutputStream( aTargetZipFileStr
);
291 ZipOutputStream zos
= new ZipOutputStream( fos
);
293 File
[] aChildrenFiles
= aDirToZip
.listFiles();
294 int nFileCount
= aChildrenFiles
.length
;
295 for( int i
= 0 ; i
< nFileCount
; i
++ )
296 addToZipRecursively( zos
, aChildrenFiles
[i
], null );
301 public static void addToZipRecursively( ZipOutputStream zos
, File aFile
, String aBasePath
)
302 throws FileNotFoundException
, IOException
304 if( aFile
.isDirectory() )
306 String aDirName
= aFile
.getName();
307 if( aDirName
.equalsIgnoreCase( "caption" ) || aDirName
.equalsIgnoreCase( "content" ) )
310 File
[] aChildrenFiles
= aFile
.listFiles();
311 String aNewBasePath
= "";
312 if( aBasePath
!= null )
313 aNewBasePath
+= aBasePath
+ File
.separator
;
314 aNewBasePath
+= aDirName
;
316 int nFileCount
= aChildrenFiles
.length
;
317 for( int i
= 0 ; i
< nFileCount
; i
++ )
318 addToZipRecursively( zos
, aChildrenFiles
[i
], aNewBasePath
);
324 // read contents of file we are going to put in the zip
325 int fileLength
= (int) aFile
.length();
326 FileInputStream fis
= new FileInputStream( aFile
);
327 byte[] wholeFile
= new byte[fileLength
];
328 int bytesRead
= fis
.read( wholeFile
, 0, fileLength
);
331 String aFileName
= aFile
.getName();
332 String aEntryName
= "";
333 if( aBasePath
!= null )
334 aEntryName
+= aBasePath
+ "/";
335 aEntryName
+= aFileName
;
336 ZipEntry aZipEntry
= new ZipEntry( aEntryName
);
337 aZipEntry
.setTime( aFile
.lastModified() );
338 aZipEntry
.setSize( fileLength
);
340 int nMethod
= ( aFileName
.toLowerCase().endsWith( ".jar" ) )
341 ? ZipEntry
.STORED
: ZipEntry
.DEFLATED
;
342 aZipEntry
.setMethod( nMethod
);
344 CRC32 tempCRC
= new CRC32();
345 tempCRC
.update( wholeFile
, 0, wholeFile
.length
);
346 aZipEntry
.setCrc( tempCRC
.getValue() );
348 // write the contents into the zip element
349 zos
.putNextEntry( aZipEntry
);
350 zos
.write( wholeFile
, 0, fileLength
);
354 static public boolean deleteRecursively( File aFile
)
356 if( aFile
.isDirectory() )
358 File
[] aChildrenFiles
= aFile
.listFiles();
359 int nFileCount
= aChildrenFiles
.length
;
360 for( int i
= 0 ; i
< nFileCount
; i
++ )
362 File aChildrenFile
= aChildrenFiles
[i
];
363 boolean bSuccess
= deleteRecursively( aChildrenFile
);
369 return aFile
.delete();