update dev300-m58
[ooovba.git] / xmlhelp / source / cxxhelp / provider / resultsetforquery.cxx
blobe0602f5efa5faccb3aadbbdc87ec5aadf83a5df7
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: resultsetforquery.cxx,v $
10 * $Revision: 1.17 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_xmlhelp.hxx"
33 #include <com/sun/star/ucb/Command.hpp>
34 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
35 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
36 #include <com/sun/star/ucb/XCommandProcessor.hpp>
37 #include <com/sun/star/lang/Locale.hpp>
38 #include <com/sun/star/script/XInvocation.hpp>
40 #ifndef INCLUDED_STL_ALGORITHM
41 #include <algorithm>
42 #define INCLUDED_STL_ALGORITHM
43 #endif
44 #ifndef INCLUDED_STL_SET
45 #include <set>
46 #define INCLUDED_STL_SET
47 #endif
49 #include <qe/Query.hxx>
50 #include <qe/DocGenerator.hxx>
51 #include "resultsetforquery.hxx"
52 #include "databases.hxx"
54 // For testing
55 // #define LOGGING
57 using namespace std;
58 using namespace chelp;
59 using namespace xmlsearch::excep;
60 using namespace xmlsearch::qe;
61 using namespace com::sun::star;
62 using namespace com::sun::star::ucb;
63 using namespace com::sun::star::i18n;
64 using namespace com::sun::star::uno;
65 using namespace com::sun::star::lang;
67 struct HitItem
69 rtl::OUString m_aURL;
70 float m_fScore;
72 HitItem( void ) {}
73 HitItem( const rtl::OUString& aURL, float fScore )
74 : m_aURL( aURL )
75 , m_fScore( fScore )
77 bool operator < ( const HitItem& rHitItem ) const
79 return rHitItem.m_fScore < m_fScore;
83 ResultSetForQuery::ResultSetForQuery( const uno::Reference< lang::XMultiServiceFactory >& xMSF,
84 const uno::Reference< XContentProvider >& xProvider,
85 sal_Int32 nOpenMode,
86 const uno::Sequence< beans::Property >& seq,
87 const uno::Sequence< NumberedSortingInfo >& seqSort,
88 URLParameter& aURLParameter,
89 Databases* pDatabases )
90 : ResultSetBase( xMSF,xProvider,nOpenMode,seq,seqSort ),
91 m_pDatabases( pDatabases ),
92 m_aURLParameter( aURLParameter )
94 Reference< XTransliteration > xTrans(
95 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
96 UNO_QUERY );
97 Locale aLocale( aURLParameter.get_language(),
98 rtl::OUString(),
99 rtl::OUString() );
100 if(xTrans.is())
101 xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
102 aLocale );
104 // Access Lucene via XInvocation
105 Reference< script::XInvocation > xInvocation(
106 xMSF->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
107 UNO_QUERY );
109 vector< vector< rtl::OUString > > queryList;
111 sal_Int32 idx;
112 rtl::OUString query = m_aURLParameter.get_query();
113 while( query.getLength() )
115 idx = query.indexOf( sal_Unicode( ' ' ) );
116 if( idx == -1 )
117 idx = query.getLength();
119 vector< rtl::OUString > currentQuery;
120 rtl::OUString tmp(query.copy( 0,idx ));
121 rtl:: OUString toliterate = tmp;
122 if(xTrans.is()) {
123 Sequence<sal_Int32> aSeq;
124 toliterate = xTrans->transliterate(
125 tmp,0,tmp.getLength(),aSeq);
128 currentQuery.push_back( toliterate );
129 queryList.push_back( currentQuery );
131 int nCpy = 1 + idx;
132 if( nCpy >= query.getLength() )
133 query = rtl::OUString();
134 else
135 query = query.copy( 1 + idx );
139 vector< rtl::OUString > aCompleteResultVector;
140 if( xInvocation.is() )
142 rtl::OUString scope = m_aURLParameter.get_scope();
143 bool bCaptionsOnly = ( scope.compareToAscii( "Heading" ) == 0 );
144 sal_Int32 hitCount = m_aURLParameter.get_hitCount();
146 #ifdef LOGGING
147 FILE* pFile = fopen( "d:\\resultset_out.txt", "w" );
148 #endif
150 IndexFolderIterator aIndexFolderIt( *pDatabases, m_aURLParameter.get_module(), m_aURLParameter.get_language() );
151 rtl::OUString idxDir;
152 bool bExtension = false;
153 int iDir = 0;
154 vector< vector<HitItem>* > aIndexFolderResultVectorVector;
156 bool bTemporary;
157 while( (idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary )).getLength() > 0 )
159 vector<HitItem> aIndexFolderResultVector;
163 vector< vector<HitItem>* > aQueryListResultVectorVector;
164 set< rtl::OUString > aSet,aCurrent,aResultSet;
166 int nQueryListSize = queryList.size();
167 if( nQueryListSize > 1 )
168 hitCount = 2000;
170 for( int i = 0; i < nQueryListSize; ++i )
172 vector<HitItem>* pQueryResultVector;
173 if( nQueryListSize > 1 )
175 pQueryResultVector = new vector<HitItem>();
176 aQueryListResultVectorVector.push_back( pQueryResultVector );
178 else
180 pQueryResultVector = &aIndexFolderResultVector;
182 pQueryResultVector->reserve( hitCount );
184 int nParamCount = bCaptionsOnly ? 7 : 6;
185 Sequence<uno::Any> aParamsSeq( nParamCount );
187 aParamsSeq[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
188 aParamsSeq[1] = uno::makeAny( m_aURLParameter.get_language() );
190 aParamsSeq[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
191 rtl::OUString aSystemPath;
192 osl::FileBase::getSystemPathFromFileURL( idxDir, aSystemPath );
193 aParamsSeq[3] = uno::makeAny( aSystemPath );
195 aParamsSeq[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
197 const std::vector< rtl::OUString >& aListItem = queryList[i];
198 ::rtl::OUString aNewQueryStr = aListItem[0];
199 aParamsSeq[5] = uno::makeAny( aNewQueryStr );
201 if( bCaptionsOnly )
202 aParamsSeq[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
204 Sequence< sal_Int16 > aOutParamIndex;
205 Sequence< uno::Any > aOutParam;
207 uno::Any aRet = xInvocation->invoke( rtl::OUString::createFromAscii( "search" ),
208 aParamsSeq, aOutParamIndex, aOutParam );
210 Sequence< float > aScoreSeq;
211 int nScoreCount = 0;
212 int nOutParamCount = aOutParam.getLength();
213 if( nOutParamCount == 1 )
215 const uno::Any* pScoreAnySeq = aOutParam.getConstArray();
216 if( pScoreAnySeq[0] >>= aScoreSeq )
217 nScoreCount = aScoreSeq.getLength();
220 Sequence<rtl::OUString> aRetSeq;
221 if( aRet >>= aRetSeq )
223 if( nQueryListSize > 1 )
224 aSet.clear();
226 const rtl::OUString* pRetSeq = aRetSeq.getConstArray();
227 int nCount = aRetSeq.getLength();
228 if( nCount > hitCount )
229 nCount = hitCount;
230 for( int j = 0 ; j < nCount ; ++j )
232 float fScore = 0.0;
233 if( j < nScoreCount )
234 fScore = aScoreSeq[j];
236 rtl::OUString aURL = pRetSeq[j];
237 pQueryResultVector->push_back( HitItem( aURL, fScore ) );
238 if( nQueryListSize > 1 )
239 aSet.insert( aURL );
241 #ifdef LOGGING
242 if( pFile )
244 rtl::OString tmp(rtl::OUStringToOString( aURL, RTL_TEXTENCODING_UTF8));
245 fprintf( pFile, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir, i, fScore, tmp.getStr() );
247 #endif
251 // intersect
252 if( nQueryListSize > 1 )
254 if( i == 0 )
256 aResultSet = aSet;
258 else
260 aCurrent = aResultSet;
261 aResultSet.clear();
262 set_intersection( aSet.begin(),aSet.end(),
263 aCurrent.begin(),aCurrent.end(),
264 inserter(aResultSet,aResultSet.begin()));
269 // Combine results in aIndexFolderResultVector
270 if( nQueryListSize > 1 )
272 for( int n = 0 ; n < nQueryListSize ; ++n )
274 vector<HitItem>* pQueryResultVector = aQueryListResultVectorVector[n];
275 vector<HitItem>& rQueryResultVector = *pQueryResultVector;
277 int nItemCount = rQueryResultVector.size();
278 for( int i = 0 ; i < nItemCount ; ++i )
280 const HitItem& rItem = rQueryResultVector[ i ];
281 set< rtl::OUString >::iterator it;
282 if( (it = aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
284 HitItem aItemCopy( rItem );
285 aItemCopy.m_fScore /= nQueryListSize; // To get average score
286 if( n == 0 )
288 // Use first pass to create entry
289 aIndexFolderResultVector.push_back( aItemCopy );
291 #ifdef LOGGING
292 if( pFile )
294 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
295 fprintf( pFile, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n, i, aItemCopy.m_fScore, rItem.m_fScore, tmp.getStr() );
297 #endif
299 else
301 // Find entry in vector
302 int nCount = aIndexFolderResultVector.size();
303 for( int j = 0 ; j < nCount ; ++j )
305 HitItem& rFindItem = aIndexFolderResultVector[ j ];
306 if( rFindItem.m_aURL.equals( aItemCopy.m_aURL ) )
308 #ifdef LOGGING
309 if( pFile )
311 rtl::OString tmp(rtl::OUStringToOString( aItemCopy.m_aURL, RTL_TEXTENCODING_UTF8));
312 fprintf( pFile, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n, i,
313 rFindItem.m_fScore, aItemCopy.m_fScore, rFindItem.m_fScore + aItemCopy.m_fScore, tmp.getStr() );
315 #endif
317 rFindItem.m_fScore += aItemCopy.m_fScore;
318 break;
325 delete pQueryResultVector;
328 sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
331 vector<HitItem>* pIndexFolderHitItemVector = new vector<HitItem>( aIndexFolderResultVector );
332 aIndexFolderResultVectorVector.push_back( pIndexFolderHitItemVector );
333 aIndexFolderResultVector.clear();
335 catch( const Exception& )
339 ++iDir;
341 if( bTemporary )
342 aIndexFolderIt.deleteTempIndexFolder( idxDir );
344 } // Iterator
347 int nVectorCount = aIndexFolderResultVectorVector.size();
348 vector<HitItem>::size_type* pCurrentVectorIndex = new vector<HitItem>::size_type[nVectorCount];
349 for( int j = 0 ; j < nVectorCount ; ++j )
350 pCurrentVectorIndex[j] = 0;
352 #ifdef LOGGING
353 if( pFile )
355 for( int k = 0 ; k < nVectorCount ; ++k )
357 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
358 int nItemCount = rIndexFolderVector.size();
360 fprintf( pFile, "Vector %d, %d elements\n", k, nItemCount );
362 for( int i = 0 ; i < nItemCount ; ++i )
364 const HitItem& rItem = rIndexFolderVector[ i ];
365 rtl::OString tmp(rtl::OUStringToOString(rItem.m_aURL, RTL_TEXTENCODING_UTF8));
366 fprintf( pFile, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k, i, nItemCount, rItem.m_fScore, tmp.getStr() );
370 #endif
372 sal_Int32 nTotalHitCount = m_aURLParameter.get_hitCount();
373 sal_Int32 nHitCount = 0;
374 while( nHitCount < nTotalHitCount )
376 int iVectorWithBestScore = -1;
377 float fBestScore = 0.0;
378 for( int k = 0 ; k < nVectorCount ; ++k )
380 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[k];
381 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
383 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
385 if( fBestScore < rItem.m_fScore )
387 fBestScore = rItem.m_fScore;
388 iVectorWithBestScore = k;
393 if( iVectorWithBestScore == -1 ) // No item left at all
394 break;
396 vector<HitItem>& rIndexFolderVector = *aIndexFolderResultVectorVector[iVectorWithBestScore];
397 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
399 pCurrentVectorIndex[iVectorWithBestScore]++;
401 aCompleteResultVector.push_back( rItem.m_aURL );
402 ++nHitCount;
405 delete[] pCurrentVectorIndex;
406 for( int n = 0 ; n < nVectorCount ; ++n )
408 vector<HitItem>* pIndexFolderVector = aIndexFolderResultVectorVector[n];
409 delete pIndexFolderVector;
412 #ifdef LOGGING
413 fclose( pFile );
414 #endif
417 sal_Int32 replIdx = rtl::OUString::createFromAscii( "#HLP#" ).getLength();
418 rtl::OUString replWith = rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
420 int nResultCount = aCompleteResultVector.size();
421 for( int r = 0 ; r < nResultCount ; ++r )
423 rtl::OUString aURL = aCompleteResultVector[r];
424 rtl::OUString aResultStr = replWith + aURL.copy(replIdx);
425 m_aPath.push_back( aResultStr );
428 m_aItems.resize( m_aPath.size() );
429 m_aIdents.resize( m_aPath.size() );
431 Command aCommand;
432 aCommand.Name = rtl::OUString::createFromAscii( "getPropertyValues" );
433 aCommand.Argument <<= m_sProperty;
435 for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
437 m_aPath[m_nRow] =
438 m_aPath[m_nRow] +
439 rtl::OUString::createFromAscii( "?Language=" ) +
440 m_aURLParameter.get_language() +
441 rtl::OUString::createFromAscii( "&System=" ) +
442 m_aURLParameter.get_system();
444 uno::Reference< XContent > content = queryContent();
445 if( content.is() )
447 uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
448 cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( 0 ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
451 m_nRow = 0xffffffff;