1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: resultsetforquery.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_xmlhelp.hxx"
33 #include <com/sun/star/ucb/Command.hpp>
34 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
35 #include <com/sun/star/i18n/XExtendedTransliteration.hpp>
36 #include <com/sun/star/ucb/XCommandProcessor.hpp>
37 #include <com/sun/star/lang/Locale.hpp>
38 #include <com/sun/star/script/XInvocation.hpp>
40 #ifndef INCLUDED_STL_ALGORITHM
42 #define INCLUDED_STL_ALGORITHM
44 #ifndef INCLUDED_STL_SET
46 #define INCLUDED_STL_SET
49 #include <qe/Query.hxx>
50 #include <qe/DocGenerator.hxx>
51 #include "resultsetforquery.hxx"
52 #include "databases.hxx"
58 using namespace chelp
;
59 using namespace xmlsearch::excep
;
60 using namespace xmlsearch::qe
;
61 using namespace com::sun::star
;
62 using namespace com::sun::star::ucb
;
63 using namespace com::sun::star::i18n
;
64 using namespace com::sun::star::uno
;
65 using namespace com::sun::star::lang
;
// Fragments of the HitItem search-hit record. The enclosing struct header and
// the member declarations are not visible in this excerpt.
// Constructor: receives the URL of a hit and its relevance score.
73 HitItem( const rtl::OUString
& aURL
, float fScore
)
// Inverted ordering: this item compares "less" than rHitItem when rHitItem's
// score is the smaller one, so an ascending sort using this operator places
// the highest-scoring hits first.
77 bool operator < ( const HitItem
& rHitItem
) const
79 return rHitItem
.m_fScore
< m_fScore
;
// ResultSetForQuery constructor.
// NOTE(review): this excerpt is incomplete - braces, some declarations and
// several control-flow lines of the original are missing, so the comments
// below describe only what the visible lines show.
//
// Visible steps:
//   1. split the query string at blanks and transliterate every word,
//   2. for every index folder, send each word to the
//      "com.sun.star.help.HelpSearch" service and collect URL/score hits,
//   3. when there are several words, keep the URLs common to the word
//      results and accumulate their scores (each divided by the word count),
//   4. merge the per-folder hit lists by score into one URL list,
//   5. rewrite the URLs to "vnd.sun.star.help://" form and fetch the
//      property values for every resulting row.
//
// Parameters visible here:
//   xMSF          - service factory used to create the UNO services
//   xProvider     - forwarded to ResultSetBase
//   seq, seqSort  - forwarded to ResultSetBase
//   aURLParameter - supplies query, scope, language, module, system, hit count
//   pDatabases    - handed to the index folder iterator
83 ResultSetForQuery::ResultSetForQuery( const uno::Reference
< lang::XMultiServiceFactory
>& xMSF
,
84 const uno::Reference
< XContentProvider
>& xProvider
,
86 const uno::Sequence
< beans::Property
>& seq
,
87 const uno::Sequence
< NumberedSortingInfo
>& seqSort
,
88 URLParameter
& aURLParameter
,
89 Databases
* pDatabases
)
90 : ResultSetBase( xMSF
,xProvider
,nOpenMode
,seq
,seqSort
),
91 m_pDatabases( pDatabases
),
92 m_aURLParameter( aURLParameter
)
// Transliteration service applied to the query words below; the
// UPPERCASE_LOWERCASE module is loaded for the language of the request.
94 Reference
< XTransliteration
> xTrans(
95 xMSF
->createInstance( rtl::OUString::createFromAscii( "com.sun.star.i18n.Transliteration" ) ),
97 Locale
aLocale( aURLParameter
.get_language(),
101 xTrans
->loadModule(TransliterationModules_UPPERCASE_LOWERCASE
,
104 // Access Lucene via XInvocation
105 Reference
< script::XInvocation
> xInvocation(
106 xMSF
->createInstance( rtl::OUString::createFromAscii( "com.sun.star.help.HelpSearch" ) ),
// queryList receives one inner vector per query word (each inner vector gets
// exactly one push_back in the visible code).
109 vector
< vector
< rtl::OUString
> > queryList
;
112 rtl::OUString query
= m_aURLParameter
.get_query();
// Consume the query word by word until nothing is left.
113 while( query
.getLength() )
// idx is the position of the next blank. The second assignment presumably
// runs only when no blank was found (its condition is not visible here).
115 idx
= query
.indexOf( sal_Unicode( ' ' ) );
117 idx
= query
.getLength();
119 vector
< rtl::OUString
> currentQuery
;
120 rtl::OUString
tmp(query
.copy( 0,idx
));
121 rtl:: OUString toliterate
= tmp
;
// Transliterate the whole word; aSeq receives the offset information that
// transliterate() returns and is not used afterwards in the visible code.
123 Sequence
<sal_Int32
> aSeq
;
124 toliterate
= xTrans
->transliterate(
125 tmp
,0,tmp
.getLength(),aSeq
);
128 currentQuery
.push_back( toliterate
);
129 queryList
.push_back( currentQuery
);
// Remove the processed word and its separator from the front of the query;
// when nothing remains the query becomes empty, which ends the loop.
132 if( nCpy
>= query
.getLength() )
133 query
= rtl::OUString();
135 query
= query
.copy( 1 + idx
);
// Final, score-merged list of hit URLs across all index folders.
139 vector
< rtl::OUString
> aCompleteResultVector
;
140 if( xInvocation
.is() )
// A scope equal to "Heading" restricts the search to captions.
142 rtl::OUString scope
= m_aURLParameter
.get_scope();
143 bool bCaptionsOnly
= ( scope
.compareToAscii( "Heading" ) == 0 );
144 sal_Int32 hitCount
= m_aURLParameter
.get_hitCount();
// NOTE(review): debug trace file at a hard-coded path. Any preprocessor guard
// around this and the fprintf calls below is not visible in this excerpt, and
// no NULL check or fclose is visible either - confirm against the full file.
147 FILE* pFile
= fopen( "d:\\resultset_out.txt", "w" );
// Walk over all index folders for the requested module and language.
150 IndexFolderIterator
aIndexFolderIt( *pDatabases
, m_aURLParameter
.get_module(), m_aURLParameter
.get_language() );
151 rtl::OUString idxDir
;
152 bool bExtension
= false;
// One heap-allocated, sorted hit vector per index folder; released after the
// merge further down.
154 vector
< vector
<HitItem
>* > aIndexFolderResultVectorVector
;
157 while( (idxDir
= aIndexFolderIt
.nextIndexFolder( bExtension
, bTemporary
)).getLength() > 0 )
// Hits of the current index folder.
159 vector
<HitItem
> aIndexFolderResultVector
;
// Per-word hit vectors (used only when the query has more than one word) and
// the URL sets used to intersect the per-word results.
163 vector
< vector
<HitItem
>* > aQueryListResultVectorVector
;
164 set
< rtl::OUString
> aSet
,aCurrent
,aResultSet
;
166 int nQueryListSize
= queryList
.size();
167 if( nQueryListSize
> 1 )
// Run one search per query word.
170 for( int i
= 0; i
< nQueryListSize
; ++i
)
// Multi-word queries collect each word's hits in a separate heap vector;
// otherwise (presumably the else branch, not visible) the hits go straight
// into aIndexFolderResultVector.
172 vector
<HitItem
>* pQueryResultVector
;
173 if( nQueryListSize
> 1 )
175 pQueryResultVector
= new vector
<HitItem
>();
176 aQueryListResultVectorVector
.push_back( pQueryResultVector
);
180 pQueryResultVector
= &aIndexFolderResultVector
;
182 pQueryResultVector
->reserve( hitCount
);
// Arguments for the search call, laid out as option/value pairs:
// "-lang" <language>, "-index" <system path>, "-query" <word>, plus a
// trailing "-caption" slot that exists only when bCaptionsOnly is set.
184 int nParamCount
= bCaptionsOnly
? 7 : 6;
185 Sequence
<uno::Any
> aParamsSeq( nParamCount
);
187 aParamsSeq
[0] = uno::makeAny( rtl::OUString::createFromAscii( "-lang" ) );
188 aParamsSeq
[1] = uno::makeAny( m_aURLParameter
.get_language() );
190 aParamsSeq
[2] = uno::makeAny( rtl::OUString::createFromAscii( "-index" ) );
// The index folder is a file URL; the search is given a system path.
// NOTE(review): the result of the conversion is not checked here.
191 rtl::OUString aSystemPath
;
192 osl::FileBase::getSystemPathFromFileURL( idxDir
, aSystemPath
);
193 aParamsSeq
[3] = uno::makeAny( aSystemPath
);
195 aParamsSeq
[4] = uno::makeAny( rtl::OUString::createFromAscii( "-query" ) );
197 const std::vector
< rtl::OUString
>& aListItem
= queryList
[i
];
198 ::rtl::OUString aNewQueryStr
= aListItem
[0];
199 aParamsSeq
[5] = uno::makeAny( aNewQueryStr
);
// Index 6 exists only when nParamCount is 7; the guarding condition is not
// visible in this excerpt.
202 aParamsSeq
[6] = uno::makeAny( rtl::OUString::createFromAscii( "-caption" ) );
204 Sequence
< sal_Int16
> aOutParamIndex
;
205 Sequence
< uno::Any
> aOutParam
;
// Invoke "search": the return value is read below as a sequence of URLs and
// a single out parameter, if present, as the matching sequence of scores.
207 uno::Any aRet
= xInvocation
->invoke( rtl::OUString::createFromAscii( "search" ),
208 aParamsSeq
, aOutParamIndex
, aOutParam
);
210 Sequence
< float > aScoreSeq
;
212 int nOutParamCount
= aOutParam
.getLength();
213 if( nOutParamCount
== 1 )
215 const uno::Any
* pScoreAnySeq
= aOutParam
.getConstArray();
216 if( pScoreAnySeq
[0] >>= aScoreSeq
)
217 nScoreCount
= aScoreSeq
.getLength();
220 Sequence
<rtl::OUString
> aRetSeq
;
221 if( aRet
>>= aRetSeq
)
223 if( nQueryListSize
> 1 )
226 const rtl::OUString
* pRetSeq
= aRetSeq
.getConstArray();
227 int nCount
= aRetSeq
.getLength();
// Presumably limits the number of processed hits to hitCount (the statement
// controlled by this condition is not visible here).
228 if( nCount
> hitCount
)
// Store every returned URL as a HitItem; a score is taken from aScoreSeq only
// for indices below nScoreCount.
230 for( int j
= 0 ; j
< nCount
; ++j
)
233 if( j
< nScoreCount
)
234 fScore
= aScoreSeq
[j
];
236 rtl::OUString aURL
= pRetSeq
[j
];
237 pQueryResultVector
->push_back( HitItem( aURL
, fScore
) );
238 if( nQueryListSize
> 1 )
// Debug trace of the individual hit.
244 rtl::OString
tmp(rtl::OUStringToOString( aURL
, RTL_TEXTENCODING_UTF8
));
245 fprintf( pFile
, "Dir %d, Query %d, Item: score=%f, URL=%s\n", iDir
, i
, fScore
, tmp
.getStr() );
// Multi-word query: intersect this word's URL set (aSet) with the URLs kept
// so far (copied into aCurrent) and write the result into aResultSet.
// NOTE(review): the statements that fill or clear these sets are not visible.
252 if( nQueryListSize
> 1 )
260 aCurrent
= aResultSet
;
262 set_intersection( aSet
.begin(),aSet
.end(),
263 aCurrent
.begin(),aCurrent
.end(),
264 inserter(aResultSet
,aResultSet
.begin()));
269 // Combine results in aIndexFolderResultVector
270 if( nQueryListSize
> 1 )
272 for( int n
= 0 ; n
< nQueryListSize
; ++n
)
274 vector
<HitItem
>* pQueryResultVector
= aQueryListResultVectorVector
[n
];
275 vector
<HitItem
>& rQueryResultVector
= *pQueryResultVector
;
277 int nItemCount
= rQueryResultVector
.size();
278 for( int i
= 0 ; i
< nItemCount
; ++i
)
// Only hits whose URL is contained in aResultSet contribute to the result.
280 const HitItem
& rItem
= rQueryResultVector
[ i
];
281 set
< rtl::OUString
>::iterator it
;
282 if( (it
= aResultSet
.find( rItem
.m_aURL
)) != aResultSet
.end() )
284 HitItem
aItemCopy( rItem
);
285 aItemCopy
.m_fScore
/= nQueryListSize
; // To get average score
288 // Use first pass to create entry
289 aIndexFolderResultVector
.push_back( aItemCopy
);
294 rtl::OString
tmp(rtl::OUStringToOString( aItemCopy
.m_aURL
, RTL_TEXTENCODING_UTF8
));
295 fprintf( pFile
, "Combine: Query %d (first pass), Item %d: score=%f (%f), URL=%s\n", n
, i
, aItemCopy
.m_fScore
, rItem
.m_fScore
, tmp
.getStr() );
// Later passes: linear search for the entry with the same URL and add this
// word's share of the score to it.
301 // Find entry in vector
302 int nCount
= aIndexFolderResultVector
.size();
303 for( int j
= 0 ; j
< nCount
; ++j
)
305 HitItem
& rFindItem
= aIndexFolderResultVector
[ j
];
306 if( rFindItem
.m_aURL
.equals( aItemCopy
.m_aURL
) )
311 rtl::OString
tmp(rtl::OUStringToOString( aItemCopy
.m_aURL
, RTL_TEXTENCODING_UTF8
));
312 fprintf( pFile
, "Combine: Query %d, Item %d: score=%f + %f = %f, URL=%s\n", n
, i
,
313 rFindItem
.m_fScore
, aItemCopy
.m_fScore
, rFindItem
.m_fScore
+ aItemCopy
.m_fScore
, tmp
.getStr() );
317 rFindItem
.m_fScore
+= aItemCopy
.m_fScore
;
// Release the per-word hit vector allocated with new above.
325 delete pQueryResultVector
;
// Sort the folder's hits with HitItem::operator<.
328 sort( aIndexFolderResultVector
.begin(), aIndexFolderResultVector
.end() );
// Keep a heap copy of the sorted hits for the cross-folder merge and reset
// the local vector.
331 vector
<HitItem
>* pIndexFolderHitItemVector
= new vector
<HitItem
>( aIndexFolderResultVector
);
332 aIndexFolderResultVectorVector
.push_back( pIndexFolderHitItemVector
);
333 aIndexFolderResultVector
.clear();
// NOTE(review): the matching try and the handler body are not visible here.
335 catch( const Exception
& )
// Presumably executed only for temporary index folders (bTemporary); the
// condition is not visible in this excerpt.
342 aIndexFolderIt
.deleteTempIndexFolder( idxDir
);
// Merge phase: one read position per folder vector, all starting at 0.
347 int nVectorCount
= aIndexFolderResultVectorVector
.size();
348 vector
<HitItem
>::size_type
* pCurrentVectorIndex
= new vector
<HitItem
>::size_type
[nVectorCount
];
349 for( int j
= 0 ; j
< nVectorCount
; ++j
)
350 pCurrentVectorIndex
[j
] = 0;
// Debug dump of every folder vector.
355 for( int k
= 0 ; k
< nVectorCount
; ++k
)
357 vector
<HitItem
>& rIndexFolderVector
= *aIndexFolderResultVectorVector
[k
];
358 int nItemCount
= rIndexFolderVector
.size();
360 fprintf( pFile
, "Vector %d, %d elements\n", k
, nItemCount
);
362 for( int i
= 0 ; i
< nItemCount
; ++i
)
364 const HitItem
& rItem
= rIndexFolderVector
[ i
];
365 rtl::OString
tmp(rtl::OUStringToOString(rItem
.m_aURL
, RTL_TEXTENCODING_UTF8
));
366 fprintf( pFile
, " Item_vector%d, %d/%d: score=%f, URL=%s\n", k
, i
, nItemCount
, rItem
.m_fScore
, tmp
.getStr() );
// Repeatedly take the unread item with the highest score among the current
// positions of all folder vectors, until the requested hit count is reached.
372 sal_Int32 nTotalHitCount
= m_aURLParameter
.get_hitCount();
373 sal_Int32 nHitCount
= 0;
374 while( nHitCount
< nTotalHitCount
)
// The comparison below is strict and starts from 0.0, so an item whose score
// is not greater than 0 is never selected.
376 int iVectorWithBestScore
= -1;
377 float fBestScore
= 0.0;
378 for( int k
= 0 ; k
< nVectorCount
; ++k
)
380 vector
<HitItem
>& rIndexFolderVector
= *aIndexFolderResultVectorVector
[k
];
381 if( pCurrentVectorIndex
[k
] < rIndexFolderVector
.size() )
383 const HitItem
& rItem
= rIndexFolderVector
[ pCurrentVectorIndex
[k
] ];
385 if( fBestScore
< rItem
.m_fScore
)
387 fBestScore
= rItem
.m_fScore
;
388 iVectorWithBestScore
= k
;
393 if( iVectorWithBestScore
== -1 ) // No item left at all
// Append the winning item's URL and advance that vector's read position.
// NOTE(review): the increment of nHitCount is not visible in this excerpt.
396 vector
<HitItem
>& rIndexFolderVector
= *aIndexFolderResultVectorVector
[iVectorWithBestScore
];
397 const HitItem
& rItem
= rIndexFolderVector
[ pCurrentVectorIndex
[iVectorWithBestScore
] ];
399 pCurrentVectorIndex
[iVectorWithBestScore
]++;
401 aCompleteResultVector
.push_back( rItem
.m_aURL
);
// Release the read positions and the per-folder heap vectors.
405 delete[] pCurrentVectorIndex
;
406 for( int n
= 0 ; n
< nVectorCount
; ++n
)
408 vector
<HitItem
>* pIndexFolderVector
= aIndexFolderResultVectorVector
[n
];
409 delete pIndexFolderVector
;
// Rewrite each hit URL: drop as many leading characters as "#HLP#" is long
// and prepend "vnd.sun.star.help://".
// NOTE(review): assumes every URL is at least that long (presumably because
// it starts with "#HLP#"); no check is visible here.
417 sal_Int32 replIdx
= rtl::OUString::createFromAscii( "#HLP#" ).getLength();
418 rtl::OUString replWith
= rtl::OUString::createFromAscii( "vnd.sun.star.help://" );
420 int nResultCount
= aCompleteResultVector
.size();
421 for( int r
= 0 ; r
< nResultCount
; ++r
)
423 rtl::OUString aURL
= aCompleteResultVector
[r
];
424 rtl::OUString aResultStr
= replWith
+ aURL
.copy(replIdx
);
425 m_aPath
.push_back( aResultStr
);
// One item slot and one identifier slot per result path.
428 m_aItems
.resize( m_aPath
.size() );
429 m_aIdents
.resize( m_aPath
.size() );
// Command that requests the values of the properties in m_sProperty.
432 aCommand
.Name
= rtl::OUString::createFromAscii( "getPropertyValues" );
433 aCommand
.Argument
<<= m_sProperty
;
// For every row, append "?Language=...&System=..." (the target of the append
// is not visible here, presumably m_aPath[m_nRow]), obtain the content for
// the row and execute the command, storing the result in m_aItems[m_nRow].
435 for( m_nRow
= 0; sal::static_int_cast
<sal_uInt32
>( m_nRow
) < m_aPath
.size(); ++m_nRow
)
439 rtl::OUString::createFromAscii( "?Language=" ) +
440 m_aURLParameter
.get_language() +
441 rtl::OUString::createFromAscii( "&System=" ) +
442 m_aURLParameter
.get_system();
444 uno::Reference
< XContent
> content
= queryContent();
// NOTE(review): no visible is() check on content or cmd before execute().
447 uno::Reference
< XCommandProcessor
> cmd( content
,uno::UNO_QUERY
);
448 cmd
->execute( aCommand
,0,uno::Reference
< XCommandEnvironment
>( 0 ) ) >>= m_aItems
[m_nRow
]; //TODO: check return value of operator >>=