1 # -*- coding: utf-8 -*-
3 # Copyright 2008 Zuza Software Foundation
5 # This file is part of translate.
7 # translate is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # translate is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with translate; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""interface for the pylucene (v1.x) indexing engine

take a look at PyLuceneIndexer.py for PyLucene v2.x support
"""
31 # this module is based on PyLuceneIndexer (for PyLucene v2.x)
32 import PyLuceneIndexer
37 return PyLuceneIndexer
._get
_pylucene
_version
() == 1
40 class PyLuceneDatabase(PyLuceneIndexer
.PyLuceneDatabase
):
41 """manage and use a pylucene indexing database"""
43 def _create_query_for_string(self
, text
, require_all
=True,
45 """generate a query for a plain term of a string query
47 basically this function parses the string and returns the resulting
50 @param text: the query string
52 @param require_all: boolean operator
53 (True -> AND (default) / False -> OR)
54 @type require_all: bool
55 @param analyzer: the analyzer to be used
56 possible analyzers are:
57 CommonDatabase.ANALYZER_TOKENIZE
58 the field value is splitted to be matched word-wise
59 CommonDatabase.ANALYZER_PARTIAL
60 the field value must start with the query string
61 CommonDatabase.ANALYZER_EXACT
62 keep special characters and the like
64 @return: resulting query object
65 @rtype: PyLucene.Query
68 analyzer
= self
.analyzer
69 if analyzer
== self
.ANALYZER_EXACT
:
70 # exact matching - no substitution ...
71 # for PyLucene: nothing special is necessary
73 # don't care about special characters ...
74 if analyzer
== self
.ANALYZER_EXACT
:
75 analyzer_obj
= self
.ExactAnalyzer()
77 text
= _escape_term_value(text
)
78 analyzer_obj
= PyLucene
.StandardAnalyzer()
79 qp
= PyLucene
.QueryParser(analyzer
=analyzer_obj
)
81 qp
.setDefaultOperator(qp
.Operator
.AND
)
83 qp
.setDefaultOperator(qp
.Operator
.OR
)
84 if (analyzer
& self
.ANALYZER_PARTIAL
) > 0:
85 # PyLucene uses explicit wildcards for partial matching
89 def _create_query_for_field(self
, field
, value
, analyzer
=None):
90 """generate a field query
92 this functions creates a field->value query
94 @param field: the fieldname to be used
96 @param value: the wanted value of the field
98 @param analyzer: the analyzer to be used
99 possible analyzers are:
100 CommonDatabase.ANALYZER_TOKENIZE
101 the field value is splitted to be matched word-wise
102 CommonDatabase.ANALYZER_PARTIAL
103 the field value must start with the query string
104 CommonDatabase.ANALYZER_EXACT
105 keep special characters and the like
107 @return: resulting query object
108 @rtype: PyLucene.Query
111 analyzer
= self
.analyzer
112 if analyzer
== self
.ANALYZER_EXACT
:
113 analyzer_obj
= self
.ExactAnalyzer()
115 value
= _escape_term_value(value
)
116 analyzer_obj
= PyLucene
.StandardAnalyzer()
117 if (analyzer
& self
.ANALYZER_PARTIAL
) > 0:
118 # PyLucene uses explicit wildcards for partial matching
120 return PyLucene
.QueryParser
.parse(value
, field
, analyzer_obj
)
122 def _create_query_combined(self
, queries
, require_all
=True):
123 """generate a combined query
125 @param queries: list of the original queries
126 @type queries: list of xapian.Query
127 @param require_all: boolean operator
128 (True -> AND (default) / False -> OR)
129 @type require_all: bool
130 @return: the resulting combined query object
131 @rtype: PyLucene.Query
133 combined_query
= PyLucene
.BooleanQuery()
134 for query
in queries
:
136 PyLucene
.BooleanClause(query
, require_all
, False))
137 return combined_query
139 def _add_plain_term(self
, document
, term
, tokenize
=True):
140 """add a term to a document
142 @param document: the document to be changed
143 @type document: xapian.Document | PyLucene.Document
144 @param term: a single term to be added
146 @param tokenize: should the term be tokenized automatically
149 # Field parameters: name, string, store, index, token
150 document
.add(PyLucene
.Field(str(PyLuceneIndex
.UNNAMED_FIELD_NAME
), term
,
151 True, True, tokenize
))
153 def _add_field_term(self
, document
, field
, term
, tokenize
=True):
154 """add a field term to a document
156 @param document: the document to be changed
157 @type document: xapian.Document | PyLucene.Document
158 @param field: name of the field
160 @param term: term to be associated to the field
162 @param tokenize: should the term be tokenized automatically
165 # TODO: decoding (utf-8) is missing
166 # Field parameters: name, string, store, index, token
167 document
.add(PyLucene
.Field(str(field
), term
,
168 True, True, tokenize
))
170 def get_query_result(self
, query
):
171 """return an object containing the results of a query
173 @param query: a pre-compiled query
174 @type query: a query object of the real implementation
175 @return: an object that allows access to the results
176 @rtype: subclass of CommonEnquire
178 return PyLucene
.indexSearcher
.search(query
)
180 def search(self
, query
, fieldnames
):
181 """return a list of the contents of specified fields for all matches of
184 @param query: the query to be issued
185 @type query: a query object of the real implementation
186 @param fieldnames: the name(s) of a field of the document content
187 @type fieldnames: string | list of strings
188 @return: a list of dicts containing the specified field(s)
189 @rtype: list of dicts
191 if isinstance(fieldnames
, str):
192 fieldnames
= [fieldnames
]
193 hits
= PyLucene
.indexSearcher
.search(query
)
195 for hit
, doc
in hits
:
197 for fieldname
in fieldnames
:
198 content
= doc
.get(fieldname
)
199 if not content
is None:
200 fields
[fieldname
] = content
201 result
.append(fields
)
204 def _writer_open(self
):
205 """open write access for the indexing database and acquire an
208 super(PyLuceneIndexer1
, self
)._writer
_open
_()
209 self
.writer
.maxFieldLength
= PyLuceneIndexer
.MAX_FIELD_SIZE