2010-03-05 20:33:14 +00:00
|
|
|
# Copyright (C) 2009-2010 David Sauve, Trapeze. All rights reserved.
|
2009-07-30 12:57:35 +00:00
|
|
|
|
2009-10-01 16:14:40 +00:00
|
|
|
__author__ = 'David Sauve'
|
2010-03-05 20:33:14 +00:00
|
|
|
__version__ = (1, 1, 4, 'alpha')
|
2009-10-01 16:14:40 +00:00
|
|
|
|
2009-11-12 16:28:07 +00:00
|
|
|
import time
|
2009-06-16 18:48:11 +00:00
|
|
|
import datetime
|
|
|
|
import cPickle as pickle
|
|
|
|
import os
|
|
|
|
import re
|
2009-08-05 20:44:12 +00:00
|
|
|
import shutil
|
2009-08-28 14:42:58 +00:00
|
|
|
import sys
|
2009-06-16 18:48:11 +00:00
|
|
|
import warnings
|
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
from django.core.exceptions import ImproperlyConfigured
|
|
|
|
from django.utils.encoding import smart_unicode, force_unicode
|
|
|
|
|
2009-11-11 02:31:25 +00:00
|
|
|
from haystack.backends import BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
|
2009-12-08 14:56:57 +00:00
|
|
|
from haystack.exceptions import HaystackError, MissingDependency
|
2009-08-16 15:40:47 +00:00
|
|
|
from haystack.fields import DateField, DateTimeField, IntegerField, FloatField, BooleanField, MultiValueField
|
2009-06-16 18:48:11 +00:00
|
|
|
from haystack.models import SearchResult
|
2010-02-19 14:47:58 +00:00
|
|
|
from haystack.utils import get_identifier, get_facet_field_name
|
2009-06-16 18:48:11 +00:00
|
|
|
|
|
|
|
try:
|
|
|
|
import xapian
|
|
|
|
except ImportError:
|
|
|
|
raise MissingDependency("The 'xapian' backend requires the installation of 'xapian'. Please refer to the documentation.")
|
|
|
|
|
|
|
|
|
|
|
|
DOCUMENT_ID_TERM_PREFIX = 'Q'
|
|
|
|
DOCUMENT_CUSTOM_TERM_PREFIX = 'X'
|
|
|
|
DOCUMENT_CT_TERM_PREFIX = DOCUMENT_CUSTOM_TERM_PREFIX + 'CONTENTTYPE'
|
|
|
|
|
2010-02-10 02:09:33 +00:00
|
|
|
BACKEND_NAME = 'xapian'
|
|
|
|
|
2009-06-18 16:15:13 +00:00
|
|
|
|
2009-12-08 14:56:57 +00:00
|
|
|
class InvalidIndexError(HaystackError):
    """Raised when an index can not be opened."""
|
|
|
|
|
|
|
|
|
2009-12-04 19:42:06 +00:00
|
|
|
class XHValueRangeProcessor(xapian.ValueRangeProcessor):
    """
    A Xapian `ValueRangeProcessor` that maps Haystack field ranges
    (`<field_name>:<low>..<high>`) onto the value column declared for the
    field in the backend schema.
    """
    def __init__(self, backend):
        # Fall back to a default backend so the processor is usable standalone.
        self.backend = backend or SearchBackend()
        xapian.ValueRangeProcessor.__init__(self)

    def __call__(self, begin, end):
        """
        Construct a tuple for value range processing.

        `begin` -- a string in the format '<field_name>:[low_range]'
        If 'low_range' is omitted, assume the smallest possible value.

        `end` -- a string in the format '[high_range|*]'. If '*', assume
        the highest possible value.

        Return a tuple of three strings: (column, low, high)
        """
        colon = begin.find(':')
        field_name = begin[:colon]
        begin = begin[colon + 1:len(begin)]
        for field_dict in self.backend.schema:
            if field_dict['field_name'] == field_name:
                # Substitute a type-appropriate minimum when the lower bound
                # is omitted.
                if not begin:
                    if field_dict['type'] == 'text':
                        begin = u'a'  # TODO: A better way of getting a min text value?
                    elif field_dict['type'] == 'long':
                        begin = -sys.maxint - 1
                    elif field_dict['type'] == 'float':
                        begin = float('-inf')
                    elif field_dict['type'] == 'date' or field_dict['type'] == 'datetime':
                        begin = u'00010101000000'
                # BUGFIX: this used to be an `elif`, so a query that omitted
                # the lower bound AND used '*' for the upper bound (eg.
                # 'field:..*') returned a literal '*' as the high value
                # instead of the type-appropriate maximum.
                if end == '*':
                    if field_dict['type'] == 'text':
                        end = u'z' * 100  # TODO: A better way of getting a max text value?
                    elif field_dict['type'] == 'long':
                        end = sys.maxint
                    elif field_dict['type'] == 'float':
                        end = float('inf')
                    elif field_dict['type'] == 'date' or field_dict['type'] == 'datetime':
                        end = u'99990101000000'
                # Numeric bounds must be serialised the same way the values
                # were stored at index time.
                if field_dict['type'] == 'float':
                    begin = _marshal_value(float(begin))
                    end = _marshal_value(float(end))
                elif field_dict['type'] == 'long':
                    begin = _marshal_value(long(begin))
                    end = _marshal_value(long(end))
                return field_dict['column'], str(begin), str(end)
|
|
|
|
|
|
|
|
|
2009-08-18 13:49:20 +00:00
|
|
|
class XHExpandDecider(xapian.ExpandDecider):
    def __call__(self, term):
        """
        Decide whether `term` may be used to expand the search query.

        Terms carrying the `DOCUMENT_CT_TERM_PREFIX` (content-type markers)
        are rejected; every other term is accepted.
        """
        return not term.startswith(DOCUMENT_CT_TERM_PREFIX)
|
|
|
|
|
|
|
|
|
2009-06-16 18:48:11 +00:00
|
|
|
class SearchBackend(BaseSearchBackend):
|
2009-06-18 16:15:13 +00:00
|
|
|
"""
|
|
|
|
`SearchBackend` defines the Xapian search backend for use with the Haystack
|
|
|
|
API for Django search.
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-18 16:15:13 +00:00
|
|
|
It uses the Xapian Python bindings to interface with Xapian, and as
|
|
|
|
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
|
|
|
|
Django is running with mod_python or mod_wsgi under Apache.
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-18 16:15:13 +00:00
|
|
|
Until this issue has been fixed by Xapian, it is neccessary to set
|
|
|
|
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
|
|
|
|
`PythonInterpreter main_interpreter` when using mod_python.
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-18 16:15:13 +00:00
|
|
|
In order to use this backend, `HAYSTACK_XAPIAN_PATH` must be set in
|
|
|
|
your settings. This should point to a location where you would your
|
|
|
|
indexes to reside.
|
|
|
|
"""
|
2009-12-02 16:47:26 +00:00
|
|
|
def __init__(self, site=None, language='english'):
    """
    Instantiate an instance of `SearchBackend`.

    Optional arguments:
        `site` -- The site to associate the backend with (default = None)
        `language` -- The stemming language (default = 'english')

    Raises `ImproperlyConfigured` when `HAYSTACK_XAPIAN_PATH` is missing
    from settings, and `IOError` when that path is not writable.
    """
    super(SearchBackend, self).__init__(site)

    if not hasattr(settings, 'HAYSTACK_XAPIAN_PATH'):
        raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in your settings.')

    xapian_path = settings.HAYSTACK_XAPIAN_PATH

    # Create the index directory on first use.
    if not os.path.exists(xapian_path):
        os.makedirs(xapian_path)

    if not os.access(xapian_path, os.W_OK):
        raise IOError("The path to your Xapian index '%s' is not writable for the current user/group." % xapian_path)

    self.language = language
    # Schema and content field name are built lazily (see the properties).
    self._schema = None
    self._content_field_name = None
|
|
|
|
|
|
|
|
@property
def schema(self):
    """Return the backend schema, building it on first access."""
    if self._schema:
        return self._schema
    self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields())
    return self._schema
|
|
|
|
|
|
|
|
@property
def content_field_name(self):
    """Return the name of the document (content) field, building the schema on first access."""
    if self._content_field_name:
        return self._content_field_name
    self._content_field_name, self._schema = self.build_schema(self.site.all_searchfields())
    return self._content_field_name
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-16 18:48:11 +00:00
|
|
|
def update(self, index, iterable):
    """
    Updates the `index` with any objects in `iterable` by adding/updating
    the database as needed.

    Required arguments:
        `index` -- The `SearchIndex` to process
        `iterable` -- An iterable of model instances to index

    For each object in `iterable`, a document is created containing all
    of the terms extracted from `index.full_prepare(obj)` with field prefixes,
    and 'as-is' as needed. Also, if the field type is 'text' it will be
    stemmed and stored with the 'Z' prefix as well.

    eg. `content:Testing` ==> `testing, Ztest, ZXCONTENTtest, XCONTENTtest`

    Each document also contains an extra term in the format:

    `XCONTENTTYPE<app_name>.<model_name>`

    As well as a unique identifier in the format:

    `Q<app_name>.<model_name>.<pk>`

    eg.: foo.bar (pk=1) ==> `Qfoo.bar.1`, `XCONTENTTYPEfoo.bar`

    This is useful for querying for a specific document corresponding to
    a model instance.

    The document also contains a pickled version of the object itself and
    the document ID in the document data field.

    Finally, we also store field values to be used for sorting data. We
    store these in the document value slots (position zero is reserved
    for the document ID). All values are stored as unicode strings with
    conversion of float, int, double, values being done by Xapian itself
    through the use of the :method:xapian.sortable_serialise method.
    """
    database = self._database(writable=True)
    try:
        for obj in iterable:
            document = xapian.Document()

            # One term generator per document: it stems with the backend's
            # language and (optionally) feeds the spelling dictionary.
            term_generator = xapian.TermGenerator()
            term_generator.set_database(database)
            term_generator.set_stemmer(xapian.Stem(self.language))
            if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
                term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
            term_generator.set_document(document)

            document_id = DOCUMENT_ID_TERM_PREFIX + get_identifier(obj)
            data = index.full_prepare(obj)

            # Index every schema field present in the prepared data.
            for field in self.schema:
                if field['field_name'] in data.keys():
                    prefix = DOCUMENT_CUSTOM_TERM_PREFIX + field['field_name'].upper()
                    value = data[field['field_name']]
                    if field['type'] == 'text':
                        if field['multi_valued'] == 'false':
                            term = _marshal_term(value)
                            # Index both unprefixed and field-prefixed,
                            # stemmed via the term generator.
                            term_generator.index_text(term)
                            term_generator.index_text(term, 1, prefix)
                            # Single-word values are also added verbatim for
                            # exact matching.
                            if len(term.split()) == 1:
                                document.add_term(term)
                                document.add_term(prefix + term)
                            # Store the raw value in the field's slot for sorting.
                            document.add_value(field['column'], _marshal_value(value))
                        else:
                            # Multi-valued text: index each item separately.
                            for term in value:
                                term = _marshal_term(term)
                                term_generator.index_text(term)
                                term_generator.index_text(term, 1, prefix)
                                if len(term.split()) == 1:
                                    document.add_term(term)
                                    document.add_term(prefix + term)
                    else:
                        # Non-text fields are added 'as-is' (no stemming).
                        if field['multi_valued'] == 'false':
                            term = _marshal_term(value)
                            if len(term.split()) == 1:
                                document.add_term(term)
                                document.add_term(prefix + term)
                            document.add_value(field['column'], _marshal_value(value))
                        else:
                            for term in value:
                                term = _marshal_term(term)
                                if len(term.split()) == 1:
                                    document.add_term(term)
                                    document.add_term(prefix + term)

            # Store the identifying tuple + prepared data as the document payload.
            document.set_data(pickle.dumps(
                (obj._meta.app_label, obj._meta.module_name, obj.pk, data),
                pickle.HIGHEST_PROTOCOL
            ))
            document.add_term(document_id)
            document.add_term(
                DOCUMENT_CT_TERM_PREFIX + u'%s.%s' %
                (obj._meta.app_label, obj._meta.module_name)
            )
            # replace_document is an atomic add-or-update keyed on document_id.
            database.replace_document(document_id, document)

    except UnicodeDecodeError:
        # NOTE(review): a failure anywhere in the loop abandons the whole
        # remaining chunk; best-effort behaviour kept as-is.
        sys.stderr.write('Chunk failed.\n')
        pass
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-16 18:48:11 +00:00
|
|
|
def remove(self, obj):
    """
    Remove indexes for `obj` from the database.

    We delete all instances of `Q<app_name>.<model_name>.<pk>` which
    should be unique to this object.
    """
    writable_database = self._database(writable=True)
    writable_database.delete_document(DOCUMENT_ID_TERM_PREFIX + get_identifier(obj))
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-06-16 18:48:11 +00:00
|
|
|
def clear(self, models=None):
    """
    Clear all instances of `models` from the database or all models, if
    not specified.

    Optional Arguments:
        `models` -- Models to clear from the database (default = None)

    If `models` is empty or None, an empty query is executed which matches
    all documents in the database. Afterwards, each match is deleted.

    Otherwise, for each model, a `delete_document` call is issued with
    the term `XCONTENTTYPE<app_name>.<model_name>`. This will delete
    all documents with the specified model type.
    """
    # NOTE: the default used to be a mutable `models=[]`; `None` avoids the
    # shared-mutable-default pitfall and behaves identically via `not models`.
    database = self._database(writable=True)
    if not models:
        # The empty query matches every document in the database.
        query = xapian.Query('')
        enquire = xapian.Enquire(database)
        enquire.set_query(query)
        for match in self._get_enquire_mset(database, enquire, 0, database.get_doccount()):
            database.delete_document(match.docid)
    else:
        for model in models:
            database.delete_document(
                DOCUMENT_CT_TERM_PREFIX + '%s.%s' %
                (model._meta.app_label, model._meta.module_name)
            )
|
2010-02-06 15:45:26 +00:00
|
|
|
|
2009-10-08 18:42:58 +00:00
|
|
|
@log_query
def search(self, query, sort_by=None, start_offset=0, end_offset=None,
           fields='', highlight=False, facets=None, date_facets=None,
           query_facets=None, narrow_queries=None, spelling_query=None,
           limit_to_registered_models=True, **kwargs):
    """
    Executes the Xapian::query as defined in `query`.

    Required arguments:
        `query` -- Search query to execute

    Optional arguments:
        `sort_by` -- Sort results by specified field (default = None)
        `start_offset` -- Slice results from `start_offset` (default = 0)
        `end_offset` -- Slice results at `end_offset` (default = None), if None, then all documents
        `fields` -- Filter results on `fields` (default = '')
        `highlight` -- Highlight terms in results (default = False)
        `facets` -- Facet results on fields (default = None)
        `date_facets` -- Facet results on date ranges (default = None)
        `query_facets` -- Facet results on queries (default = None)
        `narrow_queries` -- Narrow queries (default = None)
        `spelling_query` -- An optional query to execute spelling suggestion on
        `limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True)

    Returns:
    A dictionary with the following keys:
        `results` -- A list of `SearchResult`
        `hits` -- The total available results
        `facets` - A dictionary of facets with the following keys:
            `fields` -- A list of field facets
            `dates` -- A list of date facets
            `queries` -- A list of query facets
    If faceting was not used, the `facets` key will not be present

    If `query` is None, returns no results.

    If `HAYSTACK_INCLUDE_SPELLING` was enabled in `settings.py`, the
    extra flag `FLAG_SPELLING_CORRECTION` will be passed to the query parser
    and any suggestions for spell correction will be returned as well as
    the results.
    """
    # An empty query matches nothing -- bail out early.
    if xapian.Query.empty(query):
        return {
            'results': [],
            'hits': 0,
        }

    database = self._database()

    if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
        spelling_suggestion = self._do_spelling_suggestion(database, query, spelling_query)
    else:
        spelling_suggestion = ''

    # AND the original query with the OR-ed set of narrowing queries.
    if narrow_queries is not None:
        query = xapian.Query(
            xapian.Query.OP_AND, query, xapian.Query(
                xapian.Query.OP_OR, [self.parse_query(narrow_query) for narrow_query in narrow_queries]
            )
        )

    # Restrict results to registered models via their content-type terms.
    if limit_to_registered_models:
        registered_models = self.build_registered_models_list()

        if len(registered_models) > 0:
            query = xapian.Query(
                xapian.Query.OP_AND, query,
                xapian.Query(
                    xapian.Query.OP_OR, [
                        xapian.Query('%s%s' % (DOCUMENT_CT_TERM_PREFIX, model)) for model in registered_models
                    ]
                )
            )

    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    if sort_by:
        sorter = xapian.MultiValueSorter()

        for sort_field in sort_by:
            if sort_field.startswith('-'):
                reverse = True
                sort_field = sort_field[1:] # Strip the '-'
            else:
                reverse = False # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
            sorter.add(self._value_column(sort_field), reverse)

        enquire.set_sort_by_key_then_relevance(sorter, True)

    results = []
    facets_dict = {
        'fields': {},
        'dates': {},
        'queries': {},
    }

    # Default to "everything after start_offset" when no end was given.
    if not end_offset:
        end_offset = database.get_doccount() - start_offset

    matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)

    for match in matches:
        # The document payload is the pickled (app_label, module_name, pk, data) tuple.
        app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
        if highlight:
            model_data['highlighted'] = {
                self.content_field_name: self._do_highlight(
                    model_data.get(self.content_field_name), query
                )
            }
        results.append(
            SearchResult(app_label, module_name, pk, match.percent, weight=match.weight, **model_data)
        )

    if facets:
        facets_dict['fields'] = self._do_field_facets(results, facets)
    if date_facets:
        facets_dict['dates'] = self._do_date_facets(results, date_facets)
    if query_facets:
        facets_dict['queries'] = self._do_query_facets(results, query_facets)

    return {
        'results': results,
        'hits': matches.get_matches_estimated(),
        'facets': facets_dict,
        'spelling_suggestion': spelling_suggestion,
    }
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-12-03 21:40:33 +00:00
|
|
|
def more_like_this(self, model_instance, additional_query=None,
                   start_offset=0, end_offset=None,
                   limit_to_registered_models=True, **kwargs):
    """
    Given a model instance, returns a result set of similar documents.

    Required arguments:
        `model_instance` -- The model instance to use as a basis for
                            retrieving similar documents.

    Optional arguments:
        `additional_query` -- An additional query to narrow results
        `start_offset` -- The starting offset (default=0)
        `end_offset` -- The ending offset (default=None), if None, then all documents
        `limit_to_registered_models` -- Limit returned results to models registered in the current `SearchSite` (default = True)

    Returns:
    A dictionary with the following keys:
        `results` -- A list of `SearchResult`
        `hits` -- The total available results

    Opens a database connection, then builds a simple query using the
    `model_instance` to build the unique identifier.

    For each document retrieved (should always be one), adds an entry into
    an RSet (relevance set) with the document id, then, uses the RSet
    to query for an ESet (A set of terms that can be used to suggest
    expansions to the original query), omitting any document that was in
    the original query.

    Finally, processes the resulting matches and returns.
    """
    database = self._database()

    # Look up the single document for this model instance by its Q-term.
    query = xapian.Query(DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance))

    enquire = xapian.Enquire(database)
    enquire.set_query(query)

    rset = xapian.RSet()

    if not end_offset:
        end_offset = database.get_doccount()

    for match in self._get_enquire_mset(database, enquire, 0, end_offset):
        rset.add_document(match.docid)

    # NOTE(review): `match` is the loop variable from above -- if the mset
    # was empty (instance not indexed) this raises NameError. Kept as-is.
    query = xapian.Query(xapian.Query.OP_ELITE_SET,
        [expand.term for expand in enquire.get_eset(match.document.termlist_count(), rset, XHExpandDecider())]
    )
    # Exclude the source document itself from the expanded query.
    query = xapian.Query(
        xapian.Query.OP_AND_NOT, [query, DOCUMENT_ID_TERM_PREFIX + get_identifier(model_instance)]
    )
    if limit_to_registered_models:
        registered_models = self.build_registered_models_list()

        if len(registered_models) > 0:
            query = xapian.Query(
                xapian.Query.OP_AND, query,
                xapian.Query(
                    xapian.Query.OP_OR, [
                        xapian.Query('%s%s' % (DOCUMENT_CT_TERM_PREFIX, model)) for model in registered_models
                    ]
                )
            )
    if additional_query:
        query = xapian.Query(
            xapian.Query.OP_AND, query, additional_query
        )

    enquire.set_query(query)

    results = []
    matches = self._get_enquire_mset(database, enquire, start_offset, end_offset)

    for match in matches:
        app_label, module_name, pk, model_data = pickle.loads(self._get_document_data(database, match.document))
        results.append(
            SearchResult(app_label, module_name, pk, match.percent, weight=match.weight, **model_data)
        )

    return {
        'results': results,
        'hits': matches.get_matches_estimated(),
        'facets': {
            'fields': {},
            'dates': {},
            'queries': {},
        },
        'spelling_suggestion': None,
    }
|
2009-08-19 00:26:59 +00:00
|
|
|
|
2009-12-04 19:42:06 +00:00
|
|
|
def parse_query(self, query_string):
    """
    Given a `query_string`, will attempt to return a xapian.Query

    Required arguments:
        ``query_string`` -- A query string to parse

    Returns a xapian.Query
    """
    # Two degenerate cases first: match-all and match-nothing.
    if query_string == '*':
        return xapian.Query('') # Match everything
    elif query_string == '':
        return xapian.Query() # Match nothing

    query_parser = xapian.QueryParser()
    query_parser.set_database(self._database())
    query_parser.set_stemmer(xapian.Stem(self.language))
    query_parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query_parser.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX)

    # Teach the parser every schema field's term prefix so that
    # 'field_name:value' queries resolve correctly.
    for field_dict in self.schema:
        field_prefix = DOCUMENT_CUSTOM_TERM_PREFIX + field_dict['field_name'].upper()
        query_parser.add_prefix(field_dict['field_name'], field_prefix)

    query_parser.add_valuerangeprocessor(XHValueRangeProcessor(self))

    parse_flags = (
        xapian.QueryParser.FLAG_PARTIAL
        | xapian.QueryParser.FLAG_PHRASE
        | xapian.QueryParser.FLAG_BOOLEAN
        | xapian.QueryParser.FLAG_LOVEHATE
        | xapian.QueryParser.FLAG_WILDCARD
        | xapian.QueryParser.FLAG_PURE_NOT
    )
    return query_parser.parse_query(query_string, parse_flags)
|
|
|
|
|
2009-08-16 15:40:47 +00:00
|
|
|
def build_schema(self, fields):
    """
    Build the schema from fields.

    Required arguments:
        ``fields`` -- A list of fields in the index

    Returns a tuple of (content_field_name, schema_fields) where
    `schema_fields` is a list of field dictionaries ready for inclusion
    in indexed meta-data.
    """
    content_field_name = ''
    schema_fields = []
    next_column = 0

    for field_name, field_class in fields.items():
        # The document field supplies the main content.
        if field_class.document is True:
            content_field_name = field_class.index_fieldname

        if field_class.indexed is True:
            # Defaults: single-valued text in the next free value slot.
            field_data = {
                'field_name': field_class.index_fieldname,
                'type': 'text',
                'multi_valued': 'false',
                'column': next_column,
            }

            # Refine the type from the Haystack field class.
            if isinstance(field_class, (DateField, DateTimeField)):
                field_data['type'] = 'date'
            elif isinstance(field_class, IntegerField):
                field_data['type'] = 'long'
            elif isinstance(field_class, FloatField):
                field_data['type'] = 'float'
            elif isinstance(field_class, BooleanField):
                field_data['type'] = 'boolean'
            elif isinstance(field_class, MultiValueField):
                field_data['multi_valued'] = 'true'

            schema_fields.append(field_data)
            next_column += 1

            if field_class.faceted is True:
                # Duplicate the field under its facet name, in its own slot.
                faceted_field = field_data.copy()
                faceted_field['field_name'] = get_facet_field_name(faceted_field['field_name'])
                faceted_field['column'] = next_column

                schema_fields.append(faceted_field)
                next_column += 1

    return (content_field_name, schema_fields)
|
2009-08-19 00:26:59 +00:00
|
|
|
|
2009-12-03 18:49:26 +00:00
|
|
|
def _do_highlight(self, content, query, tag='em'):
    """
    Highlight `query` terms in `content` with html `tag`.

    This method assumes that the input text (`content`) does not contain
    any special formatting. That is, it does not contain any html tags
    or similar markup that could be screwed up by the highlighting.

    Required arguments:
        `content` -- Content to search for instances of `text`
        `text` -- The text to be highlighted
    """
    for term in query:
        # Skip leading upper-case field/term prefixes; only the bare
        # lower-case portion of each term is matched.
        for fragment in re.findall('[^A-Z]+', term):
            wrapped = '<%s>%s</%s>' % (tag, term, tag)
            content = re.compile(fragment, re.I).sub(wrapped, content)

    return content
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-11 13:28:15 +00:00
|
|
|
def _do_field_facets(self, results, field_facets):
|
2009-06-18 16:15:13 +00:00
|
|
|
"""
|
|
|
|
Private method that facets a document by field name.
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-12-04 21:41:41 +00:00
|
|
|
Fields of type MultiValueField will be faceted on each item in the
|
2009-09-16 18:07:24 +00:00
|
|
|
(containing) list.
|
|
|
|
|
2009-06-18 16:15:13 +00:00
|
|
|
Required arguments:
|
2009-08-11 13:28:15 +00:00
|
|
|
`results` -- A list SearchResults to facet
|
|
|
|
`field_facets` -- A list of fields to facet on
|
2009-06-18 16:15:13 +00:00
|
|
|
"""
|
2009-08-11 13:28:15 +00:00
|
|
|
facet_dict = {}
|
|
|
|
|
2009-12-08 01:02:39 +00:00
|
|
|
# DS_TODO: Improve this algorithm. Currently, runs in O(N^2), ouch.
|
2009-08-11 13:28:15 +00:00
|
|
|
for field in field_facets:
|
|
|
|
facet_list = {}
|
|
|
|
|
|
|
|
for result in results:
|
|
|
|
field_value = getattr(result, field)
|
2009-09-16 18:07:24 +00:00
|
|
|
if self._multi_value_field(field):
|
|
|
|
for item in field_value: # Facet each item in a MultiValueField
|
|
|
|
facet_list[item] = facet_list.get(item, 0) + 1
|
|
|
|
else:
|
|
|
|
facet_list[field_value] = facet_list.get(field_value, 0) + 1
|
2009-08-19 00:26:59 +00:00
|
|
|
|
2009-08-11 13:28:15 +00:00
|
|
|
facet_dict[field] = facet_list.items()
|
2009-08-19 00:26:59 +00:00
|
|
|
|
2009-08-11 13:28:15 +00:00
|
|
|
return facet_dict
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-10 20:12:59 +00:00
|
|
|
def _do_date_facets(self, results, date_facets):
|
2009-08-07 18:22:40 +00:00
|
|
|
"""
|
|
|
|
Private method that facets a document by date ranges
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-07 18:22:40 +00:00
|
|
|
Required arguments:
|
2009-08-10 20:12:59 +00:00
|
|
|
`results` -- A list SearchResults to facet
|
2009-08-11 12:49:45 +00:00
|
|
|
`date_facets` -- A dictionary containing facet parameters:
|
2009-08-16 18:03:19 +00:00
|
|
|
{'field': {'start_date': ..., 'end_date': ...: 'gap_by': '...', 'gap_amount': n}}
|
|
|
|
nb., gap must be one of the following:
|
|
|
|
year|month|day|hour|minute|second
|
2009-08-10 20:12:59 +00:00
|
|
|
|
|
|
|
For each date facet field in `date_facets`, generates a list
|
2009-08-16 18:03:19 +00:00
|
|
|
of date ranges (from `start_date` to `end_date` by `gap_by`) then
|
2009-08-10 20:12:59 +00:00
|
|
|
iterates through `results` and tallies the count for each date_facet.
|
|
|
|
|
|
|
|
Returns a dictionary of date facets (fields) containing a list with
|
|
|
|
entries for each range and a count of documents matching the range.
|
|
|
|
|
|
|
|
eg. {
|
|
|
|
'pub_date': [
|
|
|
|
('2009-01-01T00:00:00Z', 5),
|
|
|
|
('2009-02-01T00:00:00Z', 0),
|
|
|
|
('2009-03-01T00:00:00Z', 0),
|
|
|
|
('2009-04-01T00:00:00Z', 1),
|
|
|
|
('2009-05-01T00:00:00Z', 2),
|
|
|
|
],
|
|
|
|
}
|
2009-08-07 18:22:40 +00:00
|
|
|
"""
|
2009-08-10 20:12:59 +00:00
|
|
|
facet_dict = {}
|
|
|
|
|
2009-08-07 18:22:40 +00:00
|
|
|
for date_facet, facet_params in date_facets.iteritems():
|
2009-08-16 18:03:19 +00:00
|
|
|
gap_type = facet_params.get('gap_by')
|
|
|
|
gap_value = facet_params.get('gap_amount', 1)
|
2009-08-10 20:12:59 +00:00
|
|
|
date_range = facet_params['start_date']
|
|
|
|
facet_list = []
|
2009-08-10 20:34:29 +00:00
|
|
|
while date_range < facet_params['end_date']:
|
2009-08-10 20:12:59 +00:00
|
|
|
facet_list.append((date_range.isoformat(), 0))
|
|
|
|
if gap_type == 'year':
|
|
|
|
date_range = date_range.replace(
|
|
|
|
year=date_range.year + int(gap_value)
|
|
|
|
)
|
|
|
|
elif gap_type == 'month':
|
2009-12-05 01:45:36 +00:00
|
|
|
if date_range.month + int(gap_value) > 12:
|
2009-08-10 20:12:59 +00:00
|
|
|
date_range = date_range.replace(
|
2009-12-05 01:45:36 +00:00
|
|
|
month=((date_range.month + int(gap_value)) % 12),
|
|
|
|
year=(date_range.year + (date_range.month + int(gap_value)) / 12)
|
2009-08-10 20:12:59 +00:00
|
|
|
)
|
2009-08-08 12:00:54 +00:00
|
|
|
else:
|
2009-08-10 20:12:59 +00:00
|
|
|
date_range = date_range.replace(
|
|
|
|
month=date_range.month + int(gap_value)
|
|
|
|
)
|
|
|
|
elif gap_type == 'day':
|
2009-08-10 20:38:13 +00:00
|
|
|
date_range += datetime.timedelta(days=int(gap_value))
|
2009-08-10 20:12:59 +00:00
|
|
|
elif gap_type == 'hour':
|
2009-08-10 20:38:13 +00:00
|
|
|
date_range += datetime.timedelta(hours=int(gap_value))
|
2009-08-10 20:12:59 +00:00
|
|
|
elif gap_type == 'minute':
|
2009-08-10 20:38:13 +00:00
|
|
|
date_range += datetime.timedelta(minutes=int(gap_value))
|
2009-08-10 20:12:59 +00:00
|
|
|
elif gap_type == 'second':
|
2009-08-10 20:38:13 +00:00
|
|
|
date_range += datetime.timedelta(seconds=int(gap_value))
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-10 20:34:29 +00:00
|
|
|
facet_list = sorted(facet_list, key=lambda n:n[0], reverse=True)
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-10 20:12:59 +00:00
|
|
|
for result in results:
|
|
|
|
result_date = getattr(result, date_facet)
|
|
|
|
if result_date:
|
|
|
|
if not isinstance(result_date, datetime.datetime):
|
|
|
|
result_date = datetime.datetime(
|
|
|
|
year=result_date.year,
|
|
|
|
month=result_date.month,
|
|
|
|
day=result_date.day,
|
|
|
|
)
|
|
|
|
for n, facet_date in enumerate(facet_list):
|
2009-11-12 16:28:07 +00:00
|
|
|
if result_date > datetime.datetime(*(time.strptime(facet_date[0], '%Y-%m-%dT%H:%M:%S')[0:6])):
|
2009-08-10 20:12:59 +00:00
|
|
|
facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1))
|
2009-08-10 20:34:29 +00:00
|
|
|
break
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-10 20:12:59 +00:00
|
|
|
facet_dict[date_facet] = facet_list
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-10 20:12:59 +00:00
|
|
|
return facet_dict
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-08-11 12:42:42 +00:00
|
|
|
def _do_query_facets(self, results, query_facets):
|
2009-08-11 12:49:45 +00:00
|
|
|
"""
|
|
|
|
Private method that facets a document by query
|
|
|
|
|
|
|
|
Required arguments:
|
|
|
|
`results` -- A list SearchResults to facet
|
|
|
|
`query_facets` -- A dictionary containing facet parameters:
|
|
|
|
{'field': 'query', [...]}
|
|
|
|
|
|
|
|
For each query in `query_facets`, generates a dictionary entry with
|
|
|
|
the field name as the key and a tuple with the query and result count
|
|
|
|
as the value.
|
|
|
|
|
|
|
|
eg. {'name': ('a*', 5)}
|
|
|
|
"""
|
2009-08-11 12:42:42 +00:00
|
|
|
facet_dict = {}
|
|
|
|
|
|
|
|
for field, query in query_facets.iteritems():
|
2009-12-05 15:43:52 +00:00
|
|
|
facet_dict[field] = (query, self.search(self.parse_query(query))['hits'])
|
|
|
|
|
2009-08-11 12:42:42 +00:00
|
|
|
return facet_dict
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-12-03 17:20:04 +00:00
|
|
|
def _do_spelling_suggestion(self, database, query, spelling_query):
|
|
|
|
"""
|
|
|
|
Private method that returns a single spelling suggestion based on
|
|
|
|
`spelling_query` or `query`.
|
|
|
|
|
|
|
|
Required arguments:
|
|
|
|
`database` -- The database to check spelling against
|
|
|
|
`query` -- The query to check
|
|
|
|
`spelling_query` -- If not None, this will be checked instead of `query`
|
2009-12-04 21:41:41 +00:00
|
|
|
|
2009-12-03 17:20:04 +00:00
|
|
|
Returns a string with a suggested spelling
|
|
|
|
"""
|
|
|
|
if spelling_query:
|
|
|
|
if ' ' in spelling_query:
|
|
|
|
return ' '.join([database.get_spelling_suggestion(term) for term in spelling_query.split()])
|
|
|
|
else:
|
|
|
|
return database.get_spelling_suggestion(spelling_query)
|
|
|
|
|
2009-12-05 16:32:29 +00:00
|
|
|
term_set = set()
|
2009-12-03 18:49:26 +00:00
|
|
|
for term in query:
|
2009-12-04 19:16:08 +00:00
|
|
|
for match in re.findall('[^A-Z]+', term): # Ignore field identifiers
|
2009-12-05 16:32:29 +00:00
|
|
|
term_set.add(database.get_spelling_suggestion(match))
|
2009-12-04 21:41:41 +00:00
|
|
|
|
2009-12-05 16:32:29 +00:00
|
|
|
return ' '.join(term_set)
|
2009-12-03 17:20:04 +00:00
|
|
|
|
2009-07-31 01:18:31 +00:00
|
|
|
def _database(self, writable=False):
    """
    Open and return the Xapian database located at
    ``settings.HAYSTACK_XAPIAN_PATH``.

    Optional arguments:
        ``writable`` -- Open the database in read/write mode (default=False)

    Returns an instance of a xapian.Database or xapian.WritableDatabase

    Raises InvalidIndexError if a read-only database cannot be opened.
    """
    path = settings.HAYSTACK_XAPIAN_PATH

    if writable:
        # Creates the index on first use if it does not exist yet.
        return xapian.WritableDatabase(path, xapian.DB_CREATE_OR_OPEN)

    try:
        return xapian.Database(path)
    except xapian.DatabaseOpeningError:
        raise InvalidIndexError(u'Unable to open index at %s' % path)
|
2009-12-08 18:18:17 +00:00
|
|
|
|
2010-02-19 14:47:58 +00:00
|
|
|
def _get_enquire_mset(self, database, enquire, start_offset, end_offset):
|
2010-02-06 15:45:26 +00:00
|
|
|
"""
|
|
|
|
A safer version of Xapian.enquire.get_mset
|
|
|
|
|
|
|
|
Simply wraps the Xapian version and catches any `Xapian.DatabaseModifiedError`,
|
|
|
|
attempting a `database.reopen` as needed.
|
|
|
|
|
|
|
|
Required arguments:
|
|
|
|
`database` -- The database to be read
|
|
|
|
`enquire` -- An instance of an Xapian.enquire object
|
|
|
|
`start_offset` -- The start offset to pass to `enquire.get_mset`
|
|
|
|
`end_offset` -- The end offset to pass to `enquire.get_mset`
|
|
|
|
"""
|
|
|
|
try:
|
|
|
|
return enquire.get_mset(start_offset, end_offset)
|
|
|
|
except xapian.DatabaseModifiedError:
|
|
|
|
database.reopen()
|
|
|
|
return enquire.get_mset(start_offset, end_offset)
|
|
|
|
|
2010-02-19 14:47:58 +00:00
|
|
|
def _get_document_data(self, database, document):
|
|
|
|
"""
|
|
|
|
A safer version of Xapian.document.get_data
|
|
|
|
|
|
|
|
Simply wraps the Xapian version and catches any `Xapian.DatabaseModifiedError`,
|
|
|
|
attempting a `database.reopen` as needed.
|
|
|
|
|
|
|
|
Required arguments:
|
|
|
|
`database` -- The database to be read
|
|
|
|
`document` -- An instance of an Xapian.document object
|
|
|
|
"""
|
|
|
|
try:
|
|
|
|
return document.get_data()
|
|
|
|
except xapian.DatabaseModifiedError:
|
|
|
|
database.reopen()
|
|
|
|
return document.get_data()
|
|
|
|
|
2009-07-31 15:39:58 +00:00
|
|
|
def _value_column(self, field):
|
|
|
|
"""
|
|
|
|
Private method that returns the column value slot in the database
|
|
|
|
for a given field.
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-07-31 15:39:58 +00:00
|
|
|
Required arguemnts:
|
|
|
|
`field` -- The field to lookup
|
2009-08-11 12:49:45 +00:00
|
|
|
|
2009-07-31 15:39:58 +00:00
|
|
|
Returns an integer with the column location (0 indexed).
|
|
|
|
"""
|
|
|
|
for field_dict in self.schema:
|
|
|
|
if field_dict['field_name'] == field:
|
|
|
|
return field_dict['column']
|
|
|
|
return 0
|
2009-12-04 21:41:41 +00:00
|
|
|
|
2009-09-16 18:07:24 +00:00
|
|
|
def _multi_value_field(self, field):
|
|
|
|
"""
|
|
|
|
Private method that returns `True` if a field is multi-valued, else
|
|
|
|
`False`.
|
|
|
|
|
|
|
|
Required arguemnts:
|
|
|
|
`field` -- The field to lookup
|
|
|
|
|
|
|
|
Returns a boolean value indicating whether the field is multi-valued.
|
|
|
|
"""
|
|
|
|
for field_dict in self.schema:
|
|
|
|
if field_dict['field_name'] == field:
|
|
|
|
return field_dict['multi_valued'] == 'true'
|
|
|
|
return False
|
|
|
|
|
2009-07-31 15:39:58 +00:00
|
|
|
|
2009-06-16 18:48:11 +00:00
|
|
|
class SearchQuery(BaseSearchQuery):
    """
    This class is the Xapian specific version of the SearchQuery class.
    It acts as an intermediary between the ``SearchQuerySet`` and the
    ``SearchBackend`` itself.
    """
    def __init__(self, backend=None, site=None):
        """
        Create a new instance of the SearchQuery setting the backend as
        specified. If no backend is set, will use the Xapian `SearchBackend`.

        Optional arguments:
            ``backend`` -- The ``SearchBackend`` to use (default = None)
            ``site`` -- The site to use (default = None)
        """
        super(SearchQuery, self).__init__(backend=backend)
        self.backend = backend or SearchBackend(site=site)

    def build_query(self):
        """
        Translate the accumulated query state into a single xapian.Query.

        Starts from the filter tree (or a match-all query when no filters
        are set), then AND-restricts to the selected models and applies
        any field boosts.

        Returns a xapian.Query.
        """
        if not self.query_filter:
            # Empty query string matches all documents.
            query = xapian.Query('')
        else:
            query = self._query_from_search_node(self.query_filter)

        if self.models:
            # Restrict to the chosen models via their content-type terms.
            # OP_SCALE_WEIGHT with factor 0 keeps these sub-queries purely
            # boolean so they do not influence ranking.
            subqueries = [
                xapian.Query(
                    xapian.Query.OP_SCALE_WEIGHT, xapian.Query('%s%s.%s' % (
                            DOCUMENT_CT_TERM_PREFIX,
                            model._meta.app_label, model._meta.module_name
                        )
                    ), 0 # Pure boolean sub-query
                ) for model in self.models
            ]
            query = xapian.Query(
                xapian.Query.OP_AND, query,
                xapian.Query(xapian.Query.OP_OR, subqueries)
            )

        if self.boost:
            # Boosted terms may raise a document's weight but must never
            # filter documents out, hence OP_AND_MAYBE.
            subqueries = [
                xapian.Query(
                    xapian.Query.OP_SCALE_WEIGHT, xapian.Query(term), value
                ) for term, value in self.boost.iteritems()
            ]
            query = xapian.Query(
                xapian.Query.OP_AND_MAYBE, query,
                xapian.Query(xapian.Query.OP_OR, subqueries)
            )

        return query

    def _query_from_search_node(self, search_node, is_not=False):
        """
        Recursively convert a Haystack `SearchNode` tree into a
        xapian.Query, dispatching each leaf expression to the matching
        `_filter_*` helper and combining children with the node's
        AND/OR connector.

        Optional arguments:
            ``is_not`` -- Negate the leaf queries at this level
        """
        query_list = []

        for child in search_node.children:
            if isinstance(child, SearchNode):
                # Nested node: recurse, carrying the child's own negation.
                query_list.append(
                    self._query_from_search_node(child, child.negated)
                )
            else:
                expression, term = child
                field, filter_type = search_node.split_expression(expression)

                # Normalise terms (dates, case, etc.) before querying.
                if isinstance(term, (list, tuple)):
                    term = [_marshal_term(t) for t in term]
                else:
                    term = _marshal_term(term)

                if field == 'content':
                    query_list.append(self._content_field(term, is_not))
                else:
                    if filter_type == 'exact':
                        query_list.append(self._filter_exact(term, field, is_not))
                    elif filter_type == 'gt':
                        query_list.append(self._filter_gt(term, field, is_not))
                    elif filter_type == 'gte':
                        query_list.append(self._filter_gte(term, field, is_not))
                    elif filter_type == 'lt':
                        query_list.append(self._filter_lt(term, field, is_not))
                    elif filter_type == 'lte':
                        query_list.append(self._filter_lte(term, field, is_not))
                    elif filter_type == 'startswith':
                        query_list.append(self._filter_startswith(term, field, is_not))
                    elif filter_type == 'in':
                        query_list.append(self._filter_in(term, field, is_not))

        if search_node.connector == 'OR':
            return xapian.Query(xapian.Query.OP_OR, query_list)
        else:
            return xapian.Query(xapian.Query.OP_AND, query_list)

    def _content_field(self, term, is_not):
        """
        Private method that returns a xapian.Query that searches for `term`
        in all fields.

        Required arguments:
            ``term`` -- The term to search for
            ``is_not`` -- Invert the search results

        Returns:
            A xapian.Query
        """
        if ' ' in term:
            # Multi-word terms become phrase queries.
            if is_not:
                return xapian.Query(
                    xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(term.split())
                )
            else:
                return self._phrase_query(term.split())
        else:
            if is_not:
                return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term))
            else:
                return self._term_query(term)

    def _filter_exact(self, term, field, is_not):
        """
        Private method that returns a xapian.Query that searches for `term`
        in a specified `field`.

        Required arguments:
            ``term`` -- The term to search for
            ``field`` -- The field to search
            ``is_not`` -- Invert the search results

        Returns:
            A xapian.Query
        """
        if ' ' in term:
            # Multi-word terms become phrase queries.
            if is_not:
                return xapian.Query(
                    xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(term.split(), field)
                )
            else:
                return self._phrase_query(term.split(), field)
        else:
            if is_not:
                return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term, field))
            else:
                return self._term_query(term, field)

    def _filter_in(self, term_list, field, is_not):
        """
        Private method that returns a xapian.Query that searches for any term
        of `term_list` in a specified `field`.

        Required arguments:
            ``term_list`` -- The terms to search for
            ``field`` -- The field to search
            ``is_not`` -- Invert the search results

        Returns:
            A xapian.Query
        """
        query_list = []
        for term in term_list:
            if ' ' in term:
                query_list.append(
                    self._phrase_query(term.split(), field)
                )
            else:
                query_list.append(
                    self._term_query(term, field)
                )
        if is_not:
            return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), xapian.Query(xapian.Query.OP_OR, query_list))
        else:
            return xapian.Query(xapian.Query.OP_OR, query_list)

    def _filter_startswith(self, term, field, is_not):
        """
        Private method that returns a xapian.Query that searches for any term
        that begins with `term` in a specified `field`.

        Required arguments:
            ``term`` -- The terms to search for
            ``field`` -- The field to search
            ``is_not`` -- Invert the search results

        Returns:
            A xapian.Query
        """
        # Delegates wildcard expansion to the backend's query parser.
        if is_not:
            return xapian.Query(
                xapian.Query.OP_AND_NOT,
                self._all_query(),
                self.backend.parse_query('%s:%s' % (field, term)),
            )
        return self.backend.parse_query('%s:%s' % (field, term))

    def _filter_gt(self, term, field, is_not):
        # gt is the logical complement of lte, so flip the negation flag.
        return self._filter_lte(term, field, is_not=(is_not != True))

    def _filter_lt(self, term, field, is_not):
        # lt is the logical complement of gte, so flip the negation flag.
        return self._filter_gte(term, field, is_not=(is_not != True))

    def _filter_gte(self, term, field, is_not):
        """
        Private method that returns a xapian.Query that searches for any term
        that is greater than or equal to `term` in a specified `field`.
        """
        vrp = XHValueRangeProcessor(self.backend)
        # Open-ended range: from the marshalled term up to the maximum ('*').
        pos, begin, end = vrp('%s:%s' % (field, _marshal_value(term)), '*')
        if is_not:
            return xapian.Query(xapian.Query.OP_AND_NOT,
                self._all_query(),
                xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
            )
        return xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)

    def _filter_lte(self, term, field, is_not):
        """
        Private method that returns a xapian.Query that searches for any term
        that is less than or equal to `term` in a specified `field`.
        """
        vrp = XHValueRangeProcessor(self.backend)
        # Open-ended range: from the minimum up to the marshalled term.
        pos, begin, end = vrp('%s:' % field, '%s' % _marshal_value(term))
        if is_not:
            return xapian.Query(xapian.Query.OP_AND_NOT,
                self._all_query(),
                xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)
            )
        return xapian.Query(xapian.Query.OP_VALUE_RANGE, pos, begin, end)

    def _all_query(self):
        """
        Private method that returns a xapian.Query that returns all documents,

        Returns:
            A xapian.Query
        """
        return xapian.Query('')

    def _term_query(self, term, field=None):
        """
        Private method that returns a term based xapian.Query that searches
        for `term`.

        Required arguments:
            ``term`` -- The term to search for
            ``field`` -- The field to search (If `None`, all fields)

        Returns:
            A xapian.Query
        """
        stem = xapian.Stem(self.backend.language)
        if field:
            # OR together the stemmed form ('Z' prefix) and the raw form,
            # both namespaced to the field's custom term prefix.
            return xapian.Query(
                xapian.Query.OP_OR,
                xapian.Query('Z%s%s%s' % (
                        DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), stem(term)
                    )
                ),
                xapian.Query('%s%s%s' % (
                        DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
                    )
                )
            )
        else:
            return xapian.Query(
                xapian.Query.OP_OR,
                xapian.Query('Z%s' % stem(term)),
                xapian.Query(term)
            )

    def _phrase_query(self, term_list, field=None):
        """
        Private method that returns a phrase based xapian.Query that searches
        for terms in `term_list`.

        Required arguments:
            ``term_list`` -- The terms to search for
            ``field`` -- The field to search (If `None`, all fields)

        Returns:
            A xapian.Query
        """
        if field:
            # Prefix every term with the field's custom term prefix so the
            # phrase match is scoped to that field.
            return xapian.Query(
                xapian.Query.OP_PHRASE, [
                    '%s%s%s' % (
                        DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
                    ) for term in term_list
                ]
            )
        else:
            return xapian.Query(xapian.Query.OP_PHRASE, term_list)
|
2009-12-01 13:58:34 +00:00
|
|
|
|
2009-11-28 23:55:11 +00:00
|
|
|
|
|
|
|
def _marshal_value(value):
|
|
|
|
"""
|
2009-12-01 13:58:34 +00:00
|
|
|
Private utility method that converts Python values to a string for Xapian values.
|
2009-11-28 23:55:11 +00:00
|
|
|
"""
|
|
|
|
if isinstance(value, datetime.datetime):
|
2009-12-03 14:38:49 +00:00
|
|
|
value = _marshal_datetime(value)
|
2009-11-28 23:55:11 +00:00
|
|
|
elif isinstance(value, datetime.date):
|
2009-12-03 14:38:49 +00:00
|
|
|
value = _marshal_date(value)
|
2009-11-28 23:55:11 +00:00
|
|
|
elif isinstance(value, bool):
|
|
|
|
if value:
|
2009-12-03 14:38:49 +00:00
|
|
|
value = u't'
|
2009-11-28 23:55:11 +00:00
|
|
|
else:
|
2009-12-03 14:38:49 +00:00
|
|
|
value = u'f'
|
2009-11-28 23:55:11 +00:00
|
|
|
elif isinstance(value, float):
|
|
|
|
value = xapian.sortable_serialise(value)
|
|
|
|
elif isinstance(value, (int, long)):
|
|
|
|
value = u'%012d' % value
|
|
|
|
else:
|
2009-11-30 21:10:46 +00:00
|
|
|
value = force_unicode(value).lower()
|
2009-11-28 23:55:11 +00:00
|
|
|
return value
|
|
|
|
|
2009-12-03 14:38:49 +00:00
|
|
|
|
|
|
|
def _marshal_term(term):
    """
    Private utility method that converts Python terms to a string for Xapian terms.
    """
    # datetime must be tested before date (it is a subclass of date).
    if isinstance(term, datetime.datetime):
        return _marshal_datetime(term)
    if isinstance(term, datetime.date):
        return _marshal_date(term)
    return force_unicode(term).lower()
|
|
|
|
|
|
|
|
|
|
|
|
def _marshal_date(d):
|
|
|
|
return u'%04d%02d%02d000000' % (d.year, d.month, d.day)
|
|
|
|
|
|
|
|
|
|
|
|
def _marshal_datetime(dt):
|
|
|
|
if dt.microsecond:
|
|
|
|
return u'%04d%02d%02d%02d%02d%02d%06d' % (
|
|
|
|
dt.year, dt.month, dt.day, dt.hour,
|
|
|
|
dt.minute, dt.second, dt.microsecond
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
return u'%04d%02d%02d%02d%02d%02d' % (
|
|
|
|
dt.year, dt.month, dt.day, dt.hour,
|
|
|
|
dt.minute, dt.second
|
|
|
|
)
|