Fixed #103 - improves support for exact search.

This commit is contained in:
Jorge C. Leitão 2014-05-20 23:01:49 +02:00
parent ad0d67dc12
commit ec8fb42e68
2 changed files with 57 additions and 24 deletions

View File

@ -162,8 +162,8 @@ class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
self.sq.add_filter(~SQ(title__in=["Dune", "Jaws"]))
self.assertEqual(str(self.sq.build_query()),
'Xapian::Query(((Zwhi OR why) AND '
'(<alldocuments> AND_NOT ("XTITLEdune" OR '
'"XTITLEjaws"))))')
'(<alldocuments> AND_NOT (XTITLE^dune$ OR '
'XTITLE^jaws$))))')
def test_build_query_in_filter_multiple_words(self):
self.sq.add_filter(SQ(content='why'))
@ -233,7 +233,7 @@ class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True)))
self.assertEqual(str(self.sq.build_query()),
'Xapian::Query(((Zwhi OR why) AND '
'("XTITLE1" OR "XTITLE2" OR "XTITLE3")))')
'(XTITLE^1$ OR XTITLE^2$ OR XTITLE^3$)))')
class MockSearchIndex(indexes.SearchIndex):

View File

@ -261,13 +261,40 @@ class XapianSearchBackend(BaseSearchBackend):
if self.include_spelling is True:
term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
def add_text(termpos, prefix, term, weight):
term_generator.set_termpos(termpos + 1)
term_generator.index_text(term, weight)
term_generator.index_text(term, weight, prefix)
term_generator.increase_termpos()
return term_generator.get_termpos()
for obj in iterable:
document = xapian.Document()
term_generator.set_document(document)
def add_to_document(prefix, term, weight):
document.add_term('%s' % term, weight)
document.add_term(prefix + term, weight)
document.add_term(prefix + '^%s$' % term, weight)
def add_datetime_to_document(termpos, prefix, term, weight):
date, time = term.split()
document.add_posting(date, termpos, weight)
termpos += 1
document.add_posting(time, termpos, weight)
termpos += 1
document.add_posting(prefix + date, termpos, weight)
termpos += 1
document.add_posting(prefix + time, termpos, weight)
termpos += 1
return termpos
data = index.full_prepare(obj)
weights = index.get_field_weights()
termpos = 0
for field in self.schema:
termpos += 1
# not supported fields are ignored.
if field['field_name'] not in list(data.keys()):
continue
@ -288,30 +315,36 @@ class XapianSearchBackend(BaseSearchBackend):
document.add_term(TERM_PREFIXES[field['field_name']] + value, weight)
document.add_value(field['column'], value)
continue
else:
prefix = TERM_PREFIXES['field'] + field['field_name'].upper()
# if not multi_valued, we add a value and construct a one-element list
# if not multi_valued, we add as a document value
# for sorting and facets
if field['multi_valued'] == 'false':
document.add_value(field['column'], _term_to_xapian_value(value, field['type']))
value = [value]
else:
for t in value:
# add the exact match of each value
term = _to_xapian_term(t)
add_to_document(prefix, term, weight)
# index each value with positional information
if ' ' in term:
termpos = add_text(termpos, prefix, term, weight)
continue
for term in value:
term = _to_xapian_term(term)
# from here on term is a string;
# now decide how it is stored:
term = _to_xapian_term(value)
# from here on the term is a string;
# we now decide how it is indexed
# these are
if field['type'] == 'text':
term_generator.index_text(term, weight)
term_generator.index_text(term, weight, prefix)
elif ' ' in term:
for t in term.split():
document.add_term(t, weight)
document.add_term(prefix + t, weight)
if term != "":
document.add_term(term, weight)
document.add_term(prefix + term, weight)
if field['type'] == 'text':
# text is indexed with positional information
termpos = add_text(termpos, prefix, term, weight)
elif field['type'] == 'datetime':
termpos = add_datetime_to_document(termpos, prefix, term, weight)
if term != "":
# all other terms are added without positional information
add_to_document(prefix, term, weight)
# store data without indexing it
document.set_data(pickle.dumps(
@ -1300,10 +1333,9 @@ class XapianSearchQuery(BaseSearchQuery):
assert not exact
constructor = '{prefix}{term}'
# "" is to do a boolean match, but only works on indexed terms
# (constraint on Xapian side)
if exact and field_type == 'text':
constructor = '"{prefix}{term}"'
# ^{term}$ is for boolean match of the term
if exact:
constructor = '{prefix}^{term}$'
# construct the prefix to be used.
prefix = ''
@ -1321,6 +1353,7 @@ class XapianSearchQuery(BaseSearchQuery):
# we construct the query dates in a slightly different way
if field_type == 'datetime':
date, time = term.split()
constructor = '{prefix}{term}'
return xapian.Query(xapian.Query.OP_AND_MAYBE,
constructor.format(prefix=prefix, term=date),
constructor.format(prefix=prefix, term=time)