Rewrote the __startswith filter to be much more efficient by using a query parser on the query fragment to expand wildcards

This commit is contained in:
David Sauve 2010-02-09 14:04:53 -05:00
parent 576f13ea91
commit e794f53c99
2 changed files with 102 additions and 101 deletions

View File

@ -383,6 +383,7 @@ class XapianSearchBackendTestCase(TestCase):
self.backend.update(self.index, self.sample_objs)
self.assertEqual(self.backend.parse_query('indexed').get_description(), 'Xapian::Query((indexed:(pos=1) OR Zindex:(pos=1)))')
self.assertEqual(self.backend.parse_query('name:david').get_description(), 'Xapian::Query((XNAMEdavid1:(pos=1) OR XNAMEdavid2:(pos=1) OR XNAMEdavid3:(pos=1) OR ZXNAMEdavid:(pos=1)))')
self.assertEqual(self.backend.parse_query('name:da*').get_description(), 'Xapian::Query((XNAMEdavid1:(pos=1) OR XNAMEdavid2:(pos=1) OR XNAMEdavid3:(pos=1)))')
self.assertEqual(self.backend.parse_query('name:david1..david2').get_description(), 'Xapian::Query(VALUE_RANGE 0 david1 david2)')
self.assertEqual(self.backend.parse_query('value:0..10').get_description(), 'Xapian::Query(VALUE_RANGE 6 000000000000 000000000010)')
self.assertEqual(self.backend.parse_query('value:..10').get_description(), 'Xapian::Query(VALUE_RANGE 6 -02147483648 000000000010)')
@ -390,96 +391,96 @@ class XapianSearchBackendTestCase(TestCase):
self.assertEqual(self.backend.parse_query('popularity:25.5..100.0').get_description(), 'Xapian::Query(VALUE_RANGE 4 \xb2` \xba@)')
# class LiveXapianMockSearchIndex(indexes.SearchIndex):
# text = indexes.CharField(document=True, use_template=True)
# name = indexes.CharField(model_attr='author')
# pub_date = indexes.DateField(model_attr='pub_date')
# created = indexes.DateField()
# title = indexes.CharField()
#
#
# class LiveXapianSearchQueryTestCase(TestCase):
# """
# SearchQuery specific tests
# """
# fixtures = ['initial_data.json']
#
# def setUp(self):
# super(LiveXapianSearchQueryTestCase, self).setUp()
#
# site = SearchSite()
# backend = SearchBackend(site=site)
# index = LiveXapianMockSearchIndex(MockModel, backend=backend)
# site.register(MockModel, LiveXapianMockSearchIndex)
# backend.update(index, MockModel.objects.all())
#
# self.sq = SearchQuery(backend=backend)
#
# def test_get_spelling(self):
# self.sq.add_filter(SQ(content='indxd'))
# self.assertEqual(self.sq.get_spelling_suggestion(), u'indexed')
# self.assertEqual(self.sq.get_spelling_suggestion('indxd'), u'indexed')
#
# def test_startswith(self):
# self.sq.add_filter(SQ(name__startswith='da*'))
# self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3])
#
# self.sq = SearchQuery(backend=SearchBackend())
# self.sq.add_filter(SQ(name__startswith='daniel1'))
# self.assertEqual([result.pk for result in self.sq.get_results()], [1])
#
# def test_build_query_gt(self):
# self.sq.add_filter(SQ(name__gt='m'))
# self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 3 a m))')
#
# def test_build_query_gte(self):
# self.sq.add_filter(SQ(name__gte='m'))
# self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz)')
#
# def test_build_query_lt(self):
# self.sq.add_filter(SQ(name__lt='m'))
# self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))')
#
# def test_build_query_lte(self):
# self.sq.add_filter(SQ(name__lte='m'))
# self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 a m)')
#
# def test_build_query_multiple_filter_types(self):
# self.sq.add_filter(SQ(content='why'))
# self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
# self.sq.add_filter(SQ(name__gt='david'))
# self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0)))
# self.sq.add_filter(SQ(title__gte='B'))
# self.sq.add_filter(SQ(id__in=[1, 2, 3]))
# self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND VALUE_RANGE 2 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 3 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (ZXID1 OR XID1 OR ZXID2 OR XID2 OR ZXID3 OR XID3)))')
#
# def test_log_query(self):
# backends.reset_search_queries()
# self.assertEqual(len(backends.queries), 0)
#
# # Stow.
# old_debug = settings.DEBUG
# settings.DEBUG = False
#
# len(self.sq.get_results())
# self.assertEqual(len(backends.queries), 0)
#
# settings.DEBUG = True
# # Redefine it to clear out the cached results.
# self.sq = SearchQuery(backend=SearchBackend())
# self.sq.add_filter(SQ(name='bar'))
# len(self.sq.get_results())
# self.assertEqual(len(backends.queries), 1)
# self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
#
# # And again, for good measure.
# self.sq = SearchQuery(backend=SearchBackend())
# self.sq.add_filter(SQ(name='bar'))
# self.sq.add_filter(SQ(text='moof'))
# len(self.sq.get_results())
# self.assertEqual(len(backends.queries), 2)
# self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
# self.assertEqual(backends.queries[1]['query_string'].get_description(), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
#
# # Restore.
# settings.DEBUG = old_debug
class LiveXapianMockSearchIndex(indexes.SearchIndex):
    """
    Search index definition used by the live SearchQuery tests.

    Declares the fields that the query tests filter on.
    """
    # Main document field, rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # Populated from the model's ``author`` attribute.
    name = indexes.CharField(model_attr='author')
    # Populated from the model's ``pub_date`` attribute.
    pub_date = indexes.DateField(model_attr='pub_date')
    # No model attribute is bound to the two fields below.
    created = indexes.DateField()
    title = indexes.CharField()
class LiveXapianSearchQueryTestCase(TestCase):
    """
    SearchQuery specific tests

    Each test runs against an index that ``setUp`` builds from the
    ``MockModel`` objects loaded by the ``initial_data.json`` fixture.
    """
    fixtures = ['initial_data.json']

    def setUp(self):
        """Build the index and create the ``SearchQuery`` under test."""
        super(LiveXapianSearchQueryTestCase, self).setUp()

        site = SearchSite()
        backend = SearchBackend(site=site)
        index = LiveXapianMockSearchIndex(MockModel, backend=backend)
        site.register(MockModel, LiveXapianMockSearchIndex)
        backend.update(index, MockModel.objects.all())

        self.sq = SearchQuery(backend=backend)

    def test_get_spelling(self):
        """A misspelt term yields a suggestion, with or without an explicit argument."""
        self.sq.add_filter(SQ(content='indxd'))
        self.assertEqual(self.sq.get_spelling_suggestion(), u'indexed')
        self.assertEqual(self.sq.get_spelling_suggestion('indxd'), u'indexed')

    def test_startswith_wildcard(self):
        """``__startswith`` with a trailing ``*`` matches every expanded term."""
        self.sq.add_filter(SQ(name__startswith='da*'))
        self.assertEqual([result.pk for result in self.sq.get_results()], [1, 2, 3])

    def test_startswith_fullword(self):
        """``__startswith`` with a complete term matches only that term."""
        self.sq.add_filter(SQ(name__startswith='daniel1'))
        self.assertEqual([result.pk for result in self.sq.get_results()], [1])

    def test_build_query_gt(self):
        """``__gt`` is built as all documents minus the lower value range."""
        self.sq.add_filter(SQ(name__gt='m'))
        self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 3 a m))')

    def test_build_query_gte(self):
        """``__gte`` is built as a value range starting at the term."""
        self.sq.add_filter(SQ(name__gte='m'))
        self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz)')

    def test_build_query_lt(self):
        """``__lt`` is built as all documents minus the upper value range."""
        self.sq.add_filter(SQ(name__lt='m'))
        self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query((<alldocuments> AND_NOT VALUE_RANGE 3 m zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz))')

    def test_build_query_lte(self):
        """``__lte`` is built as a value range ending at the term."""
        self.sq.add_filter(SQ(name__lte='m'))
        self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(VALUE_RANGE 3 a m)')

    def test_build_query_multiple_filter_types(self):
        """Several filter types on one query are AND-ed together."""
        self.sq.add_filter(SQ(content='why'))
        self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0)))
        self.sq.add_filter(SQ(name__gt='david'))
        self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0)))
        self.sq.add_filter(SQ(title__gte='B'))
        self.sq.add_filter(SQ(id__in=[1, 2, 3]))
        self.assertEqual(self.sq.build_query().get_description(), u'Xapian::Query(((Zwhy OR why) AND VALUE_RANGE 2 00010101000000 20090210015900 AND (<alldocuments> AND_NOT VALUE_RANGE 3 a david) AND (<alldocuments> AND_NOT VALUE_RANGE 4 20090212121300 99990101000000) AND VALUE_RANGE 1 b zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz AND (ZXID1 OR XID1 OR ZXID2 OR XID2 OR ZXID3 OR XID3)))')

    def test_log_query(self):
        """Queries are recorded in ``backends.queries`` only while ``settings.DEBUG`` is on."""
        backends.reset_search_queries()
        self.assertEqual(len(backends.queries), 0)

        # Stow.
        old_debug = settings.DEBUG
        settings.DEBUG = False

        # settings.DEBUG is global state: restore it even when an assertion
        # below fails, so a failure here cannot leak into later tests.
        try:
            len(self.sq.get_results())
            self.assertEqual(len(backends.queries), 0)

            settings.DEBUG = True
            # Redefine it to clear out the cached results.
            self.sq = SearchQuery(backend=SearchBackend())
            self.sq.add_filter(SQ(name='bar'))
            len(self.sq.get_results())
            self.assertEqual(len(backends.queries), 1)
            self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')

            # And again, for good measure.
            self.sq = SearchQuery(backend=SearchBackend())
            self.sq.add_filter(SQ(name='bar'))
            self.sq.add_filter(SQ(text='moof'))
            len(self.sq.get_results())
            self.assertEqual(len(backends.queries), 2)
            self.assertEqual(backends.queries[0]['query_string'].get_description(), u'Xapian::Query((ZXNAMEbar OR XNAMEbar))')
            self.assertEqual(backends.queries[1]['query_string'].get_description(), u'Xapian::Query(((ZXNAMEbar OR XNAMEbar) AND (ZXTEXTmoof OR XTEXTmoof)))')
        finally:
            # Restore.
            settings.DEBUG = old_debug

View File

@ -527,7 +527,7 @@ class SearchBackend(BaseSearchBackend):
qp.set_stemmer(xapian.Stem(self.language))
qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
qp.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX)
for field_dict in self.schema:
qp.add_prefix(
field_dict['field_name'],
@ -1018,13 +1018,13 @@ class SearchQuery(BaseSearchQuery):
Returns:
A xapian.Query
"""
sb = SearchBackend()
term_list = set()
for t in sb._database().allterms():
if t.term.startswith(term.rstrip('*')):
term_list.add(t.term)
return self._filter_in(list(term_list), field, is_not)
if is_not:
return xapian.Query(
xapian.Query.OP_AND_NOT,
self._all_query(),
self.backend.parse_query('%s:%s' % (field, term)),
)
return self.backend.parse_query('%s:%s' % (field, term))
def _filter_gt(self, term, field, is_not):
    """
    Build a "greater than" filter by delegating to `_filter_lte` with the
    negation flag flipped.

    Required arguments:
        `term` -- The term to compare against
        `field` -- The field to filter on
        `is_not` -- Whether the filter itself is negated

    Returns:
        Whatever `_filter_lte` returns (presumably a xapian.Query -- confirm
        against `_filter_lte`)
    """
    flipped = (is_not != True)
    return self._filter_lte(term, field, is_not=flipped)