Split marshalling into two methods: marshal_term and marshal_value as they should be done differently

This commit is contained in:
David Sauve 2009-12-03 09:38:49 -05:00
parent c2e33da295
commit 4bff8c9376
7 changed files with 108 additions and 88 deletions

2
tests/xapian_settings.py Normal file → Executable file
View File

@ -1,4 +1,4 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.
import os
from settings import *

View File

@ -1 +1 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.

View File

@ -1 +1 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.

View File

@ -1,4 +1,4 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.
import warnings

View File

@ -1,4 +1,4 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.
import cPickle as pickle
import datetime
@ -114,14 +114,21 @@ class XapianSearchBackendTestCase(TestCase):
return document_list
def silly_test(self):
    # Index the sample objects, then check that both the lower-case and the
    # capitalised spelling of the query each return three documents.
    self.sb.update(self.msi, self.sample_objs)
    for needle in ('indexed', 'Indexed'):
        self.assertEqual(len(self.xapian_search(needle)), 3)
def test_update(self):
self.sb.update(self.msi, self.sample_objs)
self.assertEqual(len(self.xapian_search('')), 3)
self.assertEqual([dict(doc) for doc in self.xapian_search('')], [
{'flag': u'true', 'name': u'david1', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u'false', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u'true', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
{'flag': u't', 'name': u'david1', 'text': u'indexed!\n1', 'sites': u"['1', '2', '3']", 'pub_date': u'20090224000000', 'value': u'000000000005', 'id': u'tests.xapianmockmodel.1', 'slug': u'http://example.com/1', 'popularity': '\xca\x84', 'django_id': u'1', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u'f', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u't', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
])
def test_duplicate_update(self):
@ -137,8 +144,8 @@ class XapianSearchBackendTestCase(TestCase):
self.sb.remove(self.sample_objs[0])
self.assertEqual(len(self.xapian_search('')), 2)
self.assertEqual([dict(doc) for doc in self.xapian_search('')], [
{'flag': u'false', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u'true', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
{'flag': u'f', 'name': u'david2', 'text': u'indexed!\n2', 'sites': u"['2', '4', '6']", 'pub_date': u'20090223000000', 'value': u'000000000010', 'id': u'tests.xapianmockmodel.2', 'slug': u'http://example.com/2', 'popularity': '\xb4p', 'django_id': u'2', 'django_ct': u'tests.xapianmockmodel'},
{'flag': u't', 'name': u'david3', 'text': u'indexed!\n3', 'sites': u"['3', '6', '9']", 'pub_date': u'20090222000000', 'value': u'000000000015', 'id': u'tests.xapianmockmodel.3', 'slug': u'http://example.com/3', 'popularity': '\xcb\x98', 'django_id': u'3', 'django_ct': u'tests.xapianmockmodel'}
])
def test_clear(self):
@ -174,19 +181,6 @@ class XapianSearchBackendTestCase(TestCase):
self.assertEqual(self.sb.search(xapian.Query(''))['hits'], 3)
self.assertEqual([result.pk for result in self.sb.search(xapian.Query(''))['results']], [1, 2, 3])
# # Ranges
# self.assertEqual([result.pk for result in self.sb.search('index name:david2..david3')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index name:..david2')['results']], [1, 2])
# self.assertEqual([result.pk for result in self.sb.search('index name:david2..*')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index pub_date:20090222000000..20090223000000')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index pub_date:..20090223000000')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index pub_date:20090223000000..*')['results']], [1, 2])
# self.assertEqual([result.pk for result in self.sb.search('index value:10..15')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index value:..10')['results']], [1, 2])
# self.assertEqual([result.pk for result in self.sb.search('index value:10..*')['results']], [2, 3])
# self.assertEqual([result.pk for result in self.sb.search('index popularity:..100.0')['results']], [2])
# self.assertEqual([result.pk for result in self.sb.search('index popularity:100.0..*')['results']], [1, 3])
# def test_field_facets(self):
# self.sb.update(self.msi, self.sample_objs)
# self.assertEqual(len(self.xapian_search('')), 3)
@ -264,17 +258,7 @@ class XapianSearchBackendTestCase(TestCase):
#
# self.assertEqual(self.sb.search('indx')['hits'], 0)
# self.assertEqual(self.sb.search('indx', spelling_query='indexy')['spelling_suggestion'], 'indexed')
# def test_stemming(self):
# self.sb.update(self.msi, self.sample_objs)
# self.assertEqual(len(self.xapian_search('')), 3)
#
# results = self.sb.search('index')
# self.assertEqual(results['hits'], 3)
#
# results = self.sb.search('indexing')
# self.assertEqual(results['hits'], 3)
# def test_more_like_this(self):
# self.sb.update(self.msi, self.sample_objs)
# self.assertEqual(len(self.xapian_search('')), 3)

View File

@ -1,4 +1,4 @@
# Copyright (C) 2009 David Sauve, Trapeze
# Copyright (C) 2009 David Sauve, Trapeze. All rights reserved.
import datetime
import os
@ -51,6 +51,10 @@ class XapianSearchQueryTestCase(TestCase):
self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28)))
self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(20090508112800)')
def test_build_query_float(self):
    # A float used as a content filter should appear in the built query
    # description in its plain decimal form.
    self.sq.add_filter(SQ(content=25.52))
    description = self.sq.build_query().get_description()
    self.assertEqual(description, 'Xapian::Query(25.52)')
def test_build_query_multiple_words_and(self):
self.sq.add_filter(SQ(content='hello'))
self.sq.add_filter(SQ(content='world'))
@ -136,6 +140,10 @@ class XapianSearchQueryTestCase(TestCase):
# self.sq.add_filter(SQ(title__startswith='haystack'))
# self.assertEqual(self.sq.build_query().get_description(), 'Xapian::Query(why AND XTITLEhaystack*)')
# def test_stem_single_word(self):
# self.sq.add_filter(SQ(content='testing'))
# self.assertEqual(self.sq.build_query().get_description(), 'Xapian.Query(Ztest)')
#
def test_clean(self):
self.assertEqual(self.sq.clean('hello world'), 'hello world')
self.assertEqual(self.sq.clean('hello AND world'), 'hello AND world')

View File

@ -682,6 +682,8 @@ class SearchQuery(BaseSearchQuery):
self.backend = backend or SearchBackend()
def build_query(self):
# DS_TODO: How does stemming work with this new refactor?
if not self.query_filter:
query = xapian.Query('')
else:
@ -729,17 +731,19 @@ class SearchQuery(BaseSearchQuery):
)
)
else:
expression, value = child
expression, term = child
field, filter_type = search_node.split_expression(expression)
if not isinstance(value, (list, tuple)):
value = _marshal_value(value)
if not isinstance(term, (list, tuple)):
term = _marshal_term(term)
else:
term = [_marshal_term(t) for t in term]
if field == 'content':
query_list.append(self._content_field(value, is_not))
query_list.append(self._content_field(term, is_not))
else:
if filter_type == 'exact':
query_list.append(self._filter_exact(value, field, is_not))
query_list.append(self._filter_exact(term, field, is_not))
elif filter_type == 'gt':
pass
elif filter_type == 'gte':
@ -751,71 +755,71 @@ class SearchQuery(BaseSearchQuery):
elif filter_type == 'startswith':
pass
elif filter_type == 'in':
query_list.append(self._filter_in(value, field, is_not))
query_list.append(self._filter_in(term, field, is_not))
if search_node.connector == 'OR':
return xapian.Query(xapian.Query.OP_OR, query_list)
else:
return xapian.Query(xapian.Query.OP_AND, query_list)
def _content_field(self, value, is_not):
def _content_field(self, term, is_not):
"""
Private method that returns a xapian.Query that searches for `term`
in all fields.
Required arguments:
``value`` -- The value to search for
``term`` -- The term to search for
``is_not`` -- Invert the search results
Returns:
A xapian.Query
"""
if ' ' in value:
if ' ' in term:
if is_not:
return xapian.Query(
xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(value.split())
xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(term.split())
)
else:
return self._phrase_query(value.split())
return self._phrase_query(term.split())
else:
if is_not:
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(value))
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term))
else:
return self._term_query(value)
return self._term_query(term)
def _filter_exact(self, value, field, is_not):
def _filter_exact(self, term, field, is_not):
"""
Private method that returns a xapian.Query that searches for `value`
Private method that returns a xapian.Query that searches for `term`
in a specified `field`.
Required arguments:
``value`` -- The value to search for
``term`` -- The term to search for
``field`` -- The field to search
``is_not`` -- Invert the search results
Returns:
A xapian.Query
"""
if ' ' in value:
if ' ' in term:
if is_not:
return xapian.Query(
xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(value.split(), field)
xapian.Query.OP_AND_NOT, self._all_query(), self._phrase_query(term.split(), field)
)
else:
return self._phrase_query(value.split(), field)
return self._phrase_query(term.split(), field)
else:
if is_not:
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(value, field))
return xapian.Query(xapian.Query.OP_AND_NOT, self._all_query(), self._term_query(term, field))
else:
return self._term_query(value, field)
return self._term_query(term, field)
def _filter_in(self, value_list, field, is_not):
def _filter_in(self, term_list, field, is_not):
"""
Private method that returns a xapian.Query that searches for any value
Private method that returns a xapian.Query that searches for any term
of `term_list` in a specified `field`.
Required arguments:
``value_list`` -- The values to search for
``term_list`` -- The terms to search for
``field`` -- The field to search
``is_not`` -- Invert the search results
@ -823,18 +827,17 @@ class SearchQuery(BaseSearchQuery):
A xapian.Query
"""
query_list = []
for value in value_list:
value = _marshal_value(value)
if ' ' in value:
for term in term_list:
if ' ' in term:
query_list.append(
xapian.Query(
xapian.Query.OP_OR, self._phrase_query(value.split(), field)
xapian.Query.OP_OR, self._phrase_query(term.split(), field)
)
)
else:
query_list.append(
xapian.Query(
xapian.Query.OP_OR, self._term_query(value, field)
xapian.Query.OP_OR, self._term_query(term, field)
)
)
if is_not:
@ -851,13 +854,13 @@ class SearchQuery(BaseSearchQuery):
"""
return xapian.Query('')
def _term_query(self, value, field=None):
def _term_query(self, term, field=None):
"""
Private method that returns a term based xapian.Query that searches
for term `value`.
for `term`.
Required arguments:
``value`` -- The value to search for
``term`` -- The term to search for
``field`` -- The field to search (If `None`, all fields)
Returns:
@ -865,19 +868,19 @@ class SearchQuery(BaseSearchQuery):
"""
if field:
return xapian.Query('%s%s%s' % (
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), _marshal_value(value)
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
)
)
else:
return xapian.Query(value)
return xapian.Query(term)
def _phrase_query(self, value_list, field=None):
def _phrase_query(self, term_list, field=None):
"""
Private method that returns a phrase based xapian.Query that searches
for terms in `value_list.
for terms in `term_list`.
Required arguments:
``value_list`` -- The values to search for
``term_list`` -- The terms to search for
``field`` -- The field to search (If `None`, all fields)
Returns:
@ -887,12 +890,12 @@ class SearchQuery(BaseSearchQuery):
return xapian.Query(
xapian.Query.OP_PHRASE, [
'%s%s%s' % (
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), _marshal_value(value)
) for value in value_list
DOCUMENT_CUSTOM_TERM_PREFIX, field.upper(), term
) for term in term_list
]
)
else:
return xapian.Query(xapian.Query.OP_PHRASE, value_list)
return xapian.Query(xapian.Query.OP_PHRASE, term_list)
def _marshal_value(value):
@ -900,23 +903,14 @@ def _marshal_value(value):
Private utility method that converts Python values to a string for Xapian values.
"""
if isinstance(value, datetime.datetime):
if value.microsecond:
value = u'%04d%02d%02d%02d%02d%02d%06d' % (
value.year, value.month, value.day, value.hour,
value.minute, value.second, value.microsecond
)
else:
value = u'%04d%02d%02d%02d%02d%02d' % (
value.year, value.month, value.day, value.hour,
value.minute, value.second
)
value = _marshal_datetime(value)
elif isinstance(value, datetime.date):
value = u'%04d%02d%02d000000' % (value.year, value.month, value.day)
value = _marshal_date(value)
elif isinstance(value, bool):
if value:
value = u'true'
value = u't'
else:
value = u'false'
value = u'f'
elif isinstance(value, float):
value = xapian.sortable_serialise(value)
elif isinstance(value, (int, long)):
@ -925,3 +919,37 @@ def _marshal_value(value):
value = force_unicode(value).lower()
return value
def _marshal_term(term):
    """
    Private utility method that converts a Python object to the unicode
    string used for a Xapian term.

    Required arguments:
        ``term`` -- The Python object to convert

    Returns:
        A unicode string
    """
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # tested first or the time portion would be dropped.
    if isinstance(term, datetime.datetime):
        return _marshal_datetime(term)
    if isinstance(term, datetime.date):
        return _marshal_date(term)
    if isinstance(term, bool):
        return u'true' if term else u'false'
    # Everything else is coerced to unicode and lower-cased.
    return force_unicode(term).lower()
def _marshal_date(d):
return u'%04d%02d%02d000000' % (d.year, d.month, d.day)
def _marshal_datetime(dt):
if dt.microsecond:
return u'%04d%02d%02d%02d%02d%02d%06d' % (
dt.year, dt.month, dt.day, dt.hour,
dt.minute, dt.second, dt.microsecond
)
else:
return u'%04d%02d%02d%02d%02d%02d' % (
dt.year, dt.month, dt.day, dt.hour,
dt.minute, dt.second
)