Compare commits

..

No commits in common. "master" and "0be3ed9f728918457db5aeb97572b6bb2073d041" have entirely different histories.

9 changed files with 30 additions and 186 deletions

View File

@ -10,8 +10,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
xapian-version: ['1.4.19']
python-version: ['3.6', '3.9', '3.10']
xapian-version: ['1.4.18']
steps:
- name: Set up Python ${{ matrix.python-version }}
@ -41,16 +41,14 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.7', '3.8', '3.9', '3.10']
django-version: ['3.2', '4.0', '4.1']
xapian-version: ['1.4.19']
filelock-version: ['3.4.2']
exclude:
# Django dropped python 3.7 support in 4.0
- python-version: '3.7'
django-version: '4.0'
- python-version: '3.7'
django-version: '4.1'
python-version: ['3.6', '3.9']
django-version: ['2.2', '3.1', '3.2']
xapian-version: ['1.4.18']
include:
# Django added python 3.10 support in 3.2.9
- python-version: '3.10'
django-version: '3.2'
xapian-version: '1.4.18'
steps:
- name: Set up Python ${{ matrix.python-version }}
@ -72,7 +70,7 @@ jobs:
- name: Install Django and other Python dependencies
run: |
python -m pip install --upgrade pip
pip install django~=${{ matrix.django-version }} filelock~=${{ matrix.filelock-version }} coveralls xapian*.whl
pip install django~=${{ matrix.django-version }} coveralls xapian*.whl
- name: Checkout django-haystack
uses: actions/checkout@v2

View File

@ -2,16 +2,6 @@
xapian-haystack Changelog
=========================
v3.1.0 (2023-03-19)
-------------------
- Add DJANGO_CT, DJANGO_ID, ID to be used with '__exact' internally.
- Ability to configure ngram min and max lengths.
- Supported Django versions: 3.2, 4.0, 4.1
- Dropped support for Python 3.6.
- Fixed DatabaseLocked errors when running management commands with
multiple workers.
v3.0.1 (2021-11-12)
-------------------

View File

@ -92,10 +92,6 @@ The backend has the following optional settings:
See `here <http://xapian.org/docs/apidoc/html/classXapian_1_1QueryParser.html#ac7dc3b55b6083bd3ff98fc8b2726c8fd>`__ for
more information about the different strategies.
- ``XAPIAN_NGRAM_MIN_LENGTH``, ``XAPIAN_NGRAM_MAX_LENGTH``: options for custom configuration of ngrams (phrases) length.
- ``HAYSTACK_XAPIAN_USE_LOCKFILE``: Use a lockfile to prevent database locking errors when running management commands with multiple workers.
Defaults to ``True``.
Testing
-------

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# first argument of the script is Xapian version (e.g. 1.4.19)
# first argument of the script is Xapian version (e.g. 1.4.18)
VERSION=$1

View File

@ -1,3 +1,2 @@
Django>=2.2
Django-Haystack>=3.0
filelock>=3.4

View File

@ -8,7 +8,7 @@ def read(fname):
setup(
name='xapian-haystack',
version='3.1.0',
version='3.0.1',
description='A Xapian backend for Haystack',
long_description=read('README.rst'),
long_description_content_type='text/x-rst',
@ -18,16 +18,15 @@ setup(
'License :: OSI Approved :: GNU General Public License (GPL)',
'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
'Framework :: Django',
'Programming Language :: Python :: 3 :: Only',
],
author='Jorge C. Leitão',
author_email='jorgecarleitao@gmail.com',
url='https://github.com/notanumber/xapian-haystack',
url='http://github.com/notanumber/xapian-haystack',
download_url='http://github.com/notanumber/xapian-haystack/tarball/2.1.0',
license='GPL2',
py_modules=['xapian_backend'],
install_requires=[
'django>=3.2',
'django>=2.2',
'django-haystack>=2.8.0',
'filelock>=3.4',
]
)

View File

@ -1,103 +0,0 @@
import sys
from io import StringIO
from unittest import TestCase
from django.core.management import call_command
from ..models import BlogEntry
from ..search_indexes import BlogSearchIndex
from .test_backend import BackendFeaturesTestCase, HaystackBackendTestCase
class ManagementCommandTestCase(HaystackBackendTestCase, TestCase):
    """Run the index management commands and check the resulting index.

    Each test starts with ``NUM_BLOG_ENTRIES`` saved ``BlogEntry`` rows
    (created in ``setUp``) and then inspects the backend's document count
    and document ids after running a management command.
    """

    NUM_BLOG_ENTRIES = 20

    def get_index(self):
        """Return the search index instance used by these tests."""
        return BlogSearchIndex()

    def setUp(self):
        """Create and save ``NUM_BLOG_ENTRIES`` sample entries."""
        super().setUp()

        self.sample_objs = []

        for i in range(1, self.NUM_BLOG_ENTRIES + 1):
            entry = BackendFeaturesTestCase.get_entry(i)
            self.sample_objs.append(entry)
            entry.save()

    def verify_indexed_document_count(self, expected):
        """Assert that the backend reports exactly `expected` documents."""
        count = self.backend.document_count()
        self.assertEqual(count, expected)

    def verify_indexed_documents(self):
        """Confirm that the documents in the search index match the database"""
        count = self.backend.document_count()
        self.assertEqual(count, self.NUM_BLOG_ENTRIES)

        pks = set(BlogEntry.objects.values_list("pk", flat=True))
        doc_ids = set()
        database = self.backend._database()
        try:
            for pk in pks:
                xapian_doc = database.get_document(pk)
                doc_ids.add(xapian_doc.get_docid())
        finally:
            # Close the handle even when a lookup fails, so a failing test
            # does not leave the database open for the tests that follow.
            database.close()

        self.assertSetEqual(pks, doc_ids)

    def test_clear(self):
        """``clear_index`` leaves the index with no documents."""
        self.backend.update(self.index, BlogEntry.objects.all())
        self.verify_indexed_documents()

        call_command("clear_index", interactive=False, verbosity=0)
        self.verify_indexed_document_count(0)

    def test_update(self):
        """``update_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

    def test_rebuild(self):
        """``rebuild_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("rebuild_index", interactive=False, verbosity=0)
        self.verify_indexed_documents()

    def test_remove(self):
        """Deleted rows leave the index only with ``update_index --remove``."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

        # Remove three instances.
        three_pks = BlogEntry.objects.all()[:3].values_list("pk", flat=True)
        BlogEntry.objects.filter(pk__in=three_pks).delete()
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # Plain ``update_index`` doesn't fix it.
        call_command("update_index", verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # … but remove does:
        call_command("update_index", remove=True, verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES - 3)

    def test_multiprocessing(self):
        """``update_index`` with several workers reports no lock errors."""
        self.verify_indexed_document_count(0)

        old_stderr = sys.stderr
        sys.stderr = StringIO()
        try:
            call_command(
                "update_index",
                verbosity=2,
                workers=10,
                batchsize=2,
            )
            err = sys.stderr.getvalue()
        finally:
            # Always restore the real stderr, even when the command raises;
            # otherwise later tests would keep writing into the StringIO.
            sys.stderr = old_stderr

        print(err)
        self.assertNotIn("xapian.DatabaseLockError", err)
        self.verify_indexed_documents()

View File

@ -236,13 +236,6 @@ class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
self.sq.add_filter(SQ(django_ct='time'))
self.assertExpectedQuery(self.sq.build_query(), 'CONTENTTYPEtime')
def test_unphrased_id(self):
    """Filtering on the internal ID must build plain OR-ed terms, not
    phrases, so that IDs can be excluded."""
    wanted_ids = ['testing123', 'testing456']
    self.sq.add_filter(SQ(id__in=wanted_ids))

    built_query = self.sq.build_query()
    self.assertExpectedQuery(
        query=built_query,
        string_or_list='(Qtesting123 OR Qtesting456)',
    )
class SearchQueryTestCase(HaystackBackendTestCase, TestCase):
"""

View File

@ -1,8 +1,5 @@
# This file was modified by Matthias Portzel on Dec 22nd, 2021 and Aug 5th, 2022
import datetime
import pickle
from pathlib import Path
import os
import re
import shutil
@ -11,8 +8,6 @@ import sys
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from filelock import FileLock
from haystack import connections
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack.constants import ID, DJANGO_ID, DJANGO_CT, DEFAULT_OPERATOR
@ -21,8 +16,8 @@ from haystack.inputs import AutoQuery
from haystack.models import SearchResult
from haystack.utils import get_identifier, get_model_ct
NGRAM_MIN_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MIN_LENGTH', 2)
NGRAM_MAX_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MAX_LENGTH', 15)
NGRAM_MIN_LENGTH = 2
NGRAM_MAX_LENGTH = 15
# Matches any run of 239 or more consecutive non-whitespace bytes.
# Raw bytes literal: ``\s`` is a regex class, not a Python string escape.
LONG_TERM = re.compile(rb'[^\s]{239,}')
LONG_TERM_METHOD = getattr(settings, 'XAPIAN_LONG_TERM_METHOD', 'truncate')
@ -48,8 +43,6 @@ TERM_PREFIXES = {
'field': 'X'
}
_EXACT_SEARCHFIELDS = frozenset((DJANGO_CT, DJANGO_ID, ID))
MEMORY_DB_NAME = ':memory:'
DEFAULT_XAPIAN_FLAGS = (
@ -84,24 +77,6 @@ INTEGER_FORMAT = '%012d'
# texts with positional information
TERMPOS_DISTANCE = 100
def filelocked(func):
    """Decorator to wrap a XapianSearchBackend method in a filelock.

    The wrapped method is called directly, without locking, when the
    instance's ``path`` equals ``MEMORY_DB_NAME`` or its ``use_lockfile``
    attribute is falsy. Otherwise the lock file's parent directory and the
    lock file itself are created if missing, and the method runs while
    ``self.filelock`` is held.

    The wrapper returns whatever the wrapped method returns.
    """
    # Imported locally so the decorator does not rely on a file-level import.
    from functools import wraps

    @wraps(func)  # keep the wrapped method's name and docstring
    def wrapper(self, *args, **kwargs):
        # Run the function inside a lock, unless locking does not apply.
        if self.path == MEMORY_DB_NAME or not self.use_lockfile:
            return func(self, *args, **kwargs)

        lockfile = Path(self.filelock.lock_file)
        lockfile.parent.mkdir(parents=True, exist_ok=True)
        lockfile.touch()
        with self.filelock:
            return func(self, *args, **kwargs)

    return wrapper
class InvalidIndexError(HaystackError):
    """Signals that an index could not be opened."""
@ -138,7 +113,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
begin = -sys.maxsize - 1
elif field_type == 'float':
begin = float('-inf')
elif field_type in ['date', 'datetime']:
elif field_type == 'date' or field_type == 'datetime':
begin = '00010101000000'
elif end == '*':
if field_type == 'text':
@ -147,7 +122,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
end = sys.maxsize
elif field_type == 'float':
end = float('inf')
elif field_type in ['date', 'datetime']:
elif field_type == 'date' or field_type == 'datetime':
end = '99990101000000'
if field_type == 'float':
@ -177,7 +152,13 @@ class XapianSearchBackend(BaseSearchBackend):
`SearchBackend` defines the Xapian search backend for use with the Haystack
API for Django search.
It uses the Xapian Python bindings to interface with Xapian.
It uses the Xapian Python bindings to interface with Xapian, and as
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
Django is running with mod_python or mod_wsgi under Apache.
Until this issue has been fixed by Xapian, it is necessary to set
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
`PythonInterpreter main_interpreter` when using mod_python.
In order to use this backend, `PATH` must be included in the
`connection_options`. This should point to a location where you would like your
@ -197,9 +178,6 @@ class XapianSearchBackend(BaseSearchBackend):
Also sets the stemming language to be used to `language`.
"""
self.use_lockfile = bool(
getattr(settings, 'HAYSTACK_XAPIAN_USE_LOCKFILE', True)
)
super().__init__(connection_alias, **connection_options)
if not 'PATH' in connection_options:
@ -214,10 +192,6 @@ class XapianSearchBackend(BaseSearchBackend):
except FileExistsError:
pass
if self.use_lockfile:
lockfile = Path(self.path) / "lockfile"
self.filelock = FileLock(lockfile)
self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
self.language = getattr(settings, 'HAYSTACK_XAPIAN_LANGUAGE', 'english')
@ -261,7 +235,6 @@ class XapianSearchBackend(BaseSearchBackend):
self._update_cache()
return self._columns
@filelocked
def update(self, index, iterable, commit=True):
"""
Updates the `index` with any objects in `iterable` by adding/updating
@ -513,7 +486,6 @@ class XapianSearchBackend(BaseSearchBackend):
finally:
database.close()
@filelocked
def remove(self, obj, commit=True):
"""
Remove indexes for `obj` from the database.
@ -1445,7 +1417,7 @@ class XapianSearchQuery(BaseSearchQuery):
Assumes term is not a list.
"""
if field_type == 'text' and field_name not in _EXACT_SEARCHFIELDS:
if field_type == 'text' and field_name not in (DJANGO_CT,):
term = '^ %s $' % term
query = self._phrase_query(term.split(), field_name, field_type)
else:
@ -1620,7 +1592,7 @@ def _term_to_xapian_value(term, field_type):
value = INTEGER_FORMAT % term
elif field_type == 'float':
value = xapian.sortable_serialise(term)
elif field_type in ['date', 'datetime']:
elif field_type == 'date' or field_type == 'datetime':
if field_type == 'date':
# http://stackoverflow.com/a/1937636/931303 and comments
term = datetime.datetime.combine(term, datetime.time())
@ -1683,7 +1655,7 @@ def _from_xapian_value(value, field_type):
return int(value)
elif field_type == 'float':
return xapian.sortable_unserialise(value)
elif field_type in ['date', 'datetime']:
elif field_type == 'date' or field_type == 'datetime':
datetime_value = datetime.datetime.strptime(value, DATETIME_FORMAT)
if field_type == 'datetime':
return datetime_value