Compare commits

...

15 Commits

Author SHA1 Message Date
Matthias Portzel d9c27b07f6 Merge remote-tracking branch 'upstream/master' 2023-11-30 12:35:07 -05:00
Claude Paroz 30c727e653 Prepare 3.1.0 release (confirmed Django 4.1 support) 2023-03-19 12:54:34 +01:00
László Károlyi 90593c07b7 Allow more internal data exact searches
Bump version

Fix syntax error

Adjust CHANGELOG.rst

Add test for __exact on ID
2023-03-19 12:28:10 +01:00
Matthias Portzel a88357b213 Add notice of modification for GPLv2 compliance 2022-08-05 20:46:52 -04:00
Matthias Portzel adb15372d8 Merge remote-tracking branch 'upstream/master' 2022-08-05 20:22:54 -04:00
Patryk Szczepański 251e924122
Add ability to configure NGRAM's 2022-07-08 21:52:10 +02:00
Patryk Szczepański 9a792e10e3 Shorten ifs using membership logic 2022-07-08 08:47:19 +02:00
Matthias Portzel ffde9b14ed Merge branch 'master' of https://github.com/notanumber/xapian-haystack 2022-04-04 10:30:09 -04:00
Alejandro R. Sedeño aa3425e0e9
Bump Xapian version to 1.4.19 2022-03-19 11:41:39 +01:00
AJ Slater 870b48dfcd
Management Command Database Locking 2022-02-10 20:44:21 +01:00
Matthias Portzel 4bcb3efa70 Merge branch 'master' of https://github.com/notanumber/xapian-haystack 2022-02-08 12:14:52 -05:00
Alejandro R. Sedeño 3fc4cfe46d GitHub Actions: rework test matrix
Drop Django 3.1 (EoL)
Add Django 4.0
Add Python 3.8 (Minimum version for Django 4.0)
Add Python 3.10 to the general matrix, not just for Django 3.2

Exclude Django 2.2 from Python 3.10 testing.
Exclude Django 4.0 from Python 3.7 testing.

Django 2.2 will EoL in a little over a month, at which point we can
trim the matrix down again by dropping it and Python 3.9 if we want
to.
2022-02-08 08:17:06 +01:00
AJ Slater 515651f893
tests for non-multiprocessing management commands 2022-02-07 21:01:01 +01:00
Claude Paroz 65adf90601 Remove obsolete comment backend docstring.
xapian issue #364 is solved in xapian 1.3+.
2022-02-05 10:56:25 +01:00
Claude Paroz 77d1fc8ef6 Dropped support for Python 3.6 2022-02-05 10:53:22 +01:00
9 changed files with 186 additions and 30 deletions

View File

@ -10,8 +10,8 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.6', '3.9', '3.10']
xapian-version: ['1.4.18']
python-version: ['3.7', '3.8', '3.9', '3.10']
xapian-version: ['1.4.19']
steps:
- name: Set up Python ${{ matrix.python-version }}
@ -41,14 +41,16 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.6', '3.9']
django-version: ['2.2', '3.1', '3.2']
xapian-version: ['1.4.18']
include:
# Django added python 3.10 support in 3.2.9
- python-version: '3.10'
django-version: '3.2'
xapian-version: '1.4.18'
python-version: ['3.7', '3.8', '3.9', '3.10']
django-version: ['3.2', '4.0', '4.1']
xapian-version: ['1.4.19']
filelock-version: ['3.4.2']
exclude:
# Django dropped python 3.7 support in 4.0
- python-version: '3.7'
django-version: '4.0'
- python-version: '3.7'
django-version: '4.1'
steps:
- name: Set up Python ${{ matrix.python-version }}
@ -70,7 +72,7 @@ jobs:
- name: Install Django and other Python dependencies
run: |
python -m pip install --upgrade pip
pip install django~=${{ matrix.django-version }} coveralls xapian*.whl
pip install django~=${{ matrix.django-version }} filelock~=${{ matrix.filelock-version }} coveralls xapian*.whl
- name: Checkout django-haystack
uses: actions/checkout@v2

View File

@ -2,6 +2,16 @@
xapian-haystack Changelog
=========================
v3.1.0 (2023-03-19)
-------------------
- Add DJANGO_CT, DJANGO_ID, ID to be used with '__exact' internally.
- Ability to configure ngram min and max lengths.
- Supported Django versions: 3.2, 4.0, 4.1
- Dropped support for Python 3.6.
- Fixed DatabaseLocked errors when running management commands with
multiple workers.
v3.0.1 (2021-11-12)
-------------------

View File

@ -92,6 +92,10 @@ The backend has the following optional settings:
See `here <http://xapian.org/docs/apidoc/html/classXapian_1_1QueryParser.html#ac7dc3b55b6083bd3ff98fc8b2726c8fd>`__ for
more information about the different strategies.
- ``XAPIAN_NGRAM_MIN_LENGTH``, ``XAPIAN_NGRAM_MAX_LENGTH``: options for custom configuration of ngrams (phrases) length.
- ``HAYSTACK_XAPIAN_USE_LOCKFILE``: Use a lockfile to prevent database locking errors when running management commands with multiple workers.
Defaults to ``True``.
Testing
-------

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# first argument of the script is Xapian version (e.g. 1.4.18)
# first argument of the script is Xapian version (e.g. 1.4.19)
VERSION=$1

View File

@ -1,2 +1,3 @@
Django>=2.2
Django-Haystack>=3.0
filelock>=3.4

View File

@ -8,7 +8,7 @@ def read(fname):
setup(
name='xapian-haystack',
version='3.0.1',
version='3.1.0',
description='A Xapian backend for Haystack',
long_description=read('README.rst'),
long_description_content_type='text/x-rst',
@ -18,15 +18,16 @@ setup(
'License :: OSI Approved :: GNU General Public License (GPL)',
'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
'Framework :: Django',
'Programming Language :: Python :: 3 :: Only',
],
author='Jorge C. Leitão',
author_email='jorgecarleitao@gmail.com',
url='http://github.com/notanumber/xapian-haystack',
download_url='http://github.com/notanumber/xapian-haystack/tarball/2.1.0',
url='https://github.com/notanumber/xapian-haystack',
license='GPL2',
py_modules=['xapian_backend'],
install_requires=[
'django>=2.2',
'django>=3.2',
'django-haystack>=2.8.0',
'filelock>=3.4',
]
)

View File

@ -0,0 +1,103 @@
import sys
from io import StringIO
from unittest import TestCase
from django.core.management import call_command
from ..models import BlogEntry
from ..search_indexes import BlogSearchIndex
from .test_backend import BackendFeaturesTestCase, HaystackBackendTestCase
class ManagementCommandTestCase(HaystackBackendTestCase, TestCase):
    """Run the index management commands against the Xapian backend.

    Each test starts with ``NUM_BLOG_ENTRIES`` saved ``BlogEntry`` rows and
    checks the document count (and document ids) of the index after calling
    ``clear_index``, ``update_index`` or ``rebuild_index``.
    """

    # Number of blog entries created in setUp and expected in a full index.
    NUM_BLOG_ENTRIES = 20

    def get_index(self):
        """Return the search index used by these tests."""
        return BlogSearchIndex()

    def setUp(self):
        """Create and save the sample blog entries."""
        super().setUp()

        self.sample_objs = []

        for i in range(1, self.NUM_BLOG_ENTRIES + 1):
            entry = BackendFeaturesTestCase.get_entry(i)
            self.sample_objs.append(entry)
            entry.save()

    def verify_indexed_document_count(self, expected):
        """Assert that the index holds exactly `expected` documents."""
        count = self.backend.document_count()
        self.assertEqual(count, expected)

    def verify_indexed_documents(self):
        """Confirm that the documents in the search index match the database"""
        count = self.backend.document_count()
        self.assertEqual(count, self.NUM_BLOG_ENTRIES)

        pks = set(BlogEntry.objects.values_list("pk", flat=True))
        doc_ids = set()
        database = self.backend._database()
        try:
            for pk in pks:
                xapian_doc = database.get_document(pk)
                doc_id = xapian_doc.get_docid()
                doc_ids.add(doc_id)
        finally:
            # Release the handle even when a document lookup fails, so a
            # failing assertion here cannot affect the following tests.
            database.close()

        self.assertSetEqual(pks, doc_ids)

    def test_clear(self):
        """``clear_index`` empties a populated index."""
        self.backend.update(self.index, BlogEntry.objects.all())
        self.verify_indexed_documents()

        call_command("clear_index", interactive=False, verbosity=0)
        self.verify_indexed_document_count(0)

    def test_update(self):
        """``update_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

    def test_rebuild(self):
        """``rebuild_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("rebuild_index", interactive=False, verbosity=0)
        self.verify_indexed_documents()

    def test_remove(self):
        """Deleted rows leave the index only with ``update_index --remove``."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

        # Remove three instances.
        three_pks = BlogEntry.objects.all()[:3].values_list("pk", flat=True)
        BlogEntry.objects.filter(pk__in=three_pks).delete()
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # Plain ``update_index`` doesn't fix it.
        call_command("update_index", verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # … but remove does:
        call_command("update_index", remove=True, verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES - 3)

    def test_multiprocessing(self):
        """``update_index`` with several workers reports no lock errors."""
        self.verify_indexed_document_count(0)

        captured = StringIO()
        old_stderr = sys.stderr
        sys.stderr = captured
        try:
            call_command(
                "update_index",
                verbosity=2,
                workers=10,
                batchsize=2,
            )
        finally:
            # Always restore the real stderr, even when the command raises;
            # otherwise every later test would write to the captured buffer.
            sys.stderr = old_stderr

        err = captured.getvalue()
        print(err)
        self.assertNotIn("xapian.DatabaseLockError", err)
        self.verify_indexed_documents()

View File

@ -236,6 +236,13 @@ class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
self.sq.add_filter(SQ(django_ct='time'))
self.assertExpectedQuery(self.sq.build_query(), 'CONTENTTYPEtime')
def test_unphrased_id(self):
    """Internal IDs must NOT be phrased, so that IDs can be excluded."""
    wanted_ids = ['testing123', 'testing456']
    self.sq.add_filter(SQ(id__in=wanted_ids))
    built_query = self.sq.build_query()
    self.assertExpectedQuery(
        query=built_query,
        string_or_list='(Qtesting123 OR Qtesting456)',
    )
class SearchQueryTestCase(HaystackBackendTestCase, TestCase):
"""

View File

@ -1,5 +1,8 @@
# This file was modified by Matthias Portzel on Dec 22nd, 2021 and Aug 5th, 2022
import datetime
import pickle
from pathlib import Path
import os
import re
import shutil
@ -8,6 +11,8 @@ import sys
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from filelock import FileLock
from haystack import connections
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
from haystack.constants import ID, DJANGO_ID, DJANGO_CT, DEFAULT_OPERATOR
@ -16,8 +21,8 @@ from haystack.inputs import AutoQuery
from haystack.models import SearchResult
from haystack.utils import get_identifier, get_model_ct
NGRAM_MIN_LENGTH = 2
NGRAM_MAX_LENGTH = 15
NGRAM_MIN_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MIN_LENGTH', 2)
NGRAM_MAX_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MAX_LENGTH', 15)
LONG_TERM = re.compile(b'[^\s]{239,}')
LONG_TERM_METHOD = getattr(settings, 'XAPIAN_LONG_TERM_METHOD', 'truncate')
@ -43,6 +48,8 @@ TERM_PREFIXES = {
'field': 'X'
}
_EXACT_SEARCHFIELDS = frozenset((DJANGO_CT, DJANGO_ID, ID))
MEMORY_DB_NAME = ':memory:'
DEFAULT_XAPIAN_FLAGS = (
@ -77,6 +84,24 @@ INTEGER_FORMAT = '%012d'
# texts with positional information
TERMPOS_DISTANCE = 100
def filelocked(func):
    """Decorator to wrap a XapianSearchBackend method in a filelock.

    The lock is skipped when the backend path equals ``MEMORY_DB_NAME`` or
    when ``self.use_lockfile`` is falsy. Otherwise the lock file and its
    parent directory are created if missing, and ``self.filelock`` is held
    for the duration of the call.

    The wrapped method's return value is handed back to the caller, and its
    name and docstring are preserved on the returned wrapper.
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # Nothing to lock for an in-memory database or when locking is off.
        if self.path == MEMORY_DB_NAME or not self.use_lockfile:
            return func(self, *args, **kwargs)

        # Make sure the lock file exists before trying to acquire the lock.
        lockfile = Path(self.filelock.lock_file)
        lockfile.parent.mkdir(parents=True, exist_ok=True)
        lockfile.touch()
        with self.filelock:
            return func(self, *args, **kwargs)

    return wrapper
class InvalidIndexError(HaystackError):
"""Raised when an index can not be opened."""
pass
@ -113,7 +138,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
begin = -sys.maxsize - 1
elif field_type == 'float':
begin = float('-inf')
elif field_type == 'date' or field_type == 'datetime':
elif field_type in ['date', 'datetime']:
begin = '00010101000000'
elif end == '*':
if field_type == 'text':
@ -122,7 +147,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
end = sys.maxsize
elif field_type == 'float':
end = float('inf')
elif field_type == 'date' or field_type == 'datetime':
elif field_type in ['date', 'datetime']:
end = '99990101000000'
if field_type == 'float':
@ -152,13 +177,7 @@ class XapianSearchBackend(BaseSearchBackend):
`SearchBackend` defines the Xapian search backend for use with the Haystack
API for Django search.
It uses the Xapian Python bindings to interface with Xapian, and as
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
Django is running with mod_python or mod_wsgi under Apache.
Until this issue has been fixed by Xapian, it is necessary to set
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
`PythonInterpreter main_interpreter` when using mod_python.
It uses the Xapian Python bindings to interface with Xapian.
In order to use this backend, `PATH` must be included in the
`connection_options`. This should point to a location where you would your
@ -178,6 +197,9 @@ class XapianSearchBackend(BaseSearchBackend):
Also sets the stemming language to be used to `language`.
"""
self.use_lockfile = bool(
getattr(settings, 'HAYSTACK_XAPIAN_USE_LOCKFILE', True)
)
super().__init__(connection_alias, **connection_options)
if not 'PATH' in connection_options:
@ -192,6 +214,10 @@ class XapianSearchBackend(BaseSearchBackend):
except FileExistsError:
pass
if self.use_lockfile:
lockfile = Path(self.path) / "lockfile"
self.filelock = FileLock(lockfile)
self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
self.language = getattr(settings, 'HAYSTACK_XAPIAN_LANGUAGE', 'english')
@ -235,6 +261,7 @@ class XapianSearchBackend(BaseSearchBackend):
self._update_cache()
return self._columns
@filelocked
def update(self, index, iterable, commit=True):
"""
Updates the `index` with any objects in `iterable` by adding/updating
@ -486,6 +513,7 @@ class XapianSearchBackend(BaseSearchBackend):
finally:
database.close()
@filelocked
def remove(self, obj, commit=True):
"""
Remove indexes for `obj` from the database.
@ -1417,7 +1445,7 @@ class XapianSearchQuery(BaseSearchQuery):
Assumes term is not a list.
"""
if field_type == 'text' and field_name not in (DJANGO_CT,):
if field_type == 'text' and field_name not in _EXACT_SEARCHFIELDS:
term = '^ %s $' % term
query = self._phrase_query(term.split(), field_name, field_type)
else:
@ -1592,7 +1620,7 @@ def _term_to_xapian_value(term, field_type):
value = INTEGER_FORMAT % term
elif field_type == 'float':
value = xapian.sortable_serialise(term)
elif field_type == 'date' or field_type == 'datetime':
elif field_type in ['date', 'datetime']:
if field_type == 'date':
# http://stackoverflow.com/a/1937636/931303 and comments
term = datetime.datetime.combine(term, datetime.time())
@ -1655,7 +1683,7 @@ def _from_xapian_value(value, field_type):
return int(value)
elif field_type == 'float':
return xapian.sortable_unserialise(value)
elif field_type == 'date' or field_type == 'datetime':
elif field_type in ['date', 'datetime']:
datetime_value = datetime.datetime.strptime(value, DATETIME_FORMAT)
if field_type == 'datetime':
return datetime_value