Compare commits
15 Commits
0be3ed9f72
...
master
Author | SHA1 | Date |
---|---|---|
Matthias Portzel | d9c27b07f6 | |
Claude Paroz | 30c727e653 | |
László Károlyi | 90593c07b7 | |
Matthias Portzel | a88357b213 | |
Matthias Portzel | adb15372d8 | |
Patryk Szczepański | 251e924122 | |
Patryk Szczepański | 9a792e10e3 | |
Matthias Portzel | ffde9b14ed | |
Alejandro R. Sedeño | aa3425e0e9 | |
AJ Slater | 870b48dfcd | |
Matthias Portzel | 4bcb3efa70 | |
Alejandro R. Sedeño | 3fc4cfe46d | |
AJ Slater | 515651f893 | |
Claude Paroz | 65adf90601 | |
Claude Paroz | 77d1fc8ef6 |
|
@ -10,8 +10,8 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.6', '3.9', '3.10']
|
||||
xapian-version: ['1.4.18']
|
||||
python-version: ['3.7', '3.8', '3.9', '3.10']
|
||||
xapian-version: ['1.4.19']
|
||||
|
||||
steps:
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
|
@ -41,14 +41,16 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.6', '3.9']
|
||||
django-version: ['2.2', '3.1', '3.2']
|
||||
xapian-version: ['1.4.18']
|
||||
include:
|
||||
# Django added python 3.10 support in 3.2.9
|
||||
- python-version: '3.10'
|
||||
django-version: '3.2'
|
||||
xapian-version: '1.4.18'
|
||||
python-version: ['3.7', '3.8', '3.9', '3.10']
|
||||
django-version: ['3.2', '4.0', '4.1']
|
||||
xapian-version: ['1.4.19']
|
||||
filelock-version: ['3.4.2']
|
||||
exclude:
|
||||
# Django dropped python 3.7 support in 4.0
|
||||
- python-version: '3.7'
|
||||
django-version: '4.0'
|
||||
- python-version: '3.7'
|
||||
django-version: '4.1'
|
||||
|
||||
steps:
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
|
@ -70,7 +72,7 @@ jobs:
|
|||
- name: Install Django and other Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install django~=${{ matrix.django-version }} coveralls xapian*.whl
|
||||
pip install django~=${{ matrix.django-version }} filelock~=${{ matrix.filelock-version }} coveralls xapian*.whl
|
||||
|
||||
- name: Checkout django-haystack
|
||||
uses: actions/checkout@v2
|
||||
|
|
|
@ -2,6 +2,16 @@
|
|||
xapian-haystack Changelog
|
||||
=========================
|
||||
|
||||
v3.1.0 (2023-03-19)
|
||||
-------------------
|
||||
|
||||
- Add DJANGO_CT, DJANGO_ID, ID to be used with '__exact' internally.
|
||||
- Ability to configure ngram min and max lengths.
|
||||
- Supported Django versions: 3.2, 4.0, 4.1
|
||||
- Dropped support for Python 3.6.
|
||||
- Fixed xapian.DatabaseLockError errors when running management commands with
|
||||
multiple workers.
|
||||
|
||||
v3.0.1 (2021-11-12)
|
||||
-------------------
|
||||
|
||||
|
|
|
@ -92,6 +92,10 @@ The backend has the following optional settings:
|
|||
See `here <http://xapian.org/docs/apidoc/html/classXapian_1_1QueryParser.html#ac7dc3b55b6083bd3ff98fc8b2726c8fd>`__ for
|
||||
more information about the different strategies.
|
||||
|
||||
- ``XAPIAN_NGRAM_MIN_LENGTH``, ``XAPIAN_NGRAM_MAX_LENGTH``: minimum and maximum length of the ngrams (phrases) used by the backend. Default to ``2`` and ``15``.
|
||||
|
||||
- ``HAYSTACK_XAPIAN_USE_LOCKFILE``: Use a lockfile to prevent database locking errors when running management commands with multiple workers.
|
||||
Defaults to ``True``.
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/usr/bin/env bash
|
||||
# first argument of the script is Xapian version (e.g. 1.4.18)
|
||||
# first argument of the script is Xapian version (e.g. 1.4.19)
|
||||
|
||||
VERSION=$1
|
||||
|
||||
|
|
|
@ -1,2 +1,3 @@
|
|||
Django>=2.2
|
||||
Django-Haystack>=3.0
|
||||
filelock>=3.4
|
||||
|
|
9
setup.py
9
setup.py
|
@ -8,7 +8,7 @@ def read(fname):
|
|||
|
||||
setup(
|
||||
name='xapian-haystack',
|
||||
version='3.0.1',
|
||||
version='3.1.0',
|
||||
description='A Xapian backend for Haystack',
|
||||
long_description=read('README.rst'),
|
||||
long_description_content_type='text/x-rst',
|
||||
|
@ -18,15 +18,16 @@ setup(
|
|||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||
'Topic :: Internet :: WWW/HTTP :: Indexing/Search',
|
||||
'Framework :: Django',
|
||||
'Programming Language :: Python :: 3 :: Only',
|
||||
],
|
||||
author='Jorge C. Leitão',
|
||||
author_email='jorgecarleitao@gmail.com',
|
||||
url='http://github.com/notanumber/xapian-haystack',
|
||||
download_url='http://github.com/notanumber/xapian-haystack/tarball/2.1.0',
|
||||
url='https://github.com/notanumber/xapian-haystack',
|
||||
license='GPL2',
|
||||
py_modules=['xapian_backend'],
|
||||
install_requires=[
|
||||
'django>=2.2',
|
||||
'django>=3.2',
|
||||
'django-haystack>=2.8.0',
|
||||
'filelock>=3.4',
|
||||
]
|
||||
)
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
import sys
|
||||
from io import StringIO
|
||||
from unittest import TestCase
|
||||
|
||||
from django.core.management import call_command
|
||||
|
||||
from ..models import BlogEntry
|
||||
from ..search_indexes import BlogSearchIndex
|
||||
from .test_backend import BackendFeaturesTestCase, HaystackBackendTestCase
|
||||
|
||||
|
||||
class ManagementCommandTestCase(HaystackBackendTestCase, TestCase):
    """Run the haystack management commands against the Xapian backend.

    ``setUp`` saves ``NUM_BLOG_ENTRIES`` blog entries; each test then calls
    ``clear_index``, ``update_index`` or ``rebuild_index`` and checks the
    number (and ids) of the documents found in the search index.
    """

    # Number of blog entries saved by ``setUp``.
    NUM_BLOG_ENTRIES = 20

    def get_index(self):
        """Return the search index used by these tests."""
        return BlogSearchIndex()

    def setUp(self):
        """Save ``NUM_BLOG_ENTRIES`` entries and keep them in ``sample_objs``."""
        super().setUp()

        self.sample_objs = []

        for i in range(1, self.NUM_BLOG_ENTRIES + 1):
            entry = BackendFeaturesTestCase.get_entry(i)
            self.sample_objs.append(entry)
            entry.save()

    def verify_indexed_document_count(self, expected):
        """Assert that the backend reports exactly ``expected`` documents."""
        count = self.backend.document_count()
        self.assertEqual(count, expected)

    def verify_indexed_documents(self):
        """Confirm that the documents in the search index match the database"""
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        pks = set(BlogEntry.objects.values_list("pk", flat=True))
        doc_ids = set()
        database = self.backend._database()
        try:
            for pk in pks:
                xapian_doc = database.get_document(pk)
                doc_ids.add(xapian_doc.get_docid())
        finally:
            # Release the database handle even when a document lookup raises.
            database.close()

        self.assertSetEqual(pks, doc_ids)

    def test_clear(self):
        """``clear_index`` leaves the index empty."""
        self.backend.update(self.index, BlogEntry.objects.all())
        self.verify_indexed_documents()

        call_command("clear_index", interactive=False, verbosity=0)
        self.verify_indexed_document_count(0)

    def test_update(self):
        """``update_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

    def test_rebuild(self):
        """``rebuild_index`` indexes every saved entry."""
        self.verify_indexed_document_count(0)

        call_command("rebuild_index", interactive=False, verbosity=0)
        self.verify_indexed_documents()

    def test_remove(self):
        """Deleted entries leave the index only with ``update_index --remove``."""
        self.verify_indexed_document_count(0)

        call_command("update_index", verbosity=0)
        self.verify_indexed_documents()

        # Remove three instances.
        three_pks = BlogEntry.objects.all()[:3].values_list("pk", flat=True)
        BlogEntry.objects.filter(pk__in=three_pks).delete()
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # Plain ``update_index`` doesn't fix it.
        call_command("update_index", verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES)

        # … but remove does:
        call_command("update_index", remove=True, verbosity=0)
        self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES - 3)

    def test_multiprocessing(self):
        """``update_index`` with several workers reports no lock error."""
        self.verify_indexed_document_count(0)

        old_stderr = sys.stderr
        sys.stderr = StringIO()
        try:
            call_command(
                "update_index",
                verbosity=2,
                workers=10,
                batchsize=2,
            )
            err = sys.stderr.getvalue()
        finally:
            # Always restore the real stderr, otherwise a failing command
            # would leave it replaced for every test that runs afterwards.
            sys.stderr = old_stderr
        print(err)
        self.assertNotIn("xapian.DatabaseLockError", err)
        self.verify_indexed_documents()
|
|
@ -236,6 +236,13 @@ class XapianSearchQueryTestCase(HaystackBackendTestCase, TestCase):
|
|||
self.sq.add_filter(SQ(django_ct='time'))
|
||||
self.assertExpectedQuery(self.sq.build_query(), 'CONTENTTYPEtime')
|
||||
|
||||
def test_unphrased_id(self):
    """An internal ID should NOT be phrased so one can exclude IDs."""
    id_filter = SQ(id__in=['testing123', 'testing456'])
    self.sq.add_filter(id_filter)

    # Both ids must appear as plain OR-ed terms, not as phrase queries.
    built_query = self.sq.build_query()
    self.assertExpectedQuery(
        query=built_query,
        string_or_list='(Qtesting123 OR Qtesting456)',
    )
|
||||
|
||||
|
||||
class SearchQueryTestCase(HaystackBackendTestCase, TestCase):
|
||||
"""
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
# This file was modified by Matthias Portzel on Dec 22nd, 2021 and Aug 5th, 2022
|
||||
|
||||
import datetime
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
|
@ -8,6 +11,8 @@ import sys
|
|||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
|
||||
from filelock import FileLock
|
||||
|
||||
from haystack import connections
|
||||
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
|
||||
from haystack.constants import ID, DJANGO_ID, DJANGO_CT, DEFAULT_OPERATOR
|
||||
|
@ -16,8 +21,8 @@ from haystack.inputs import AutoQuery
|
|||
from haystack.models import SearchResult
|
||||
from haystack.utils import get_identifier, get_model_ct
|
||||
|
||||
NGRAM_MIN_LENGTH = 2
|
||||
NGRAM_MAX_LENGTH = 15
|
||||
NGRAM_MIN_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MIN_LENGTH', 2)
|
||||
NGRAM_MAX_LENGTH = getattr(settings, 'XAPIAN_NGRAM_MAX_LENGTH', 15)
|
||||
|
||||
# Matches any run of 239 or more consecutive non-whitespace bytes.
# Written as a raw bytes literal: ``\s`` is a regex escape, not a bytes
# escape, and the non-raw spelling triggers an invalid-escape-sequence
# warning on newer Python versions. The compiled pattern is unchanged.
LONG_TERM = re.compile(rb'[^\s]{239,}')
|
||||
LONG_TERM_METHOD = getattr(settings, 'XAPIAN_LONG_TERM_METHOD', 'truncate')
|
||||
|
@ -43,6 +48,8 @@ TERM_PREFIXES = {
|
|||
'field': 'X'
|
||||
}
|
||||
|
||||
_EXACT_SEARCHFIELDS = frozenset((DJANGO_CT, DJANGO_ID, ID))
|
||||
|
||||
MEMORY_DB_NAME = ':memory:'
|
||||
|
||||
DEFAULT_XAPIAN_FLAGS = (
|
||||
|
@ -77,6 +84,24 @@ INTEGER_FORMAT = '%012d'
|
|||
# texts with positional information
|
||||
TERMPOS_DISTANCE = 100
|
||||
|
||||
|
||||
def filelocked(func):
    """Decorator to wrap a XapianSearchBackend method in a filelock.

    The wrapped method runs directly when the backend's ``path`` equals
    ``MEMORY_DB_NAME`` or when ``use_lockfile`` is false. Otherwise the file
    named by ``self.filelock.lock_file`` is created (together with any missing
    parent directories) and the method runs while ``self.filelock`` is held.

    The wrapped method's return value is passed through to the caller, and
    its name and docstring are preserved on the returned wrapper.
    """
    # Imported locally so the decorator does not rely on a module-level
    # ``functools`` import.
    from functools import wraps

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        # No lock is taken for the in-memory database or when locking is
        # switched off.
        if self.path == MEMORY_DB_NAME or not self.use_lockfile:
            return func(self, *args, **kwargs)

        # Make sure the lock file and its directory exist before locking.
        lockfile = Path(self.filelock.lock_file)
        lockfile.parent.mkdir(parents=True, exist_ok=True)
        lockfile.touch()
        with self.filelock:
            return func(self, *args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
class InvalidIndexError(HaystackError):
    """Error raised when an index cannot be opened."""
|
||||
|
@ -113,7 +138,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
begin = -sys.maxsize - 1
|
||||
elif field_type == 'float':
|
||||
begin = float('-inf')
|
||||
elif field_type == 'date' or field_type == 'datetime':
|
||||
elif field_type in ['date', 'datetime']:
|
||||
begin = '00010101000000'
|
||||
elif end == '*':
|
||||
if field_type == 'text':
|
||||
|
@ -122,7 +147,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
end = sys.maxsize
|
||||
elif field_type == 'float':
|
||||
end = float('inf')
|
||||
elif field_type == 'date' or field_type == 'datetime':
|
||||
elif field_type in ['date', 'datetime']:
|
||||
end = '99990101000000'
|
||||
|
||||
if field_type == 'float':
|
||||
|
@ -152,13 +177,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
`SearchBackend` defines the Xapian search backend for use with the Haystack
|
||||
API for Django search.
|
||||
|
||||
It uses the Xapian Python bindings to interface with Xapian, and as
|
||||
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
|
||||
Django is running with mod_python or mod_wsgi under Apache.
|
||||
|
||||
Until this issue has been fixed by Xapian, it is neccessary to set
|
||||
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
|
||||
`PythonInterpreter main_interpreter` when using mod_python.
|
||||
It uses the Xapian Python bindings to interface with Xapian.
|
||||
|
||||
In order to use this backend, `PATH` must be included in the
|
||||
`connection_options`. This should point to a location where you would your
|
||||
|
@ -178,6 +197,9 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
|
||||
Also sets the stemming language to be used to `language`.
|
||||
"""
|
||||
self.use_lockfile = bool(
|
||||
getattr(settings, 'HAYSTACK_XAPIAN_USE_LOCKFILE', True)
|
||||
)
|
||||
super().__init__(connection_alias, **connection_options)
|
||||
|
||||
if not 'PATH' in connection_options:
|
||||
|
@ -192,6 +214,10 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
except FileExistsError:
|
||||
pass
|
||||
|
||||
if self.use_lockfile:
|
||||
lockfile = Path(self.path) / "lockfile"
|
||||
self.filelock = FileLock(lockfile)
|
||||
|
||||
self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
|
||||
self.language = getattr(settings, 'HAYSTACK_XAPIAN_LANGUAGE', 'english')
|
||||
|
||||
|
@ -235,6 +261,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
self._update_cache()
|
||||
return self._columns
|
||||
|
||||
@filelocked
|
||||
def update(self, index, iterable, commit=True):
|
||||
"""
|
||||
Updates the `index` with any objects in `iterable` by adding/updating
|
||||
|
@ -486,6 +513,7 @@ class XapianSearchBackend(BaseSearchBackend):
|
|||
finally:
|
||||
database.close()
|
||||
|
||||
@filelocked
|
||||
def remove(self, obj, commit=True):
|
||||
"""
|
||||
Remove indexes for `obj` from the database.
|
||||
|
@ -1417,7 +1445,7 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
|
||||
Assumes term is not a list.
|
||||
"""
|
||||
if field_type == 'text' and field_name not in (DJANGO_CT,):
|
||||
if field_type == 'text' and field_name not in _EXACT_SEARCHFIELDS:
|
||||
term = '^ %s $' % term
|
||||
query = self._phrase_query(term.split(), field_name, field_type)
|
||||
else:
|
||||
|
@ -1592,7 +1620,7 @@ def _term_to_xapian_value(term, field_type):
|
|||
value = INTEGER_FORMAT % term
|
||||
elif field_type == 'float':
|
||||
value = xapian.sortable_serialise(term)
|
||||
elif field_type == 'date' or field_type == 'datetime':
|
||||
elif field_type in ['date', 'datetime']:
|
||||
if field_type == 'date':
|
||||
# http://stackoverflow.com/a/1937636/931303 and comments
|
||||
term = datetime.datetime.combine(term, datetime.time())
|
||||
|
@ -1655,7 +1683,7 @@ def _from_xapian_value(value, field_type):
|
|||
return int(value)
|
||||
elif field_type == 'float':
|
||||
return xapian.sortable_unserialise(value)
|
||||
elif field_type == 'date' or field_type == 'datetime':
|
||||
elif field_type in ['date', 'datetime']:
|
||||
datetime_value = datetime.datetime.strptime(value, DATETIME_FORMAT)
|
||||
if field_type == 'datetime':
|
||||
return datetime_value
|
||||
|
|
Loading…
Reference in New Issue