# Listing metadata: Python, 157 lines, 4.6 KiB.
from elasticsearch import Elasticsearch, RequestsHttpConnection
|
|
from elasticsearch.helpers import bulk
|
|
import config
|
|
|
|
# Establish Elasticsearch Connection
|
|
|
|
if config.ENVIRONMENT == 'local':
    # Local development: connect to the configured local host.
    es = Elasticsearch(config.ELASTICSEARCH_LOCAL_HOST)
elif config.ENVIRONMENT == 'staging':
    # Staging: HTTPS with certificate verification, authenticating with
    # config.AWS_AUTH over the requests-based connection class.
    es = Elasticsearch(
        hosts=config.ELASTICSEARCH_STAGING_HOST,
        http_auth=config.AWS_AUTH,
        use_ssl=True,
        verify_certs=True,
        connection_class=RequestsHttpConnection,
    )
# NOTE: for any other ENVIRONMENT value `es` is left undefined, so the index
# helpers in this module raise NameError when they are called.
|
|
|
|
# Create Index Settings
|
|
|
|
# Index settings shared by every index this module creates: one shard, no
# replicas, and a custom `html_analyzer` that applies the `html_strip`
# character filter ahead of the standard tokenizer.
default_index_settings = {
    'index': {
        'number_of_shards': 1,
        'number_of_replicas': 0,
    },
    'analysis': {
        'analyzer': {
            'html_analyzer': {
                'type': 'custom',
                'tokenizer': 'standard',
                'char_filter': ['html_strip'],
            },
        },
    },
}
|
|
|
|
# Field mappings for chapter documents, keyed by the 'doc' document type.
# `html_source` is analysed with the custom `html_analyzer`.
chapter_mappings = {
    'doc': {
        'properties': {
            'number': {'type': 'integer'},
            'title': {'type': 'keyword'},
            'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
            'search_text': {'type': 'nested'},
        },
    },
}
|
|
|
|
# Field mappings for note documents, keyed by the 'doc' document type.
# `html_source` is analysed with the custom `html_analyzer`.
note_mappings = {
    'doc': {
        'properties': {
            'title': {'type': 'keyword'},
            'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
            'search_text': {'type': 'nested'},
        },
    },
}
|
|
|
|
# Field mappings for tag documents, keyed by the 'doc' document type.
# Same fields as a note plus a keyword `color`.
tag_mappings = {
    'doc': {
        'properties': {
            'title': {'type': 'keyword'},
            'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
            'search_text': {'type': 'nested'},
            'color': {'type': 'keyword'},
        },
    },
}
|
|
|
|
# Request bodies for index creation: the shared settings paired with each
# index's own mappings.
chapter_index_settings = {
    'settings': default_index_settings,
    'mappings': chapter_mappings,
}
note_index_settings = {
    'settings': default_index_settings,
    'mappings': note_mappings,
}
tag_index_settings = {
    'settings': default_index_settings,
    'mappings': tag_mappings,
}
|
|
|
|
# Read Seed Data from Files
|
|
|
|
def get_chapter_text_from_seed_data(string):
    """Return the contents of ``./seed_data/<string>.html`` as text.

    The path is relative to the current working directory.

    Args:
        string: Base name of the seed file, without directory or extension.

    Returns:
        The full contents of the file, decoded as UTF-8.

    Raises:
        IOError: If the file is missing or cannot be read.
    """
    # Local import: io.open accepts `encoding` on both Python 2 and Python 3.
    import io

    # The name is used as-is. Encoding it to bytes first breaks the string
    # concatenation on Python 3 and fails on non-ASCII byte strings on
    # Python 2.
    fname = './seed_data/' + string + '.html'

    # The context manager closes the handle even if read() raises.
    with io.open(fname, 'r', encoding='utf-8') as html_file:
        return html_file.read()
|
|
|
|
def build_es_chapter_op(id, number, title, chap_file):
    """Build a bulk 'index' action for one chapter.

    Args:
        id: Document id stored under ``_id``.
        number: Chapter number stored in the ``number`` field.
        title: Chapter title stored in the ``title`` field.
        chap_file: Seed-file base name; its HTML is loaded with
            ``get_chapter_text_from_seed_data`` into ``html_source``.

    Returns:
        A dict with ``_op_type``, ``_id`` and ``_source`` keys.
    """
    source = {
        'number': number,
        'title': title,
        # Reads the seed file from disk each time an action is built.
        'html_source': get_chapter_text_from_seed_data(chap_file),
    }
    return {'_op_type': 'index', '_id': id, '_source': source}
|
|
|
|
def build_es_note_op(id, title, text):
    """Build a bulk 'index' action for one note.

    Args:
        id: Document id stored under ``_id``.
        title: Note title stored in the ``title`` field.
        text: Note body stored in the ``html_source`` field.

    Returns:
        A dict with ``_op_type``, ``_id`` and ``_source`` keys.
    """
    source = {
        'title': title,
        'html_source': text,
    }
    return {'_op_type': 'index', '_id': id, '_source': source}
|
|
|
|
# Bulk actions for the eighteen seed chapters. Each entry is built when the
# module is imported, so every chapter's seed file is read at import time.
SAMPLE_CHAPTERS = [
    build_es_chapter_op('AWNM3N3mxgFi4og697un', 1, 'Telemachus', 'telem'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vA', 2, 'Nestor', 'nestor'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vB', 3, 'Proteus', 'proteus'),
    # Title corrected from the misspelt 'Calyspo'.
    build_es_chapter_op('AWNmqpdHxgFi4og697vC', 4, 'Calypso', 'calypso'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vD', 5, 'Lotus Eaters', 'lotus'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vE', 6, 'Hades', 'hades'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vF', 7, 'Aeolus', 'aeolus'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vG', 8, 'Lestrygonians', 'lestry'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vH', 9, 'Scylla and Charybdis', 'scylla'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vI', 10, 'Wandering Rocks', 'wrocks'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vJ', 11, 'Sirens', 'sirens'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vK', 12, 'Cyclops', 'cyclops'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vL', 13, 'Nausicaa', 'nausicaa'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vM', 14, 'Oxen of the Sun', 'oxen'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vN', 15, 'Circe', 'circe'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vO', 16, 'Eumaeus', 'eumaeus'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vP', 17, 'Ithaca', 'ithaca'),
    build_es_chapter_op('AWNmqpdHxgFi4og697vQ', 18, 'Penelope', 'penelope'),
]
|
|
|
|
# Bulk actions for the seed notes.
SAMPLE_NOTES = [
    build_es_note_op('AWNmqpdHxgFi4og697vR', 'Kinch', 'A knife'),
    build_es_note_op('AWNmqpdHxgFi4og697vS', 'Lighthouse', 'A lighthouse'),
]
|
|
|
|
# Manipulate Indices
|
|
|
|
def delete_index(index):
    """Delete the named index through the module-level ``es`` client.

    HTTP 400 and 404 responses are passed in ``ignore``, so deleting an
    index that does not exist is not treated as an error.

    Args:
        index: Name of the index to delete.
    """
    es.indices.delete(index=index, ignore=[400, 404])
|
|
|
|
def create_index(index, settings):
    """Create the named index through the module-level ``es`` client.

    Args:
        index: Name of the index to create.
        settings: Request body sent with the create call; in this module it
            is a dict with 'settings' and 'mappings' keys.
    """
    es.indices.create(index=index, body=settings)
|
|
|
|
def index_seed_docs(index, docs):
    """Send ``docs`` to the named index with the bulk helper.

    Args:
        index: Name of the target index.
        docs: Iterable of bulk action dicts, such as those produced by
            ``build_es_chapter_op`` and ``build_es_note_op``.
    """
    # Every action uses the 'doc' document type.
    bulk(es, docs, index=index, doc_type='doc')
|
|
|
|
def refresh_all_indices():
    """Drop and recreate the 'chapters', 'notes' and 'tags' indices.

    All three indices are deleted first, then each is created again with its
    own settings and mappings. A status line is printed after each phase.
    """
    delete_index('chapters')
    delete_index('notes')
    delete_index('tags')
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print('Elasticsearch index deleted!')

    create_index('chapters', chapter_index_settings)
    create_index('notes', note_index_settings)
    create_index('tags', tag_index_settings)
    print('Elasticsearch index created!')
|
|
|
|
def refresh_seed_data():
    """Bulk-index the sample chapters and notes, then print a status line."""
    index_seed_docs('chapters', SAMPLE_CHAPTERS)
    index_seed_docs('notes', SAMPLE_NOTES)
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print('Successfully loaded sample data!')
|
|
|
|
def es_setup():
    """Recreate all indices, then load the seed data into them."""
    refresh_all_indices()
    refresh_seed_data()
|
|
|
|
# Run the full setup only when this file is executed as a script.
if __name__ == "__main__":
    es_setup()
|
|
|