Connecting to AWS Elasticsearch and extracting connection details to config.py

This commit is contained in:
Alex Hunt 2018-11-25 15:32:09 -08:00
parent 57a9e2d492
commit 5de6b905ab
3 changed files with 171 additions and 127 deletions

4
.gitignore vendored
View File

@ -1,3 +1,4 @@
config.py
/static
!/static/style.css
/lib
@ -12,4 +13,5 @@ pip-selfcheck.json
*.sublime-workspace
.DS_Store
.Python
npm-debug.log
npm-debug.log
venv/

View File

@ -1,11 +1,28 @@
from flask import Blueprint, render_template, abort, jsonify, request
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import json
import boto3
import sys
import config
# Elasticsearch local connection
# TODO: Extract to config
ELASTICSEARCH_HOST = '127.0.0.1:9200'
es = Elasticsearch(ELASTICSEARCH_HOST)
sys.path.insert(0,'..')
# TODO: Figure out which dependency tries to encode input to ascii
reload(sys)
sys.setdefaultencoding("utf-8")
if config.ENV_VARIABLE == 'local':
es = Elasticsearch(config.ELASTICSEARCH_LOCAL_HOST)
if config.ENV_VARIABLE == 'staging':
es = Elasticsearch(
hosts = config.ELASTICSEARCH_STAGING_HOST,
http_auth = config.AWS_AUTH,
use_ssl = True,
verify_certs = True,
connection_class = RequestsHttpConnection
)
api = Blueprint('api', __name__)
@ -15,47 +32,48 @@ def merge_id_and_source(id, source):
return response
# Elasticsearch interface functions
def es_document_list(doc_type):
def es_document_list(index):
body = {
'from': 0, 'size': 10000,
'query': {'match_all': {}},
}
if index == 'chapters':
body['sort'] = [
{'number': {'order': 'asc'}}
]
search = es.search(
index='joyce',
doc_type=doc_type,
index=index,
_source_exclude=['html_source', 'search_text'],
body={
'from': 0, 'size': 10000,
'query': {'match_all': {}},
'sort': [
{'number': {'order': 'asc'}}
]
}
body=body
)
res = []
for x in search['hits']['hits']:
res.append(merge_id_and_source(x['_id'], x['_source']))
return res
def es_get_document(doc_type, id):
def es_get_document(index, id):
res = es.get(
index='joyce',
doc_type=doc_type,
index=index,
id=id
)
data = merge_id_and_source(res['_id'], res['_source'])
return data
def es_index_document(doc_type, id, body):
def es_index_document(index, id, body):
print(index)
res = es.index(
index='joyce',
doc_type=doc_type,
index=index,
doc_type='doc',
id=id,
refresh=True,
body=body
)
return res
def es_create_document(doc_type, body):
def es_create_document(index, body):
res = es.index(
index='joyce',
doc_type=doc_type,
index=index,
doc_type='doc',
refresh=True,
body=body
)
@ -63,25 +81,25 @@ def es_create_document(doc_type, body):
def es_update_number(id, number):
res = es.update(
index='joyce',
doc_type='chapter',
index='chapters',
doc_type='doc',
id=id,
refresh=True,
body={'doc': {'number': number}}
)
def es_delete_document(doc_type, id):
def es_delete_document(index, id):
res = es.delete(
index='joyce',
doc_type=doc_type,
index=index,
doc_type='doc',
id=id,
refresh=True
)
if doc_type == 'chapter':
if index == 'chapters':
return renumber_chapters()
def renumber_chapters():
chapters = es_document_list('chapter')
chapters = es_document_list('chapters')
for index, chapter in enumerate(chapters):
if index + 1 != chapter['number']:
es_update_number(chapter['id'], index + 1)
@ -112,7 +130,7 @@ def group_search_results(es_results):
def es_search_text(body):
search = es.search(
index='joyce',
# index=doc_type,
filter_path=[
'hits.hits._id',
'hits.hits._type',
@ -169,31 +187,32 @@ def es_search_text(body):
''' Get all chapters '''
@api.route('/chapters/')
def get_chapters():
return jsonify(es_document_list('chapter'))
return jsonify(es_document_list('chapters'))
''' Get specific chapter '''
@api.route('/chapters/<string:id>')
def get_chapter(id):
data = es_get_document('chapter', id)
data = es_get_document('chapters', id)
return jsonify(data)
''' New chapter '''
@api.route('/chapters/', methods=['PUT'])
def create_chapter():
es_create_document('chapter', request.data)
return jsonify(es_document_list('chapter'))
es_create_document('chapters', request.data)
return jsonify(es_document_list('chapters'))
''' Write chapter '''
@api.route('/chapters/<string:id>', methods=['POST'])
def write_chapter(id):
es_index_document('chapter', id, request.data)
return jsonify(es_document_list('chapter'))
data=json.loads(request.data)
es_index_document('chapters', id, data)
return jsonify(es_document_list('chapters'))
''' Delete chapter '''
@api.route('/chapters/<string:id>', methods=['DELETE'])
def delete_chapter(id):
es_delete_document('chapter', id)
return jsonify(es_document_list('chapter'))
es_delete_document('chapters', id)
return jsonify(es_document_list('chapters'))
#
# Note API Routes
@ -202,31 +221,31 @@ def delete_chapter(id):
''' Get all notes '''
@api.route('/notes/')
def get_notes():
return jsonify(es_document_list('note'))
return jsonify(es_document_list('notes'))
''' Get specific note '''
@api.route('/notes/<string:id>')
def get_note(id):
data = es_get_document('note', id)
data = es_get_document('notes', id)
return jsonify(data)
''' New note '''
@api.route('/notes/', methods=['PUT'])
def create_note():
es_create_document('note', request.data)
return jsonify(es_document_list('note'))
es_create_document('notes', request.data)
return jsonify(es_document_list('notes'))
''' Write note '''
@api.route('/notes/<string:id>', methods=['POST'])
def write_note(id):
es_index_document('note', id, request.data)
return jsonify(es_document_list('note'))
es_index_document('notes', id, request.data)
return jsonify(es_document_list('notes'))
''' Delete note '''
@api.route('/notes/<string:id>', methods=['DELETE'])
def delete_note(id):
es_delete_document('note', id)
return jsonify(es_document_list('note'))
es_delete_document('notes', id)
return jsonify(es_document_list('notes'))
#
# Tag API Routes
@ -235,31 +254,31 @@ def delete_note(id):
''' Get all tags '''
@api.route('/tags/')
def get_tags():
return jsonify(es_document_list('tag'))
return jsonify(es_document_list('tags'))
''' Get specific tag '''
@api.route('/tags/<string:id>')
def get_tag(id):
data = es_get_document('tag', id)
data = es_get_document('tags', id)
return jsonify(data)
''' New tag '''
@api.route('/tags/', methods=['PUT'])
def create_tag():
es_create_document('tag', request.data)
return jsonify(es_document_list('tag'))
es_create_document('tags', request.data)
return jsonify(es_document_list('tags'))
''' Write tag '''
@api.route('/tags/<string:id>', methods=['POST'])
def write_tag(id):
es_index_document('tag', id, request.data)
return jsonify(es_document_list('tag'))
es_index_document('tags', id, request.data)
return jsonify(es_document_list('tags'))
''' Delete tag '''
@api.route('/tags/<string:id>', methods=['DELETE'])
def delete_tag(id):
es_delete_document('tag', id)
return jsonify(es_document_list('tag'))
es_delete_document('tags', id)
return jsonify(es_document_list('tags'))
#
# Search API Routes

163
setup.py
View File

@ -1,14 +1,18 @@
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, RequestsHttpConnection
from elasticsearch.helpers import bulk
import config
# Elasticsearch index setup
if config.ENV_VARIABLE == 'local':
es = Elasticsearch(config.ELASTICSEARCH_LOCAL_HOST)
#Elasticsearch local connection
#TODO: Extract to config
ELASTICSEARCH_HOST = '127.0.0.1:9200'
es = Elasticsearch(ELASTICSEARCH_HOST)
print 'Elasticsearch index deleted!'
if config.ENV_VARIABLE == 'staging':
es = Elasticsearch(
hosts = config.ELASTICSEARCH_STAGING_HOST,
http_auth = config.AWS_AUTH,
use_ssl = True,
verify_certs = True,
connection_class = RequestsHttpConnection
)
def get_chapter_text_from_seed_data(string):
string = string.encode('ascii', 'ignore')
@ -19,176 +23,195 @@ def get_chapter_text_from_seed_data(string):
return chapter_source
# DELETE INDEX:
es.indices.delete(index='joyce', ignore=[400, 404])
es.indices.delete(index='chapters', ignore=[400, 404])
es.indices.delete(index='notes', ignore=[400, 404])
es.indices.delete(index='tags', ignore=[400, 404])
print 'Elasticsearch index deleted!'
create_index_settings = {
'settings' : {
'index': {
'number_of_shards' : 1,
'number_of_replicas' : 0
},
'analysis': {
'analyzer': {
'html_analyzer': {
'type': 'custom',
'tokenizer': 'standard',
'char_filter': ['html_strip']
}
default_index_settings = {
'index': {
'number_of_shards' : 1,
'number_of_replicas' : 0
},
'analysis': {
'analyzer': {
'html_analyzer': {
'type': 'custom',
'tokenizer': 'standard',
'char_filter': ['html_strip']
}
}
},
'mappings': {
'chapter': {
'properties': {
'number': {'type': 'integer'},
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'}
}
},
'note': {
'properties': {
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'}
}
},
'tag': {
'properties': {
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'},
'color': {'type': 'keyword'}
}
}
}
}
es.indices.create(index='joyce', body=create_index_settings)
chapter_mappings = {
'doc': {
'properties': {
'number': {'type': 'integer'},
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'}
}
},
}
note_mappings = {
'doc': {
'properties': {
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'}
}
},
}
tag_mappings = {
'doc': {
'properties': {
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'search_text': {'type': 'nested'},
'color': {'type': 'keyword'}
}
}
}
chapter_index_settings = {'settings': default_index_settings, 'mappings': chapter_mappings}
note_index_settings = {'settings': default_index_settings, 'mappings': note_mappings}
tag_index_settings = {'settings': default_index_settings, 'mappings': tag_mappings}
# TODO: Bulk create
es.indices.create(index='chapters', body=chapter_index_settings)
es.indices.create(index='notes', body=note_index_settings)
es.indices.create(index='tags', body=tag_index_settings)
print 'Elasticsearch index created!'
# Sample data
SAMPLE_DATA = [
{'_op_type': 'index','_type': 'chapter', '_id': 'AWNM3N3mxgFi4og697un', '_source': {
SAMPLE_CHAPTERS = [
{'_op_type': 'index', '_id': 'AWNM3N3mxgFi4og697un', '_source': {
'number': 1,
'title': 'Telemachus',
'html_source': get_chapter_text_from_seed_data('telem')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vA', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vA', '_source': {
'number': 2,
'title': 'Nestor',
'html_source': get_chapter_text_from_seed_data('nestor')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vB', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vB', '_source': {
'number': 3,
'title': 'Proteus',
'html_source': get_chapter_text_from_seed_data('proteus')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vC', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vC', '_source': {
'number': 4,
'title': 'Calyspo',
'html_source': get_chapter_text_from_seed_data('calypso')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vD', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vD', '_source': {
'number': 5,
'title': 'Lotus Eaters',
'html_source': get_chapter_text_from_seed_data('lotus')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vE', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vE', '_source': {
'number': 6,
'title': 'Hades',
'html_source': get_chapter_text_from_seed_data('hades')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vF', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vF', '_source': {
'number': 7,
'title': 'Aeolus',
'html_source': get_chapter_text_from_seed_data('aeolus')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vG', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vG', '_source': {
'number': 8,
'title': 'Lestrygonians',
'html_source': get_chapter_text_from_seed_data('lestry')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vH', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vH', '_source': {
'number': 9,
'title': 'Scylla and Charybdis',
'html_source': get_chapter_text_from_seed_data('scylla')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vI', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vI', '_source': {
'number': 10,
'title': 'Wandering Rocks',
'html_source': get_chapter_text_from_seed_data('wrocks')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vJ', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vJ', '_source': {
'number': 11,
'title': 'Sirens',
'html_source': get_chapter_text_from_seed_data('sirens')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vK', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vK', '_source': {
'number': 12,
'title': 'Cyclops',
'html_source': get_chapter_text_from_seed_data('cyclops')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vL', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vL', '_source': {
'number': 13,
'title': 'Nausicaa',
'html_source': get_chapter_text_from_seed_data('nausicaa')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vM', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vM', '_source': {
'number': 14,
'title': 'Oxen of the Sun',
'html_source': get_chapter_text_from_seed_data('oxen')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vN', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vN', '_source': {
'number': 15,
'title': 'Circe',
'html_source': get_chapter_text_from_seed_data('circe')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vO', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vO', '_source': {
'number': 16,
'title': 'Eumaeus',
'html_source': get_chapter_text_from_seed_data('eumaeus')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vP', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vP', '_source': {
'number': 17,
'title': 'Ithaca',
'html_source': get_chapter_text_from_seed_data('ithaca')
},
},
{'_op_type': 'index', '_type': 'chapter','_id': 'AWNmqpdHxgFi4og697vQ', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vQ', '_source': {
'number': 18,
'title': 'Penelope',
'html_source': get_chapter_text_from_seed_data('penelope')
},
},
{'_op_type': 'index', '_type': 'note','_id': 'AWNmqpdHxgFi4og697vR', '_source': {
]
SAMPLE_NOTES = [
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vR', '_source': {
'title': 'Kinch',
'html_source': 'A knife'
},
},
{'_op_type': 'index', '_type': 'note','_id': 'AWNmqpdHxgFi4og697vS', '_source': {
{'_op_type': 'index', '_id': 'AWNmqpdHxgFi4og697vS', '_source': {
'title': 'Lighthouse',
'html_source': 'A lighthouse'
},
},
]
bulk(es, SAMPLE_DATA, index='joyce', doc_type='chapter')
bulk(es, SAMPLE_CHAPTERS, index='chapters', doc_type='doc')
bulk(es, SAMPLE_NOTES, index='notes', doc_type='doc')
print('Successfully loaded sample data!')