Save a plain-text version of document for search highlighting

This commit is contained in:
Alex Hunt 2018-04-28 18:38:19 -04:00
parent bfabe75e97
commit 6b27755a6a
15 changed files with 248 additions and 120 deletions

View File

@ -22,6 +22,25 @@
+ Abstract Notes Page into Documents Page, Create Text Route +
+ Make Links Pop Modals +
+ Make Remove Highlight Work with Contextual Disabling +
TODAY:
- HTML Char Strip ES Search
- Index HTML-Stripped Field for Searching
- Update Mappings in Setup.py
- Update api.py to duplicate text field
- Search Result Components
- Light Refactoring
TOMORROW:
- Implement React-Router
- Review dependencies
SHORT LIST:
- Make Annotate Mode Ignore Keyboard Input
- More Responsive CSS
- Functioning Search Page

View File

@ -18,7 +18,7 @@ def es_document_list(doc_type):
search = es.search(
index='joyce',
doc_type=doc_type,
_source_exclude=['text'],
_source_exclude=['html_source', 'plain_text'],
body={
'from': 0, 'size': 10000,
'query': {'match_all': {}},
@ -84,63 +84,81 @@ def renumber_chapters():
for index, chapter in enumerate(chapters):
if index + 1 != chapter['number']:
es_update_number(chapter['id'], index + 1)
return chapters
return chapters
def group_search_results(es_results):
    """Group Elasticsearch hits by their document type.

    Args:
        es_results: iterable of hit dicts, each carrying '_id', '_type' and
            a 'highlight' dict with a 'plain_text' entry.

    Returns:
        A dict mapping each '_type' seen in the hits to a list of
        {'id': ..., 'highlight': ...} entries, in the order the hits arrived.
        An empty input yields an empty dict.

    Raises:
        KeyError: if a hit lacks '_id', '_type' or a 'plain_text' highlight.
    """
    output_results = {}
    for result in es_results:
        entry = {'id': result['_id'], 'highlight': result['highlight']['plain_text']}
        # File each hit only under its own type; previously every hit was
        # appended to every type's list.
        output_results.setdefault(result['_type'], []).append(entry)
    return output_results
def es_search_text(body):
search = es.search(
index='joyce',
# _source_exclude=['text'],
filter_path=['hits.hits._id', 'hits.hits._type', 'hits.hits.highlight', 'hits.hits.title'],
body={
'from': 0, 'size': 10,
'from': 0,
'size': 10,
'query': {
'match': {
'text': {
'query': body,
'analyzer': 'html_analyzer'
'plain_text': {
'query': body
}
}
},
'highlight' : {
'fields' : {
'text': {
'_all' : { 'pre_tags' : [''], 'post_tags' : [''] },
'plain_text': {
'matched_fields': 'text',
'type': 'unified',
'type': 'unified'
}
}
}
}
)
return search['hits']['hits']
# TODO: Handle no hits
results = search['hits']['hits']
grouped_results = group_search_results(results)
# return search
return grouped_results
#
# Chapter API Routes
#
""" Get all chapters """
''' Get all chapters '''
@api.route('/chapters/')
def get_chapters():
return jsonify(es_document_list('chapter'))
""" Get specific chapter """
''' Get specific chapter '''
@api.route('/chapters/<string:id>')
def get_chapter(id):
data = es_get_document('chapter', id)
return jsonify(data)
""" New chapter """
''' New chapter '''
@api.route('/chapters/', methods=['PUT'])
def create_chapter():
es_create_document('chapter', request.data)
return jsonify(es_document_list('chapter'))
""" Write chapter """
''' Write chapter '''
@api.route('/chapters/<string:id>', methods=['POST'])
def write_chapter(id):
es_index_document('chapter', id, request.data)
return jsonify(es_document_list('chapter'))
""" Delete chapter """
''' Delete chapter '''
@api.route('/chapters/<string:id>', methods=['DELETE'])
def delete_chapter(id):
es_delete_document('chapter', id)
@ -150,30 +168,30 @@ def delete_chapter(id):
# Note API Routes
#
""" Get all notes """
''' Get all notes '''
@api.route('/notes/')
def get_notes():
return jsonify(es_document_list('note'))
""" Get specific chapter """
''' Get specific note '''
@api.route('/notes/<string:id>')
def get_note(id):
data = es_get_document('note', id)
return jsonify(data)
""" New chapter """
''' New note '''
@api.route('/notes/', methods=['PUT'])
def create_note():
es_create_document('note', request.data)
return jsonify(es_document_list('note'))
""" Write chapter """
''' Write note '''
@api.route('/notes/<string:id>', methods=['POST'])
def write_note(id):
es_index_document('note', id, request.data)
return jsonify(es_document_list('note'))
""" Delete chapter """
''' Delete note '''
@api.route('/notes/<string:id>', methods=['DELETE'])
def delete_note(id):
es_delete_document('note', id)
@ -183,7 +201,7 @@ def delete_note(id):
# Search API Routes
#
""" Basic Text Search """
''' Basic Text Search '''
@api.route('/search/', methods=['POST'])
def search_text():
return jsonify(es_search_text(request.data))

View File

@ -14,6 +14,7 @@
"bootstrap": "^4.1.0",
"draft-js": "^0.10.4",
"draft-js-export-html": "^1.2.0",
"draft-js-export-markdown": "^1.2.0",
"draft-js-import-html": "^1.2.1",
"file-loader": "^1.1.11",
"font-awesome": "^4.7.0",

View File

@ -40,19 +40,17 @@ create_index_settings = {
'mappings': {
'chapter': {
'properties': {
'number': {'type': 'integer'},
'title': {'type': 'keyword'},
'text': {
'type': 'text',
}
}
},
'number': {'type': 'integer'},
'title': {'type': 'keyword'},
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'plain_text': {'type': 'text'}
}
},
'note': {
'properties': {
'title': {'type': 'keyword'},
'text': {
'type': 'text',
}
'html_source': {'type': 'text', 'analyzer': 'html_analyzer'},
'plain_text': {'type': 'text'}
}
}
}
@ -66,119 +64,119 @@ SAMPLE_DATA = [
{'_op_type': 'index','_type': 'chapter', '_source': {
'number': 1,
'title': 'Telemachus',
'text': get_chapter_text_from_seed_data('telem')
'html_source': get_chapter_text_from_seed_data('telem')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 2,
'title': 'Nestor',
'text': get_chapter_text_from_seed_data('nestor')
'html_source': get_chapter_text_from_seed_data('nestor')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 3,
'title': 'Proteus',
'text': get_chapter_text_from_seed_data('proteus')
'html_source': get_chapter_text_from_seed_data('proteus')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 4,
'title': 'Calyspo',
'text': get_chapter_text_from_seed_data('calypso')
'html_source': get_chapter_text_from_seed_data('calypso')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 5,
'title': 'Lotus Eaters',
'text': get_chapter_text_from_seed_data('lotus')
'html_source': get_chapter_text_from_seed_data('lotus')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 6,
'title': 'Hades',
'text': get_chapter_text_from_seed_data('hades')
'html_source': get_chapter_text_from_seed_data('hades')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 7,
'title': 'Aeolus',
'text': get_chapter_text_from_seed_data('aeolus')
'html_source': get_chapter_text_from_seed_data('aeolus')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 8,
'title': 'Lestrygonians',
'text': get_chapter_text_from_seed_data('lestry')
'html_source': get_chapter_text_from_seed_data('lestry')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 9,
'title': 'Scylla and Charybdis',
'text': get_chapter_text_from_seed_data('scylla')
'html_source': get_chapter_text_from_seed_data('scylla')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 10,
'title': 'Wandering Rocks',
'text': get_chapter_text_from_seed_data('wrocks')
'html_source': get_chapter_text_from_seed_data('wrocks')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 11,
'title': 'Sirens',
'text': get_chapter_text_from_seed_data('sirens')
'html_source': get_chapter_text_from_seed_data('sirens')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 12,
'title': 'Cyclops',
'text': get_chapter_text_from_seed_data('cyclops')
'html_source': get_chapter_text_from_seed_data('cyclops')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 13,
'title': 'Nausicaa',
'text': get_chapter_text_from_seed_data('nausicaa')
'html_source': get_chapter_text_from_seed_data('nausicaa')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 14,
'title': 'Oxen of the Sun',
'text': get_chapter_text_from_seed_data('oxen')
'html_source': get_chapter_text_from_seed_data('oxen')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 15,
'title': 'Circe',
'text': get_chapter_text_from_seed_data('circe')
'html_source': get_chapter_text_from_seed_data('circe')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 16,
'title': 'Eumaeus',
'text': get_chapter_text_from_seed_data('eumaeus')
'html_source': get_chapter_text_from_seed_data('eumaeus')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 17,
'title': 'Ithaca',
'text': get_chapter_text_from_seed_data('ithaca')
'html_source': get_chapter_text_from_seed_data('ithaca')
},
},
{'_op_type': 'index', '_type': 'chapter', '_source': {
'number': 18,
'title': 'Penelope',
'text': get_chapter_text_from_seed_data('penelope')
'html_source': get_chapter_text_from_seed_data('penelope')
},
},
{'_op_type': 'index', '_type': 'note', '_source': {
'title': 'Kinch',
'text': 'A knife'
'html_source': 'A knife'
},
},
{'_op_type': 'index', '_type': 'note', '_source': {
'title': 'Lighthouse',
'text': 'A lighthouse'
'html_source': 'A lighthouse'
},
},
]

View File

@ -50,7 +50,7 @@ export const editTextReceived = data =>
type: 'SET_CURRENT_DOCUMENT',
id: id,
docType: docType
})
})
export const createNewDocument = docType =>
({
@ -168,4 +168,27 @@ export const editTextReceived = data =>
type: 'UPDATE_SEARCH_INPUT',
data: searchInput.target.value
})
}
// export const updateSearchResults = searchResults => {
// return ({
// type: 'UPDATE_SEARCH_RESULTS',
// data: searchResults
// })
// }
/**
 * Action creator fired when the search button is clicked.
 *
 * @param {*} searchInput - the current search input value to search for
 * @returns {{type: string, data: *}} a CLICK_SEARCH action carrying the input
 */
export const clickSearch = searchInput => {
  // Action creators stay side-effect free: the debug console.log was removed.
  return ({
    type: 'CLICK_SEARCH',
    data: searchInput
  })
}
/**
 * Action creator for the search request/response cycle.
 *
 * @param {{data: *, status: *}} [response={}] - request payload or HTTP result
 * @returns {{type: string, data: *, status: *}} a GET_SEARCH_RESULTS action;
 *   status falls back to 'request' when the response carries no truthy status
 */
export const getSearchResults = (response={}) => {
  const { data, status } = response
  return {
    type: 'GET_SEARCH_RESULTS',
    data,
    status: status || 'request'
  }
}

View File

@ -31,19 +31,19 @@ export const NoteButton = ({note, currentNote, onClick}) =>
</button>
</div>
export const HighlightButton = ({highlightToggle, onHighlightClick}) =>
export const HighlightButton = ({highlightToggle, onClick}) =>
<div>
<div id='highlight_button' className='text-center'>
<button onClick={onHighlightClick} className={highlightToggle ? 'btn btn-primary btn-lg' : 'btn btn-outline-primary btn-lg'}>
<button onClick={onClick} className={highlightToggle ? 'btn btn-primary btn-lg' : 'btn btn-outline-primary btn-lg'}>
{highlightToggle ? 'Hide Notes' : 'Highlight Notes'}
</button>
</div>
</div>
export const SearchButton = () =>
export const SearchButton = ({searchInput, onClick}) =>
<div>
<div id='search_button' className='text-center'>
<button className='btn btn-primary btn-sm'>
<button className='btn btn-primary btn-sm' onClick={()=>onClick(searchInput)}>
Search <i className='fa fa_inline fa-search'></i>
</button>
</div>

View File

@ -0,0 +1,13 @@
import React from 'react'
const SearchResultsBox = ({searchResults}) =>
<div className='search_results_box'>
<div className='chapter_search_results'>
<h4>Chapters</h4>
{searchResults.chapter && searchResults.chapter.map(result =>
<div key={result.id}>{result.highlight}</div>
)}
</div>
</div>
export default SearchResultsBox

View File

@ -3,12 +3,18 @@ import { connect } from 'react-redux'
import { Editor } from 'draft-js'
import { SearchButton } from '../components/button'
import { updateSearchInput } from '../actions'
import SearchResultsBox from '../components/searchResultsBox'
import { updateSearchInput, clickSearch } from '../actions'
const JoyceSearchContent = ({searchResults, searchInput, onSearchInputChange}) =>
<div id='search_content' className='col-md-11'>
<input type='text' value={searchInput} onChange={onSearchInputChange}/>
<SearchButton />
const JoyceSearchContent = ({searchResults, searchInput, onSearchInputChange, onSearchClick}) =>
<div id='search_content' className='row'>
<div className='col-sm-2'>
<SearchButton onClick={onSearchClick} searchInput={searchInput}/>
</div>
<div className='col-sm-10'>
<input id='search_input' type='text' value={searchInput} onChange={onSearchInputChange}/>
</div>
<SearchResultsBox searchResults={searchResults} />
</div>
const mapStateToProps = state => {
@ -22,6 +28,9 @@ const mapDispatchToProps = dispatch => {
return {
onSearchInputChange: searchInput => {
dispatch(updateSearchInput(searchInput))
},
onSearchClick: searchInput => {
dispatch(clickSearch(searchInput))
}
}
}

View File

@ -8,9 +8,7 @@ const JoyceSearchPage = () =>
<div>
<Navbar />
<div id='joyce_search' className='container-fluid'>
<div className="row">
<JoyceSearchContentContainer />
</div>
<JoyceSearchContentContainer />
</div>
</div>

48
src/middleware/http.js Normal file
View File

@ -0,0 +1,48 @@
import axios from 'axios'
const apiRoute = '/api/'
// Axios HTTP Functions
/**
 * GET the list of documents of one type.
 *
 * @param {string} docType - route segment appended to the API root
 * @param {*} state - passed through unchanged into the result
 * @returns {Promise<object>} resolves to {status, docType, state, data};
 *   status is 'success' with the response body, or 'error' with the error
 */
export const HTTPGetDocumentList = (docType, state) => {
  const url = apiRoute + docType
  return axios.get(url)
    .then(res => ({status: 'success', docType, state, data: res.data}))
    .catch(error => ({status: 'error', docType, state, data: error}))
}
/**
 * GET a single document by id.
 *
 * @param {*} id - document id appended to the docType route
 * @param {string} docType - route segment appended to the API root
 * @param {*} state - passed through unchanged into the result
 * @returns {Promise<object>} resolves to {id, status, docType, state, data};
 *   status is 'success' with the response body, or 'error' with the error
 */
export const HTTPGetDocumentText = (id, docType, state) => {
  const url = apiRoute + docType + '/' + id
  return axios.get(url)
    .then(res => ({id, status: 'success', docType, state, data: res.data}))
    .catch(error => ({id, status: 'error', docType, state, data: error}))
}
/**
 * DELETE a single document by id.
 *
 * @param {*} id - document id appended to the docType route
 * @param {string} docType - route segment appended to the API root
 * @returns {Promise<object>} resolves to {id, status, docType, data};
 *   status is 'success' with the response body, or 'error' with the error
 */
export const HTTPDeleteDocument = (id, docType) => {
  const url = apiRoute + docType + '/' + id
  return axios.delete(url)
    .then(res => ({id, status: 'success', docType, data: res.data}))
    .catch(error => ({id, status: 'error', docType, data: error}))
}
/**
 * PUT a new document of the given type.
 *
 * @param {string} docType - route segment appended to the API root
 * @param {*} data - request body handed to axios
 * @returns {Promise<object>} resolves to {status, docType, data};
 *   status is 'success' with the response body, or 'error' with the error
 */
export const HTTPPutCreateDocument = (docType, data) => {
  const url = apiRoute + docType + '/'
  return axios.put(url, data)
    .then(res => ({status: 'success', docType, data: res.data}))
    .catch(error => ({status: 'error', docType, data: error}))
}
/**
 * POST updated contents for an existing document.
 *
 * @param {*} id - document id appended to the docType route
 * @param {string} docType - route segment appended to the API root
 * @param {object} data - request body handed to axios
 * @returns {Promise<object>} resolves to {id, status, docType, data}; on
 *   success the id is read from the request body (data.id), on error it is
 *   the id argument and data is the error
 */
export const HTTPPostWriteDocument = (id, docType, data) => {
  const url = apiRoute + docType + '/' + id
  return axios.post(url, data)
    // data.id is read inside the handler so a missing body still lands in catch
    .then(res => ({id: data.id, status: 'success', docType, data: res.data}))
    .catch(error => ({id, status: 'error', docType, data: error}))
}
/**
 * POST a search query to the search route.
 *
 * @param {*} data - the search input; sent wrapped as { data } in the body
 * @returns {Promise<object>} resolves to {status, data}; status is 'success'
 *   with the response body, or 'error' with the error that was caught
 */
export const HTTPPostSearchResults = (data) =>
  axios.post(apiRoute + 'search/', { data }).then(res => {
    return {status: 'success', data: res.data}
  }).catch(error => {
    // `res` does not exist in this handler; report the caught error, as the
    // other HTTP helpers do, instead of throwing a ReferenceError.
    return {status: 'error', data: error}
  })

View File

@ -1,5 +1,7 @@
import axios from 'axios'
import { stateToHTML } from 'draft-js-export-html'
import { stateToMarkdown } from 'draft-js-export-markdown'
import { convertToRaw } from 'draft-js'
import {
getDocumentList,
@ -8,47 +10,43 @@ import {
saveDocument,
setCurrentDocument,
createNewChapter,
getSearchResults
} from '../actions'
import {
HTTPGetDocumentList,
HTTPGetDocumentText,
HTTPDeleteDocument,
HTTPPutCreateDocument,
HTTPPostWriteDocument,
HTTPPostSearchResults } from './http.js'
import { getFirstDocument } from '../mixins/firstDocument'
let apiRoute = '/api/'
// Options passed to stateToHTML: LINK entities are emitted as <a> elements
// that carry the attributes targeting '#annotation_modal'; any other entity
// type gets no custom styling (the callback returns undefined).
const html_export_options = {
  entityStyleFn: entity => {
    const isLink = entity.get('type').toUpperCase() === 'LINK'
    if (!isLink) {
      return undefined
    }
    const { url } = entity.getData()
    return {
      element: 'a',
      attributes: {
        'href': url,
        'data-target': '#annotation_modal',
        'data-toggle': 'modal'
      }
    }
  }
}
// Axios HTTP Functions
const HTTPGetDocumentList = (docType, state) =>
axios.get(apiRoute + docType).then(res => {
return {status: 'success', docType: docType, state: state, data: res.data}
}).catch(error => {
return {status: 'error', docType: docType, state: state, data: error}
})
const HTTPGetDocumentText = (id, docType, state) =>
axios.get(apiRoute + docType + '/' + id).then(res => {
return {id: id, status: 'success', docType: docType, state: state, data: res.data}
}).catch(error => {
return {id: id, status: 'error', docType: docType, state: state, data: error}
})
const HTTPDeleteDocument = (id, docType) =>
axios.delete(apiRoute + docType + '/' + id).then(res => {
return {id: id, status: 'success', docType: docType, data: res.data}
}).catch(error => {
return {id: id, status: 'error', docType: docType, data: error}
})
const HTTPPutCreateDocument = (docType, data) =>
axios.put(apiRoute + docType + '/', data).then(res => {
return {status: 'success', docType: docType, data: res.data}
}).catch(error => {
return {status: 'error', docType: docType, data: error}
})
const HTTPPostWriteDocument = (id, docType, data) =>
axios.post(apiRoute + docType + '/' + id, data).then(res => {
return {id: data.id, status: 'success', docType: docType, data: res.data}
}).catch(error => {
return {id: id, status: 'error', docType: docType, data: error}
})
// Flattens a Draft.js content state to plain text: the text of every raw
// block, each followed by a newline, concatenated in block order.
const convertToPlainText = contentState => {
  const { blocks } = convertToRaw(contentState)
  return blocks.map(block => block.text + '\n').join('')
}
// API Middleware
export const joyceAPI = store => next => action => {
@ -105,23 +103,7 @@ export const joyceAPI = store => next => action => {
break
case 'SUBMIT_DOCUMENT_EDIT':
const textContent = action.editorState.getCurrentContent()
const options = {
entityStyleFn: (entity) => {
const entityType = entity.get('type').toUpperCase()
if (entityType === 'LINK') {
const data = entity.getData()
return {
element: 'a',
attributes: {
'href': data.url,
'data-target': '#annotation_modal',
'data-toggle': 'modal'
}
}
}
}
}
const data = { title: action.documentTitleInput, text: stateToHTML(textContent, options) }
const data = { title: action.documentTitleInput, html_source: stateToHTML(textContent, html_export_options), plain_text: convertToPlainText(textContent) }
if (action.currentDocument.id) {
data.id = action.currentDocument.id
}
@ -151,6 +133,17 @@ export const joyceAPI = store => next => action => {
case 'SELECT_ANNOTATION_NOTE':
store.dispatch(getDocumentText({id: action.id, docType: 'notes', state: 'annotationNote'}))
break
// Search Action Middleware
case 'CLICK_SEARCH':
store.dispatch(getSearchResults({data: action.data}))
break
case 'GET_SEARCH_RESULTS':
if (action.status === 'request') {
HTTPPostSearchResults(action.data).then(response =>
store.dispatch(getSearchResults(response))
)
}
break
default:
break
}

View File

@ -12,7 +12,7 @@ const currentDocument = (state={}, action) => {
return {}
}
case 'CREATE_DOCUMENT':
return {id: null, number: null, title: '', text: ''}
return {id: null, number: null, title: '', html_source: ''}
default:
return state
}

View File

@ -29,7 +29,7 @@ const editorState = (state=blankEditor, action) => {
switch(action.type) {
case 'GET_DOCUMENT_TEXT':
if (action.status === 'success' && action.state === 'currentDocument') {
const editorState = EditorState.createWithContent(stateFromHTML(action.data.text), decorator)
const editorState = EditorState.createWithContent(stateFromHTML(action.data.html_source), decorator)
return editorState
} else if (action.status === 'request' && action.state === 'currentDocument') {
return blankEditor

View File

@ -1,5 +1,9 @@
const searchResults = (state={}, action) => {
const searchResults = (state='', action) => {
switch(action.type) {
case 'GET_SEARCH_RESULTS':
if (action.status === 'success') {
return action.data
} else { return state }
default:
return state
}

View File

@ -20,3 +20,7 @@ $fa-font-path: "../../node_modules/font-awesome/fonts";
#joyce_search > div {
margin-top: 20px;
}
#search_input {
width: 100%;
}