forked from tildeverse/tilde.news
Search: drop Sphinx for searching, use SQL fulltext
Sphinx's searchd is very buggy, often crashing while rebuilding its indexes, not starting up, or just spinning at 100% CPU. It is also a big external dependency that might not be available on other hosting platforms. This removes the ability to search both comments and stories at once, though I don't know how useful that was anyway. We gain boolean searching and, in the future, the ability to craft queries more carefully using particular keywords like "user:blah" or "tag:blah" to narrow things down.
This commit is contained in:
parent
ac6b9f5963
commit
10cde11f83
5
Gemfile
5
Gemfile
|
@ -8,11 +8,6 @@ gem "mysql2", ">= 0.3.14"
|
|||
|
||||
# uncomment to use PostgreSQL
|
||||
# gem "pg"
|
||||
#
|
||||
# NOTE: If you use PostgreSQL, you must still leave enabled the above mysql2
|
||||
# gem for Sphinx full text search to function.
|
||||
|
||||
gem "thinking-sphinx", "~> 3.1.2"
|
||||
|
||||
gem "uglifier", ">= 1.3.0"
|
||||
gem "jquery-rails", "~> 3.1.3"
|
||||
|
|
13
Gemfile.lock
13
Gemfile.lock
|
@ -57,9 +57,6 @@ GEM
|
|||
activesupport (>= 4.1.0)
|
||||
htmlentities (4.3.4)
|
||||
i18n (0.8.1)
|
||||
innertube (1.1.0)
|
||||
joiner (0.3.4)
|
||||
activerecord (>= 4.1.0)
|
||||
jquery-rails (3.1.4)
|
||||
railties (>= 3.0, < 5.0)
|
||||
thor (>= 0.14, < 2.0)
|
||||
|
@ -69,7 +66,6 @@ GEM
|
|||
machinist (2.0)
|
||||
mail (2.6.4)
|
||||
mime-types (>= 1.16, < 4)
|
||||
middleware (0.1.0)
|
||||
mime-types (3.1)
|
||||
mime-types-data (~> 3.2015)
|
||||
mime-types-data (3.2016.0521)
|
||||
|
@ -108,7 +104,6 @@ GEM
|
|||
thor (>= 0.18.1, < 2.0)
|
||||
raindrops (0.13.0)
|
||||
rake (12.0.0)
|
||||
riddle (1.5.11)
|
||||
rotp (3.3.0)
|
||||
rqrcode (0.10.1)
|
||||
chunky_png (~> 1.0)
|
||||
|
@ -139,13 +134,6 @@ GEM
|
|||
activesupport (>= 4.0)
|
||||
sprockets (>= 3.0.0)
|
||||
sqlite3 (1.3.9)
|
||||
thinking-sphinx (3.1.2)
|
||||
activerecord (>= 3.1.0)
|
||||
builder (>= 2.1.2)
|
||||
innertube (>= 1.0.2)
|
||||
joiner (>= 0.2.0)
|
||||
middleware (>= 0.1.0)
|
||||
riddle (>= 1.5.11)
|
||||
thor (0.19.4)
|
||||
thread_safe (0.3.6)
|
||||
tzinfo (1.2.3)
|
||||
|
@ -179,7 +167,6 @@ DEPENDENCIES
|
|||
rqrcode
|
||||
rspec-rails (~> 3.5, >= 3.5.2)
|
||||
sqlite3
|
||||
thinking-sphinx (~> 3.1.2)
|
||||
uglifier (>= 1.3.0)
|
||||
unicorn
|
||||
|
||||
|
|
|
@ -19,12 +19,7 @@ class SearchController < ApplicationController
|
|||
end
|
||||
|
||||
if @search.valid?
|
||||
begin
|
||||
@search.search_for_user!(@user)
|
||||
rescue ThinkingSphinx::ConnectionError
|
||||
flash[:error] = "Sorry, but the search engine is currently out " <<
|
||||
"of order"
|
||||
end
|
||||
@search.search_for_user!(@user)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,12 +0,0 @@
|
|||
ThinkingSphinx::Index.define :comment, :with => :active_record do
|
||||
indexes comment
|
||||
indexes short_id
|
||||
indexes user.username, :as => :author
|
||||
|
||||
has Comment.score_sql, :as => :score, :type => :bigint, :sortable => true
|
||||
|
||||
has is_deleted
|
||||
has created_at
|
||||
|
||||
where sanitize_sql(:is_deleted => false, :is_moderated => false)
|
||||
end
|
|
@ -1,26 +0,0 @@
|
|||
ThinkingSphinx::Index.define :story, :with => :active_record do
|
||||
indexes description
|
||||
indexes short_id
|
||||
indexes tags(:tag), :as => :tags
|
||||
indexes title
|
||||
indexes url
|
||||
indexes user.username, :as => :author
|
||||
indexes story_cache
|
||||
|
||||
has created_at, :sortable => true
|
||||
has hotness, is_expired
|
||||
has Story.score_sql, :as => :score, :type => :bigint, :sortable => true
|
||||
|
||||
# opts[:with] = { :id => ... } doesn't seem to work when sphinx searches on
|
||||
# story_core, so give this column a different name to restrict on
|
||||
has id, :as => :story_id
|
||||
|
||||
set_property :field_weights => {
|
||||
:upvotes => 15,
|
||||
:title => 10,
|
||||
:story_cache => 10,
|
||||
:tags => 5,
|
||||
}
|
||||
|
||||
where sanitize_sql(:is_expired => false)
|
||||
end
|
|
@ -11,7 +11,7 @@ class Search
|
|||
|
||||
def initialize
|
||||
@q = ""
|
||||
@what = "all"
|
||||
@what = "stories"
|
||||
@order = "relevance"
|
||||
|
||||
@page = 1
|
||||
|
@ -22,7 +22,7 @@ class Search
|
|||
end
|
||||
|
||||
def max_matches
|
||||
ThinkingSphinx::Configuration.instance.settings["max_matches"] || 1000
|
||||
100
|
||||
end
|
||||
|
||||
def persisted?
|
||||
|
@ -44,19 +44,18 @@ class Search
|
|||
((total - 1) / self.per_page.to_i) + 1
|
||||
end
|
||||
|
||||
def search_for_user!(user)
|
||||
opts = {
|
||||
:ranker => :bm25,
|
||||
:page => [ self.page, self.page_count ].min,
|
||||
:per_page => self.per_page,
|
||||
:include => [ :story, :user ],
|
||||
}
|
||||
|
||||
if order == "newest"
|
||||
opts[:order] = "created_at DESC"
|
||||
elsif order == "points"
|
||||
opts[:order] = "score DESC"
|
||||
def what
|
||||
case @what
|
||||
when "comments"
|
||||
"comments"
|
||||
else
|
||||
"stories"
|
||||
end
|
||||
end
|
||||
|
||||
def search_for_user!(user)
|
||||
self.results = []
|
||||
self.total_results = 0
|
||||
|
||||
# extract domain query since it must be done separately
|
||||
domain = nil
|
||||
|
@ -66,77 +65,107 @@ class Search
|
|||
end
|
||||
}.join(" ")
|
||||
|
||||
if domain.present?
|
||||
self.what = "stories"
|
||||
begin
|
||||
reg = Regexp.new("//([^/]*\.)?#{domain}/")
|
||||
rescue RegexpError
|
||||
return false
|
||||
qwords = ActiveRecord::Base.connection.quote_string(words)
|
||||
|
||||
base = nil
|
||||
|
||||
case self.what
|
||||
when "stories"
|
||||
base = Story.unmerged.where(:is_expired => false).
|
||||
includes({ :taggings => :tag }, :user)
|
||||
|
||||
if domain.present?
|
||||
begin
|
||||
reg = Regexp.new("//([^/]*\.)?#{domain}/")
|
||||
base = base.where("`url` REGEXP '" +
|
||||
ActiveRecord::Base.connection.quote_string(reg.source) + "'")
|
||||
rescue RegexpError
|
||||
return false
|
||||
end
|
||||
end
|
||||
|
||||
story_ids = Story.select(:id).where("`url` REGEXP '" +
|
||||
ActiveRecord::Base.connection.quote_string(reg.source) + "'").
|
||||
collect(&:id)
|
||||
base.where!(
|
||||
"(MATCH(title) AGAINST('#{qwords}' IN BOOLEAN MODE) OR " +
|
||||
"MATCH(description) AGAINST('#{qwords}' IN BOOLEAN MODE) OR " +
|
||||
"MATCH(story_cache) AGAINST('#{qwords}' IN BOOLEAN MODE))"
|
||||
)
|
||||
|
||||
if story_ids.any?
|
||||
opts[:with] = { :story_id => story_ids }
|
||||
else
|
||||
self.results = []
|
||||
self.total_results = 0
|
||||
self.page = 0
|
||||
return false
|
||||
self.results = base.select(
|
||||
"stories.*, " +
|
||||
"MATCH(title) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_title, " +
|
||||
"MATCH(description) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_description, " +
|
||||
"MATCH(story_cache) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_story_cache"
|
||||
)
|
||||
|
||||
case self.order
|
||||
when "relevance"
|
||||
self.results.order!(
|
||||
"(rel_title * 2) DESC, " +
|
||||
"(rel_description * 1.5) DESC, " +
|
||||
"(rel_story_cache) DESC"
|
||||
)
|
||||
when "newest"
|
||||
self.results.order!("created_at DESC")
|
||||
when "points"
|
||||
self.results.order!("#{Story.score_sql} DESC")
|
||||
end
|
||||
|
||||
when "comments"
|
||||
base = Comment.active.where(
|
||||
"MATCH(comment) AGAINST('#{qwords}' IN BOOLEAN MODE)"
|
||||
).includes(:user, :story)
|
||||
|
||||
self.results = base.select(
|
||||
"comments.*, " +
|
||||
"MATCH(comment) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_comment"
|
||||
)
|
||||
|
||||
case self.order
|
||||
when "relevance"
|
||||
self.results.order!("rel_comment DESC")
|
||||
when "newest"
|
||||
self.results.order!("created_at DESC")
|
||||
when "points"
|
||||
self.results.order!("#{Comment.score_sql} DESC")
|
||||
end
|
||||
end
|
||||
|
||||
opts[:classes] = case what
|
||||
when "all"
|
||||
[ Story, Comment ]
|
||||
when "comments"
|
||||
[ Comment ]
|
||||
when "stories"
|
||||
[ Story ]
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
query = Riddle.escape(words)
|
||||
|
||||
# go go gadget search
|
||||
self.total_results = -1
|
||||
self.results = ThinkingSphinx.search query, opts
|
||||
self.total_results = self.results.total_entries
|
||||
self.total_results = base.count
|
||||
|
||||
if self.page > self.page_count
|
||||
self.page = self.page_count
|
||||
end
|
||||
if self.page < 1
|
||||
self.page = 1
|
||||
end
|
||||
|
||||
# bind votes for both types
|
||||
self.results = self.results.
|
||||
limit(self.per_page).
|
||||
offset((self.page - 1) * self.per_page)
|
||||
|
||||
if opts[:classes].include?(Comment) && user
|
||||
votes = Vote.comment_votes_by_user_for_comment_ids_hash(user.id,
|
||||
self.results.select{|r| r.class == Comment }.map{|c| c.id })
|
||||
# if a user is logged in, fetch their votes for what's on the page
|
||||
if user
|
||||
case what
|
||||
when "stories"
|
||||
votes = Vote.story_votes_by_user_for_story_ids_hash(user.id,
|
||||
self.results.map{|s| s.id })
|
||||
|
||||
self.results.each do |r|
|
||||
if r.class == Comment && votes[r.id]
|
||||
r.current_vote = votes[r.id]
|
||||
self.results.each do |r|
|
||||
if votes[r.id]
|
||||
r.vote = votes[r.id]
|
||||
end
|
||||
end
|
||||
|
||||
when "comments"
|
||||
votes = Vote.comment_votes_by_user_for_comment_ids_hash(user.id,
|
||||
self.results.map{|c| c.id })
|
||||
|
||||
self.results.each do |r|
|
||||
if votes[r.id]
|
||||
r.current_vote = votes[r.id]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if opts[:classes].include?(Story) && user
|
||||
votes = Vote.story_votes_by_user_for_story_ids_hash(user.id,
|
||||
self.results.select{|r| r.class == Story }.map{|s| s.id })
|
||||
|
||||
self.results.each do |r|
|
||||
if r.class == Story && votes[r.id]
|
||||
r.vote = votes[r.id]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
rescue ThinkingSphinx::ConnectionError => e
|
||||
self.results = []
|
||||
self.total_results = -1
|
||||
raise e
|
||||
end
|
||||
end
|
||||
|
|
|
@ -11,12 +11,7 @@
|
|||
</div>
|
||||
|
||||
<div class="boxline">
|
||||
<label class="required">Include:</label>
|
||||
|
||||
<%= radio_button_tag "what", "all", @search.what == "all" %>
|
||||
<label for="what_all" class="normal">All</label>
|
||||
|
||||
|
||||
<label class="required">Search:</label>
|
||||
|
||||
<%= radio_button_tag "what", "stories", @search.what == "stories" %>
|
||||
<label for="what_stories" class="normal">Stories</label>
|
||||
|
|
9
db/migrate/20170607170604_add_fulltext_indicies.rb
Normal file
9
db/migrate/20170607170604_add_fulltext_indicies.rb
Normal file
|
@ -0,0 +1,9 @@
|
|||
class AddFulltextIndicies < ActiveRecord::Migration
|
||||
def change
|
||||
add_index(:stories, :title, :type => :fulltext)
|
||||
add_index(:stories, :description, :type => :fulltext)
|
||||
add_index(:stories, :story_cache, :type => :fulltext)
|
||||
|
||||
add_index(:comments, :comment, :type => :fulltext)
|
||||
end
|
||||
end
|
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema.define(version: 20170607153224) do
|
||||
ActiveRecord::Schema.define(version: 20170607170604) do
|
||||
|
||||
create_table "comments", force: :cascade do |t|
|
||||
t.datetime "created_at", null: false
|
||||
|
@ -32,6 +32,7 @@ ActiveRecord::Schema.define(version: 20170607153224) do
|
|||
t.integer "hat_id", limit: 4
|
||||
end
|
||||
|
||||
add_index "comments", ["comment"], name: "index_comments_on_comment", type: :fulltext
|
||||
add_index "comments", ["confidence"], name: "confidence_idx", using: :btree
|
||||
add_index "comments", ["short_id"], name: "short_id", unique: true, using: :btree
|
||||
add_index "comments", ["story_id", "short_id"], name: "story_id_short_id", using: :btree
|
||||
|
@ -138,12 +139,15 @@ ActiveRecord::Schema.define(version: 20170607153224) do
|
|||
end
|
||||
|
||||
add_index "stories", ["created_at"], name: "index_stories_on_created_at", using: :btree
|
||||
add_index "stories", ["description"], name: "index_stories_on_description", type: :fulltext
|
||||
add_index "stories", ["hotness"], name: "hotness_idx", using: :btree
|
||||
add_index "stories", ["is_expired", "is_moderated"], name: "is_idxes", using: :btree
|
||||
add_index "stories", ["is_expired"], name: "index_stories_on_is_expired", using: :btree
|
||||
add_index "stories", ["is_moderated"], name: "index_stories_on_is_moderated", using: :btree
|
||||
add_index "stories", ["merged_story_id"], name: "index_stories_on_merged_story_id", using: :btree
|
||||
add_index "stories", ["short_id"], name: "unique_short_id", unique: true, using: :btree
|
||||
add_index "stories", ["story_cache"], name: "index_stories_on_story_cache", type: :fulltext
|
||||
add_index "stories", ["title"], name: "index_stories_on_title", type: :fulltext
|
||||
add_index "stories", ["twitter_id"], name: "index_stories_on_twitter_id", using: :btree
|
||||
add_index "stories", ["url"], name: "url", length: {"url"=>191}, using: :btree
|
||||
add_index "stories", ["user_id"], name: "index_stories_on_user_id", using: :btree
|
||||
|
|
Loading…
Reference in New Issue
Block a user