Search: drop sphinx for searching, use sql fulltext

Sphinx's searchd is very buggy: it often crashes while rebuilding its
indexes, fails to start up, or just spins at 100% CPU.

It is also a big external dependency that might not be available on
other hosting platforms.

This removes the ability to search both comments and stories at
once, though I don't know how useful that was anyway.  In return, we
gain boolean searching and, in the future, the ability to craft more
precise queries using particular keywords like "user:blah" or
"tag:blah" to narrow things down.
This commit is contained in:
joshua stein 2017-06-07 15:03:15 -05:00
parent ac6b9f5963
commit 10cde11f83
9 changed files with 115 additions and 139 deletions

View File

@ -8,11 +8,6 @@ gem "mysql2", ">= 0.3.14"
# uncomment to use PostgreSQL
# gem "pg"
#
# NOTE: If you use PostgreSQL, you must still leave enabled the above mysql2
# gem for Sphinx full text search to function.
gem "thinking-sphinx", "~> 3.1.2"
gem "uglifier", ">= 1.3.0"
gem "jquery-rails", "~> 3.1.3"

View File

@ -57,9 +57,6 @@ GEM
activesupport (>= 4.1.0)
htmlentities (4.3.4)
i18n (0.8.1)
innertube (1.1.0)
joiner (0.3.4)
activerecord (>= 4.1.0)
jquery-rails (3.1.4)
railties (>= 3.0, < 5.0)
thor (>= 0.14, < 2.0)
@ -69,7 +66,6 @@ GEM
machinist (2.0)
mail (2.6.4)
mime-types (>= 1.16, < 4)
middleware (0.1.0)
mime-types (3.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2016.0521)
@ -108,7 +104,6 @@ GEM
thor (>= 0.18.1, < 2.0)
raindrops (0.13.0)
rake (12.0.0)
riddle (1.5.11)
rotp (3.3.0)
rqrcode (0.10.1)
chunky_png (~> 1.0)
@ -139,13 +134,6 @@ GEM
activesupport (>= 4.0)
sprockets (>= 3.0.0)
sqlite3 (1.3.9)
thinking-sphinx (3.1.2)
activerecord (>= 3.1.0)
builder (>= 2.1.2)
innertube (>= 1.0.2)
joiner (>= 0.2.0)
middleware (>= 0.1.0)
riddle (>= 1.5.11)
thor (0.19.4)
thread_safe (0.3.6)
tzinfo (1.2.3)
@ -179,7 +167,6 @@ DEPENDENCIES
rqrcode
rspec-rails (~> 3.5, >= 3.5.2)
sqlite3
thinking-sphinx (~> 3.1.2)
uglifier (>= 1.3.0)
unicorn

View File

@ -19,12 +19,7 @@ class SearchController < ApplicationController
end
if @search.valid?
begin
@search.search_for_user!(@user)
rescue ThinkingSphinx::ConnectionError
flash[:error] = "Sorry, but the search engine is currently out " <<
"of order"
end
@search.search_for_user!(@user)
end
end

View File

@ -1,12 +0,0 @@
# Sphinx index definition for the :comment model, backed by ActiveRecord.
ThinkingSphinx::Index.define :comment, :with => :active_record do
# columns declared with `indexes`: the comment body and its short_id
indexes comment
indexes short_id
# the associated user's username, exposed under the name :author
indexes user.username, :as => :author
# columns/expressions declared with `has`; the score comes from
# Comment.score_sql and is declared as a sortable bigint named :score
has Comment.score_sql, :as => :score, :type => :bigint, :sortable => true
has is_deleted
has created_at
# restrict the index to comments that are neither deleted nor moderated
where sanitize_sql(:is_deleted => false, :is_moderated => false)
end

View File

@ -1,26 +0,0 @@
# Sphinx index definition for the :story model, backed by ActiveRecord.
ThinkingSphinx::Index.define :story, :with => :active_record do
# columns declared with `indexes`
indexes description
indexes short_id
# the :tag column of the associated tags, exposed under the name :tags
indexes tags(:tag), :as => :tags
indexes title
indexes url
# the associated user's username, exposed under the name :author
indexes user.username, :as => :author
indexes story_cache
# columns/expressions declared with `has`
has created_at, :sortable => true
has hotness, is_expired
# score comes from Story.score_sql, declared as a sortable bigint named :score
has Story.score_sql, :as => :score, :type => :bigint, :sortable => true
# opts[:with] = { :id => ... } doesn't seem to work when sphinx searches on
# story_core, so give this column a different name to restrict on
has id, :as => :story_id
# per-field weights
# NOTE(review): :upvotes is weighted here but no `indexes upvotes` line
# appears above -- confirm whether this weight ever had any effect
set_property :field_weights => {
:upvotes => 15,
:title => 10,
:story_cache => 10,
:tags => 5,
}
# restrict the index to stories that are not expired
where sanitize_sql(:is_expired => false)
end

View File

@ -11,7 +11,7 @@ class Search
def initialize
@q = ""
@what = "all"
@what = "stories"
@order = "relevance"
@page = 1
@ -22,7 +22,7 @@ class Search
end
# Returns the match limit for a search.
# NOTE(review): this diff hunk shows both versions of the body; with Sphinx
# being dropped by this commit, the ThinkingSphinx line is presumably the
# removed one and the literal is its replacement -- confirm against the
# resulting file.
def max_matches
# Sphinx-based value: the "max_matches" setting from the Sphinx
# configuration, falling back to 1000 when it is unset
ThinkingSphinx::Configuration.instance.settings["max_matches"] || 1000
# fixed value that does not depend on Sphinx
100
end
def persisted?
@ -44,19 +44,18 @@ class Search
((total - 1) / self.per_page.to_i) + 1
end
def search_for_user!(user)
opts = {
:ranker => :bm25,
:page => [ self.page, self.page_count ].min,
:per_page => self.per_page,
:include => [ :story, :user ],
}
if order == "newest"
opts[:order] = "created_at DESC"
elsif order == "points"
opts[:order] = "score DESC"
# Normalized search target: "comments" only when @what is exactly
# "comments"; every other value (including nil) falls back to "stories".
def what
  "comments" == @what ? "comments" : "stories"
end
def search_for_user!(user)
self.results = []
self.total_results = 0
# extract domain query since it must be done separately
domain = nil
@ -66,77 +65,107 @@ class Search
end
}.join(" ")
if domain.present?
self.what = "stories"
begin
reg = Regexp.new("//([^/]*\.)?#{domain}/")
rescue RegexpError
return false
qwords = ActiveRecord::Base.connection.quote_string(words)
base = nil
case self.what
when "stories"
base = Story.unmerged.where(:is_expired => false).
includes({ :taggings => :tag }, :user)
if domain.present?
begin
reg = Regexp.new("//([^/]*\.)?#{domain}/")
base = base.where("`url` REGEXP '" +
ActiveRecord::Base.connection.quote_string(reg.source) + "'")
rescue RegexpError
return false
end
end
story_ids = Story.select(:id).where("`url` REGEXP '" +
ActiveRecord::Base.connection.quote_string(reg.source) + "'").
collect(&:id)
base.where!(
"(MATCH(title) AGAINST('#{qwords}' IN BOOLEAN MODE) OR " +
"MATCH(description) AGAINST('#{qwords}' IN BOOLEAN MODE) OR " +
"MATCH(story_cache) AGAINST('#{qwords}' IN BOOLEAN MODE))"
)
if story_ids.any?
opts[:with] = { :story_id => story_ids }
else
self.results = []
self.total_results = 0
self.page = 0
return false
self.results = base.select(
"stories.*, " +
"MATCH(title) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_title, " +
"MATCH(description) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_description, " +
"MATCH(story_cache) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_story_cache"
)
case self.order
when "relevance"
self.results.order!(
"(rel_title * 2) DESC, " +
"(rel_description * 1.5) DESC, " +
"(rel_story_cache) DESC"
)
when "newest"
self.results.order!("created_at DESC")
when "points"
self.results.order!("#{Story.score_sql} DESC")
end
when "comments"
base = Comment.active.where(
"MATCH(comment) AGAINST('#{qwords}' IN BOOLEAN MODE)"
).includes(:user, :story)
self.results = base.select(
"comments.*, " +
"MATCH(comment) AGAINST('#{qwords}' IN BOOLEAN MODE) AS rel_comment"
)
case self.order
when "relevance"
self.results.order!("rel_comment DESC")
when "newest"
self.results.order!("created_at DESC")
when "points"
self.results.order!("#{Comment.score_sql} DESC")
end
end
opts[:classes] = case what
when "all"
[ Story, Comment ]
when "comments"
[ Comment ]
when "stories"
[ Story ]
else
[]
end
query = Riddle.escape(words)
# go go gadget search
self.total_results = -1
self.results = ThinkingSphinx.search query, opts
self.total_results = self.results.total_entries
self.total_results = base.count
if self.page > self.page_count
self.page = self.page_count
end
if self.page < 1
self.page = 1
end
# bind votes for both types
self.results = self.results.
limit(self.per_page).
offset((self.page - 1) * self.per_page)
if opts[:classes].include?(Comment) && user
votes = Vote.comment_votes_by_user_for_comment_ids_hash(user.id,
self.results.select{|r| r.class == Comment }.map{|c| c.id })
# if a user is logged in, fetch their votes for what's on the page
if user
case what
when "stories"
votes = Vote.story_votes_by_user_for_story_ids_hash(user.id,
self.results.map{|s| s.id })
self.results.each do |r|
if r.class == Comment && votes[r.id]
r.current_vote = votes[r.id]
self.results.each do |r|
if votes[r.id]
r.vote = votes[r.id]
end
end
when "comments"
votes = Vote.comment_votes_by_user_for_comment_ids_hash(user.id,
self.results.map{|c| c.id })
self.results.each do |r|
if votes[r.id]
r.current_vote = votes[r.id]
end
end
end
end
if opts[:classes].include?(Story) && user
votes = Vote.story_votes_by_user_for_story_ids_hash(user.id,
self.results.select{|r| r.class == Story }.map{|s| s.id })
self.results.each do |r|
if r.class == Story && votes[r.id]
r.vote = votes[r.id]
end
end
end
rescue ThinkingSphinx::ConnectionError => e
self.results = []
self.total_results = -1
raise e
end
end

View File

@ -11,12 +11,7 @@
</div>
<div class="boxline">
<label class="required">Include:</label>
<%= radio_button_tag "what", "all", @search.what == "all" %>
<label for="what_all" class="normal">All</label>
&nbsp;
<label class="required">Search:</label>
<%= radio_button_tag "what", "stories", @search.what == "stories" %>
<label for="what_stories" class="normal">Stories</label>

View File

@ -0,0 +1,9 @@
# Migration that adds one single-column fulltext index per text column:
# title, description and story_cache on stories, and comment on comments.
class AddFulltextIndicies < ActiveRecord::Migration
  def change
    # table => columns; iteration follows insertion order, so the indexes
    # are added in the order listed here
    fulltext_columns = {
      :stories => [ :title, :description, :story_cache ],
      :comments => [ :comment ],
    }

    fulltext_columns.each do |table, columns|
      columns.each do |column|
        add_index(table, column, :type => :fulltext)
      end
    end
  end
end

View File

@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20170607153224) do
ActiveRecord::Schema.define(version: 20170607170604) do
create_table "comments", force: :cascade do |t|
t.datetime "created_at", null: false
@ -32,6 +32,7 @@ ActiveRecord::Schema.define(version: 20170607153224) do
t.integer "hat_id", limit: 4
end
add_index "comments", ["comment"], name: "index_comments_on_comment", type: :fulltext
add_index "comments", ["confidence"], name: "confidence_idx", using: :btree
add_index "comments", ["short_id"], name: "short_id", unique: true, using: :btree
add_index "comments", ["story_id", "short_id"], name: "story_id_short_id", using: :btree
@ -138,12 +139,15 @@ ActiveRecord::Schema.define(version: 20170607153224) do
end
add_index "stories", ["created_at"], name: "index_stories_on_created_at", using: :btree
add_index "stories", ["description"], name: "index_stories_on_description", type: :fulltext
add_index "stories", ["hotness"], name: "hotness_idx", using: :btree
add_index "stories", ["is_expired", "is_moderated"], name: "is_idxes", using: :btree
add_index "stories", ["is_expired"], name: "index_stories_on_is_expired", using: :btree
add_index "stories", ["is_moderated"], name: "index_stories_on_is_moderated", using: :btree
add_index "stories", ["merged_story_id"], name: "index_stories_on_merged_story_id", using: :btree
add_index "stories", ["short_id"], name: "unique_short_id", unique: true, using: :btree
add_index "stories", ["story_cache"], name: "index_stories_on_story_cache", type: :fulltext
add_index "stories", ["title"], name: "index_stories_on_title", type: :fulltext
add_index "stories", ["twitter_id"], name: "index_stories_on_twitter_id", using: :btree
add_index "stories", ["url"], name: "url", length: {"url"=>191}, using: :btree
add_index "stories", ["user_id"], name: "index_stories_on_user_id", using: :btree