349 lines
9.3 KiB
Ruby
349 lines
9.3 KiB
Ruby
# typed: false

# results:
#   not performed
#   nothing found
#   invalid: negative w/o positive term
#   invalid: multiple domains
#   invalid: unknown tag
#   results

# Builds and runs a full-text search over stories or comments from
# controller params. The parsed query (an array of single-pair hashes,
# type => value) is turned into an ActiveRecord relation; problems with the
# query are reported through #invalid_because instead of raising.
class Search
  attr_reader :q, :what, :order, :page, :searcher
  attr_reader :invalid_because, :parse_tree

  # takes untrusted params from controller, so sanitize
  #
  # params - hash-like object; reads :q, :what, :order, :page, :results_count
  # user   - the searching user, exposed as #searcher
  def initialize params, user
    @q = params[:q]
    @parse_tree = if params[:q].present?
      SearchParser.new.parse(params[:q])
    else
      []
    end

    # only whitelisted values are ever converted to symbols
    @what = if %w[stories comments].include? params[:what]
      params[:what].to_sym
    else
      :comments
    end

    @order = if %w[newest relevance score].include? params[:order]
      params[:order].to_sym
    else
      :newest
    end

    # missing/garbage (to_i => 0) and negative pages all fall back to the
    # first page; a negative page would produce a negative SQL OFFSET
    @page = params[:page].to_i
    @page = 1 if @page < 1

    @searcher = user

    @results = nil
    # -1 means "not counted yet", see page_count
    @results_count = params[:results_count] || -1
    @invalid_because = nil
  end

  # Records why the search is invalid and empties the result set.
  # returns @results so perform_* can return from calling this
  def invalid reason
    @invalid_because = reason
    @results_count = 0
    @results = searched_model.none
  end

  # Upper bound on the number of matches page_count will paginate over.
  def max_matches
    100
  end

  # Number of results on one page.
  def per_page
    20
  end

  # The sanitized search as a hash of params.
  def to_param
    {
      q: @q,
      what: @what,
      order: @order,
      page: @page
    }
  end

  # Number of pages, treating an unknown count (-1) or a count above
  # max_matches as max_matches.
  def page_count
    total = results_count.to_i

    if total == -1 || total > max_matches
      total = max_matches
    end

    ((total - 1) / per_page.to_i) + 1
  end

  # Runs the search for the selected model and returns the relation.
  # A blank query or empty parse tree yields an empty relation.
  def perform!
    return (@results = searched_model.none) if q.blank? || parse_tree.blank?

    if what == :stories
      perform_story_search!
    else
      perform_comment_search!
    end
  end

  # security: must prevent sql injection
  # it assumes SearchParser prevents "
  #
  # Turns a title: node ({term: ...} or {quoted: [...]}) into text that is
  # interpolated inside a single-quoted AGAINST ('...') literal. Both
  # branches go through the connection's quote_string so backslashes are
  # escaped as well as single quotes. Returns nil for any other node type.
  def flatten_title tree
    type, value = tree.first

    case type
    when :term
      ActiveRecord::Base.connection.quote_string(value.to_s)
    when :quoted
      phrase = value.map(&:values).flatten.join(" ")
      '"' + ActiveRecord::Base.connection.quote_string(phrase) + '"'
    end
  end

  # security: must prevent sql injection
  # strip all nonword except -_' so people can search for contractions like "don't"
  # some of these are search operators, some sql injection
  # https://mariadb.com/kb/ru/full-text-index-overview/#in-boolean-mode
  # surprise: + is not in \p{Punct}
  def strip_operators s
    s.to_s
      .gsub(/[^\p{Word}']/, " ")
      .gsub("'", "\\\\'") # the replacement string yields a backslash-escaped quote
      .strip
  end

  # not security-sensitive, mariadb ignores 1 and 2 character terms and
  # stripping them allows the frontend to explain they're ignored
  def strip_short_terms(s)
    s.to_s.gsub(/\b\p{Word}\p{Word}?\b/, "").strip
  end

  # Builds the comment query from parse_tree, sets @results_count and
  # returns the ordered, paginated relation (or the result of #invalid).
  def perform_comment_search!
    query = Comment.accessible_to_user(searcher).for_presentation

    terms = []
    terms_sql = nil
    n_commenters = 0
    n_domains = 0
    n_submitters = 0
    n_tags = 0
    tags = nil
    url = false
    title = false

    # array of hashes, type => value(s)
    parse_tree.each do |node|
      type, value = node.first
      case type
      when :commenter, :user
        n_commenters += 1
        return invalid("A comment only has one commenter") if n_commenters > 1
        query.joins!(:user).where!(users: {username: value.to_s})
      when :domain
        n_domains += 1
        return invalid("A story can't be from multiple domains at once") if n_domains > 1
        query.joins!(story: [:domain]).where!(story: {domains: {domain: value.to_s}})
      when :submitter
        n_submitters += 1
        return invalid("A story only has one submitter") if n_submitters > 1
        query.joins!(story: :user).where!(story: {users: {username: value.to_s}})
      when :tag
        n_tags += 1
        tags ||= Tag.none.select(:id)
        tags.or!(Tag.where(tag: value.to_s))
        # TODO unknown tag
      when :title
        title = true
        value = flatten_title value
        query.where!(
          story: Story.joins(:story_text).where(
            "MATCH(story_texts.title) AGAINST ('+#{value}' in boolean mode)"
          )
        )
      when :url
        url = true
        query
          .joins!(:story)
          .and!(
            Story.where(url: value.to_s)
              .or(Story.where(normalized_url: Utils.normalize_url(value.to_s)))
          )
      when :negated
        # TODO
      when :quoted
        terms.append '"' + strip_operators(value) + '"'
      when :term, :catchall
        val = strip_short_terms(strip_operators(value))
        # if punctuation is replaced with a space, this would generate a terms search
        # AGAINST('+' in boolean mode)
        terms.append val if !val.empty?
      end
    end

    if terms.any?
      terms_sql = <<~SQL.tr("\n", " ")
        MATCH(comment)
        AGAINST ('#{terms.map { |s| "+#{s}" }.join(", ")}' in boolean mode)
      SQL
      query.where! terms_sql
    end

    if tags
      # a story must carry every searched-for tag
      query.where!(
        story: Story
          .joins(:tags)
          .where(tags: tags)
          .group("stories.id")
          .having("count(distinct tags.id) = #{n_tags}")
      )
    end

    # don't allow blank searches for all records when strip_ removes all data
    if n_commenters == 0 &&
        n_domains == 0 &&
        n_submitters == 0 &&
        n_tags == 0 &&
        !url &&
        !title &&
        terms.empty?
      return invalid("No search terms recognized")
    end

    @results_count = query.dup.count
    # with_tags uses group_by, so count returns a hash
    @results_count = @results_count.count if @results_count.is_a? Hash

    order_and_paginate!(query, terms_sql)
  end

  # Builds the story query from parse_tree, sets @results_count and
  # returns the ordered, paginated relation (or the result of #invalid).
  def perform_story_search!
    # use the searcher reader: the constructor never assigns @user, so
    # passing it here always handed Story.base a nil user
    query = Story.base(searcher).for_presentation

    terms = []
    terms_sql = nil
    n_domains = 0
    n_submitters = 0
    n_tags = 0
    tags = nil
    url = false
    title = false

    # array of hashes, type => value(s)
    parse_tree.each do |node|
      type, value = node.first
      case type
      when :commenter
        return invalid("Doesn't make sense to search Stories by commenter")
      when :domain
        n_domains += 1
        return invalid("A story can't be from multiple domains at once") if n_domains > 1
        query.joins!(:domain).where!(domains: {domain: value.to_s})
      when :submitter, :user
        n_submitters += 1
        return invalid("A story only has one submitter") if n_submitters > 1
        query.joins!(:user).where!(user: {username: value.to_s})
      when :tag
        n_tags += 1
        tags ||= Tag.none.select(:id)
        tags.or!(Tag.where(tag: value.to_s))
        # TODO unknown tag
      when :title
        title = true
        value = flatten_title value
        query.joins!(:story_text).where!(
          "MATCH(story_texts.title) AGAINST ('+#{value}' in boolean mode)"
        )
      when :url
        url = true
        query.and!(
          Story.where(url: value.to_s)
            .or(Story.where(normalized_url: Utils.normalize_url(value.to_s)))
        )
      when :negated
        # TODO
      when :quoted
        terms.append '"' + strip_operators(value) + '"'
      when :term, :catchall
        val = strip_short_terms(strip_operators(value))
        # if punctuation is replaced with a space, this would generate a terms search
        # AGAINST('+' in boolean mode)
        terms.append val if !val.empty?
      end
    end

    if terms.any?
      terms_sql = <<~SQL.tr("\n", " ")
        MATCH(story_texts.title, story_texts.description, story_texts.body)
        AGAINST ('#{terms.map { |s| "+#{s}" }.join(", ")}' in boolean mode)
      SQL
      query.joins!(:story_text).where! terms_sql
    end

    if tags
      # This searches tags by subquery because otherwise Rails recognizes the join against tags and
      # thinks the .tags association preload is satisfied, so returned stories will only have the
      # searched-for tags.
      # Newlines become spaces so the SQL tokens stay separated no matter
      # how the heredoc is indented.
      tagged_stories_sql = <<~SQL.tr("\n", " ")
        inner join (
          select stories.id
          from stories
          join taggings on taggings.story_id = stories.id
          where taggings.tag_id in (#{tags.to_sql})
          group by stories.id
          having count(distinct taggings.id) = #{n_tags}
        ) as stories_with_tags on stories_with_tags.id = stories.id
      SQL
      query.joins!(tagged_stories_sql)
    end

    # don't allow blank searches for all records when strip_ removes all data
    if n_domains == 0 &&
        n_submitters == 0 &&
        n_tags == 0 &&
        !url &&
        !title &&
        terms.empty?
      return invalid("No search terms recognized")
    end

    @results_count = query.dup.count

    order_and_paginate!(query, terms_sql)
  end

  # The memoized result relation; runs the search on first use.
  def results
    @results ||= perform!
  end

  # Count of matching records. Runs the search through #results when it has
  # not run yet, so a later #results call reuses the same relation instead
  # of building and counting the query a second time.
  def results_count
    results if @results.nil?
    @results_count
  end

  # The model class being searched, chosen by #what.
  def searched_model
    (what == :stories) ? Story : Comment
  end

  private

  # Applies the requested ordering plus limit/offset to query in place and
  # returns it. relevance_sql is the MATCH ... AGAINST fragment, or nil when
  # the search had no full-text terms.
  def order_and_paginate!(query, relevance_sql)
    case order
    when :newest
      query.order!(id: :desc)
    when :relevance
      # relevance is undefined without search terms so sort by score
      if relevance_sql
        query.order!(Arel.sql(relevance_sql + " DESC"))
      else
        query.order!(score: :desc)
      end
    when :score
      query.order!(score: :desc)
    end

    query.limit!(per_page)
    query.offset!((page - 1) * per_page)
    query
  end
end
|