From 37d495eeeb810127e0c6c62bff865c1ba66f45c8 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 3 Jun 2018 18:12:55 -0500 Subject: [PATCH] keyword mute: Store keywords as a list This has a couple of advantages over the regex approach: - Keywords are individually addressable, which makes it easier to gather statistics (#363) - Keywords can be individually applied to different feeds, e.g. skipping mentions (#454) It *does* end up creating many more Regexp objects. I'm not yet sure if the difference is significant. --- app/models/glitch/keyword_mute.rb | 78 +++++++++++++++++-------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 17ebc5b5e..e7cbbe617 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -33,68 +33,74 @@ class Glitch::KeywordMute < ApplicationRecord Rails.cache.delete(TagMatcher.cache_key(account_id)) end - class RegexpMatcher - attr_reader :account_id - attr_reader :regex + class CachedKeywordMute + attr_reader :keyword + attr_reader :whole_word - def initialize(account_id) - @account_id = account_id - regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text } - @regex = /#{regex_text}/ + def initialize(keyword, whole_word) + @keyword = keyword + @whole_word = whole_word end - protected - - def keywords - Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword) - end - - def boundary_regex_for_keyword(keyword) + def boundary_regex_for_keyword sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ end + + def matches?(str) + str =~ (whole_word ? boundary_regex_for_keyword : /#{keyword}/i) + end end - class TextMatcher < RegexpMatcher + class Matcher + attr_reader :account_id + attr_reader :words + + def initialize(account_id) + @account_id = account_id + @words = Rails.cache.fetch(self.class.cache_key(account_id)) { fetch_keywords } + end + + protected + + def fetch_keywords + Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword).map do |whole_word, keyword| + CachedKeywordMute.new(transform_keyword(keyword), whole_word) + end + end + + def transform_keyword(keyword) + keyword + end + end + + class TextMatcher < Matcher def self.cache_key(account_id) format('keyword_mutes:regex:text:%s', account_id) end def matches?(str) - !!(regex =~ str) - end - - private - - def make_regex_text - kws = keywords.map! do |whole_word, keyword| - whole_word ? boundary_regex_for_keyword(keyword) : /(?i:#{Regexp.escape(keyword)})/ - end - - Regexp.union(kws).source + words.any? { |w| w.matches?(str) } end end - class TagMatcher < RegexpMatcher + class TagMatcher < Matcher def self.cache_key(account_id) format('keyword_mutes:regex:tag:%s', account_id) end def matches?(tags) - tags.pluck(:name).any? { |n| regex =~ n } + tags.pluck(:name).any? do |n| + words.any? { |w| w.matches?(n) } + end end - private + protected - def make_regex_text - kws = keywords.map! do |whole_word, keyword| - term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword - whole_word ? boundary_regex_for_keyword(term) : term - end - - Regexp.union(kws).source + def transform_keyword(kw) + Tag::HASHTAG_RE =~ kw ? $1 : kw end end end