From 9915d11c0d7a15b6775af8e78fcc4d836368f88d Mon Sep 17 00:00:00 2001
From: Eugen Rochko <eugen@zeonfederated.com>
Date: Tue, 22 Dec 2020 17:13:55 +0100
Subject: [PATCH] Fix unnecessary queries when batch-removing statuses, 100x
 faster (#15387)

---
 app/models/favourite.rb                       |   2 +-
 app/models/status.rb                          |  12 +-
 app/services/batched_remove_status_service.rb |  91 +++++++------
 app/services/delete_account_service.rb        | 128 ++++++++++++++----
 config/initializers/chewy.rb                  |   5 +
 lib/chewy/strategy/custom_sidekiq.rb          |  25 +---
 .../batched_remove_status_service_spec.rb     |   5 +
 7 files changed, 168 insertions(+), 100 deletions(-)

diff --git a/app/models/favourite.rb b/app/models/favourite.rb
index bf0ec4449..35028b7dd 100644
--- a/app/models/favourite.rb
+++ b/app/models/favourite.rb
@@ -36,7 +36,7 @@ class Favourite < ApplicationRecord
   end
 
   def decrement_cache_counters
-    return if association(:status).loaded? && (status.marked_for_destruction? || status.marked_for_mass_destruction?)
+    return if association(:status).loaded? && status.marked_for_destruction?
     status&.decrement_count!(:favourites_count)
   end
 end
diff --git a/app/models/status.rb b/app/models/status.rb
index 96d90e1c2..b426f9d5b 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -228,14 +228,6 @@ class Status < ApplicationRecord
     @emojis = CustomEmoji.from_text(fields.join(' '), account.domain)
   end
 
-  def mark_for_mass_destruction!
-    @marked_for_mass_destruction = true
-  end
-
-  def marked_for_mass_destruction?
-    @marked_for_mass_destruction
-  end
-
   def replies_count
     status_stat&.replies_count || 0
   end
@@ -430,7 +422,7 @@ class Status < ApplicationRecord
   end
 
   def decrement_counter_caches
-    return if direct_visibility? || marked_for_mass_destruction?
+    return if direct_visibility?
 
     account&.decrement_count!(:statuses_count)
     reblog&.decrement_count!(:reblogs_count) if reblog?
@@ -440,7 +432,7 @@ class Status < ApplicationRecord
   def unlink_from_conversations
     return unless direct_visibility?
 
-    mentioned_accounts = mentions.includes(:account).map(&:account)
+    mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account)
     inbox_owners       = mentioned_accounts.select(&:local?) + (account.local? ? [account] : [])
 
     inbox_owners.each do |inbox_owner|
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index 28e5468b3..63ab89f2d 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -3,29 +3,45 @@
 class BatchedRemoveStatusService < BaseService
   include Redisable
 
-  # Delete given statuses and reblogs of them
-  # Remove statuses from home feeds
-  # Push delete events to streaming API for home feeds and public feeds
-  # @param [Enumerable<Status>] statuses A preferably batched array of statuses
+  # Delete multiple statuses and reblogs of them as efficiently as possible
+  # @param [Enumerable<Status>] statuses An array of statuses
   # @param [Hash] options
-  # @option [Boolean] :skip_side_effects
+  # @option [Boolean] :skip_side_effects Do not modify feeds or send updates to the streaming API
   def call(statuses, **options)
-    statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a }
+    ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
 
-    @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a }
-    @tags     = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) }
+    statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
 
-    @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+    # The conversations for direct visibility statuses also need
+    # to be manually updated. This part is not efficient but we
+    # rely on direct visibility statuses being relatively rare.
+    statuses_with_account_conversations = statuses.select(&:direct_visibility?)
 
-    statuses.each do |status|
-      status.mark_for_mass_destruction!
-      status.destroy
+    ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account])
+
+    statuses_with_account_conversations.each do |status|
+      status.send(:unlink_from_conversations)
     end
 
+    # We do not batch all deletes into one to avoid having a long-running
+    # transaction lock the database, but we use the delete method instead
+    # of destroy to avoid all callbacks. We rely on foreign keys to
+    # cascade the delete faster without loading the associations.
+    statuses_and_reblogs.each(&:delete)
+
+    # Since we skipped all callbacks, we also need to manually
+    # deindex the statuses
+    Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs)
+
     return if options[:skip_side_effects]
 
+    ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
+
+    @tags          = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
+    @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+
     # Batch by source account
-    statuses.group_by(&:account_id).each_value do |account_statuses|
+    statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
       account = account_statuses.first.account
 
       next unless account
@@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService
     end
 
     # Cannot be batched
-    statuses.each do |status|
-      unpush_from_public_timelines(status)
+    redis.pipelined do
+      statuses_and_reblogs.each do |status|
+        unpush_from_public_timelines(status)
+      end
     end
   end
 
   private
 
   def unpush_from_home_timelines(account, statuses)
-    recipients = account.followers_for_local_distribution.to_a
-
-    recipients << account if account.local?
-
-    recipients.each do |follower|
+    account.followers_for_local_distribution.includes(:user).find_each do |follower|
       statuses.each do |status|
         FeedManager.instance.unpush_from_home(follower, status)
       end
     end
+
+    return unless account.local?
+
+    statuses.each do |status|
+      FeedManager.instance.unpush_from_home(account, status)
+    end
   end
 
   def unpush_from_list_timelines(account, statuses)
-    account.lists_for_local_distribution.select(:id, :account_id).each do |list|
+    account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list|
       statuses.each do |status|
         FeedManager.instance.unpush_from_list(list, status)
       end
@@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService
 
     payload = @json_payloads[status.id]
 
-    redis.pipelined do
-      redis.publish('timeline:public', payload)
-      if status.local?
-        redis.publish('timeline:public:local', payload)
-      else
-        redis.publish('timeline:public:remote', payload)
-      end
-      if status.media_attachments.any?
-        redis.publish('timeline:public:media', payload)
-        if status.local?
-          redis.publish('timeline:public:local:media', payload)
-        else
-          redis.publish('timeline:public:remote:media', payload)
-        end
-      end
+    redis.publish('timeline:public', payload)
+    redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
 
-      @tags[status.id].each do |hashtag|
-        redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload)
-        redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local?
-      end
+    if status.media_attachments.any?
+      redis.publish('timeline:public:media', payload)
+      redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
+    end
+
+    @tags[status.id].each do |hashtag|
+      redis.publish("timeline:hashtag:#{hashtag}", payload)
+      redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
     end
   end
 end
diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb
index fa834e775..5123a4697 100644
--- a/app/services/delete_account_service.rb
+++ b/app/services/delete_account_service.rb
@@ -6,15 +6,21 @@ class DeleteAccountService < BaseService
   ASSOCIATIONS_ON_SUSPEND = %w(
     account_pins
     active_relationships
+    aliases
     block_relationships
     blocked_by_relationships
+    bookmarks
     conversation_mutes
     conversations
     custom_filters
+    devices
     domain_blocks
     favourites
+    featured_tags
     follow_requests
+    identity_proofs
     list_accounts
+    migrations
     mute_relationships
     muted_by_relationships
     notifications
@@ -25,6 +31,29 @@ class DeleteAccountService < BaseService
     status_pins
   ).freeze
 
+  # The following associations have no important side-effects
+  # in callbacks and all of their own associations are secured
+  # by foreign keys, making them safe to delete without loading
+  # into memory
+  ASSOCIATIONS_WITHOUT_SIDE_EFFECTS = %w(
+    account_pins
+    aliases
+    conversation_mutes
+    conversations
+    custom_filters
+    devices
+    domain_blocks
+    featured_tags
+    follow_requests
+    identity_proofs
+    migrations
+    mute_relationships
+    muted_by_relationships
+    notifications
+    scheduled_statuses
+    status_pins
+  ).freeze
+
   ASSOCIATIONS_ON_DESTROY = %w(
     reports
     targeted_moderation_notes
@@ -55,19 +84,25 @@ class DeleteAccountService < BaseService
 
     @options[:skip_activitypub] = true if @options[:skip_side_effects]
 
-    reject_follows!
-    undo_follows!
-    purge_user!
-    purge_profile!
+    distribute_activities!
     purge_content!
     fulfill_deletion_request!
   end
 
   private
 
-  def reject_follows!
-    return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
+  def distribute_activities!
+    return if skip_activitypub?
 
+    if @account.local?
+      delete_actor!
+    elsif @account.activitypub?
+      reject_follows!
+      undo_follows!
+    end
+  end
+
+  def reject_follows!
     # When deleting a remote account, the account obviously doesn't
     # actually become deleted on its origin server, i.e. unlike a
     # locally deleted account it continues to have access to its home
@@ -81,8 +116,6 @@ class DeleteAccountService < BaseService
   end
 
   def undo_follows!
-    return if @account.local? || !@account.activitypub? || @options[:skip_activitypub]
-
     # When deleting a remote account, the account obviously doesn't
     # actually become deleted on its origin server, but following relationships
     # are severed on our end. Therefore, make the remote server aware that the
@@ -97,7 +130,7 @@ class DeleteAccountService < BaseService
   def purge_user!
     return if !@account.local? || @account.user.nil?
 
-    if @options[:reserve_email]
+    if keep_user_record?
       @account.user.disable!
       @account.user.invites.where(uses: 0).destroy_all
     else
@@ -106,34 +139,52 @@ class DeleteAccountService < BaseService
   end
 
   def purge_content!
-    distribute_delete_actor! if @account.local? && !@options[:skip_side_effects]
+    purge_user!
+    purge_profile!
+    purge_statuses!
+    purge_media_attachments!
+    purge_polls!
+    purge_generated_notifications!
+    purge_other_associations!
 
+    @account.destroy unless keep_account_record?
+  end
+
+  def purge_statuses!
     @account.statuses.reorder(nil).find_in_batches do |statuses|
-      statuses.reject! { |status| reported_status_ids.include?(status.id) } if @options[:reserve_username]
-      BatchedRemoveStatusService.new.call(statuses, skip_side_effects: @options[:skip_side_effects])
-    end
+      statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record?
 
+      BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
+    end
+  end
+
+  def purge_media_attachments!
     @account.media_attachments.reorder(nil).find_each do |media_attachment|
-      next if @options[:reserve_username] && reported_status_ids.include?(media_attachment.status_id)
+      next if keep_account_record? && reported_status_ids.include?(media_attachment.status_id)
 
       media_attachment.destroy
     end
+  end
 
+  def purge_polls!
     @account.polls.reorder(nil).find_each do |poll|
-      next if @options[:reserve_username] && reported_status_ids.include?(poll.status_id)
+      next if keep_account_record? && reported_status_ids.include?(poll.status_id)
 
-      # We can safely delete the poll rather than destroy it, as any non-reported
-      # status should have been deleted already, as long as we take care of
-      # notifications.
-      Notification.where(poll: poll).delete_all
       poll.delete
     end
+  end
 
+  def purge_generated_notifications!
+    # By deleting polls and statuses without callbacks, we've left behind
+    # polymorphically associated notifications generated by this account
+
+    Notification.where(from_account: @account).in_batches.delete_all
+  end
+
+  def purge_other_associations!
     associations_for_destruction.each do |association_name|
-      destroy_all(@account.public_send(association_name))
+      purge_association(association_name)
     end
-
-    @account.destroy unless @options[:reserve_username]
   end
 
   def purge_profile!
@@ -141,7 +192,7 @@ class DeleteAccountService < BaseService
     # there is no point wasting time updating
     # its values first
 
-    return unless @options[:reserve_username]
+    return unless keep_account_record?
 
     @account.silenced_at       = nil
     @account.suspended_at      = @options[:suspended_at] || Time.now.utc
@@ -156,6 +207,7 @@ class DeleteAccountService < BaseService
     @account.followers_count   = 0
     @account.following_count   = 0
     @account.moved_to_account  = nil
+    @account.also_known_as     = []
     @account.trust_level       = :untrusted
     @account.avatar.destroy
     @account.header.destroy
@@ -166,11 +218,17 @@ class DeleteAccountService < BaseService
     @account.deletion_request&.destroy
   end
 
-  def destroy_all(association)
-    association.in_batches.destroy_all
+  def purge_association(association_name)
+    association = @account.public_send(association_name)
+
+    if ASSOCIATIONS_WITHOUT_SIDE_EFFECTS.include?(association_name)
+      association.in_batches.delete_all
+    else
+      association.in_batches.destroy_all
+    end
   end
 
-  def distribute_delete_actor!
+  def delete_actor!
     ActivityPub::DeliveryWorker.push_bulk(delivery_inboxes) do |inbox_url|
       [delete_actor_json, @account.id, inbox_url]
     end
@@ -197,10 +255,26 @@ class DeleteAccountService < BaseService
   end
 
   def associations_for_destruction
-    if @options[:reserve_username]
+    if keep_account_record?
       ASSOCIATIONS_ON_SUSPEND
     else
       ASSOCIATIONS_ON_SUSPEND + ASSOCIATIONS_ON_DESTROY
     end
   end
+
+  def keep_user_record?
+    @options[:reserve_email]
+  end
+
+  def keep_account_record?
+    @options[:reserve_username]
+  end
+
+  def skip_side_effects?
+    @options[:skip_side_effects]
+  end
+
+  def skip_activitypub?
+    @options[:skip_activitypub]
+  end
 end
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index 8f54abf77..9fc9b2f1a 100644
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb
@@ -12,6 +12,10 @@ Chewy.settings = {
   sidekiq: { queue: 'pull' },
 }
 
+# We use our own async strategy even outside the request-response
+# cycle; the strategy itself takes care of checking whether
+# Elasticsearch is enabled. However, mind that in the Rails console
+# the :urgent strategy is set automatically with no way to override it.
 Chewy.root_strategy              = :custom_sidekiq
 Chewy.request_strategy           = :custom_sidekiq
 Chewy.use_after_commit_callbacks = false
@@ -37,6 +41,7 @@ Elasticsearch::Transport::Client.prepend Module.new {
     super arguments
   end
 }
+
 Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
   def create(arguments = {})
     arguments[:include_type_name] = true
diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb
index 3e54326ba..794ae4ed4 100644
--- a/lib/chewy/strategy/custom_sidekiq.rb
+++ b/lib/chewy/strategy/custom_sidekiq.rb
@@ -2,29 +2,10 @@
 
 module Chewy
   class Strategy
-    class CustomSidekiq < Base
-      class Worker
-        include ::Sidekiq::Worker
-
-        sidekiq_options queue: 'pull'
-
-        def perform(type, ids, options = {})
-          options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async
-          type.constantize.import!(ids, options)
-        end
+    class CustomSidekiq < Sidekiq
+      def update(_type, _objects, _options = {})
+        super if Chewy.enabled?
       end
-
-      def update(type, objects, _options = {})
-        return unless Chewy.enabled?
-
-        ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)
-
-        return if ids.empty?
-
-        Worker.perform_async(type.name, ids)
-      end
-
-      def leave; end
     end
   end
 end
diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb
index f84256f18..239859f06 100644
--- a/spec/services/batched_remove_status_service_spec.rb
+++ b/spec/services/batched_remove_status_service_spec.rb
@@ -26,6 +26,11 @@ RSpec.describe BatchedRemoveStatusService, type: :service do
     subject.call([status1, status2])
   end
 
+  it 'removes statuses' do
+    expect { Status.find(status1.id) }.to raise_error ActiveRecord::RecordNotFound
+    expect { Status.find(status2.id) }.to raise_error ActiveRecord::RecordNotFound
+  end
+
   it 'removes statuses from author\'s home feed' do
     expect(HomeFeed.new(alice).get(10)).to_not include([status1.id, status2.id])
   end