# Safely acquire lock on multiple tables #100

Open · wants to merge 3 commits into `master`
18 changes: 12 additions & 6 deletions README.md

@@ -457,34 +457,40 @@ safe_partman_reapply_privileges :table

#### safely\_acquire\_lock\_for\_table

- Acquires a lock on a table using the following algorithm:
+ Acquires a lock (in `ACCESS EXCLUSIVE` mode by default) on a table using the following algorithm:

1. Verify that no long-running queries are using the table.
- If long-running queries are currently using the table, sleep `PgHaMigrations::LOCK_TIMEOUT_SECONDS` and check again.
2. If no long-running queries are currently using the table, optimistically attempt to lock the table (with a timeout of `PgHaMigrations::LOCK_TIMEOUT_SECONDS`).
- If the lock is not acquired, sleep `PgHaMigrations::LOCK_FAILURE_RETRY_DELAY_MULTLIPLIER * PgHaMigrations::LOCK_TIMEOUT_SECONDS`, and start again at step 1.
3. If the lock is acquired, proceed to run the given block.
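The steps above can be sketched in plain Ruby. This is a schematic illustration only, not the gem's implementation: the `busy` and `try_lock` lambdas stand in for the real database checks, and the constant values are arbitrary.

```ruby
LOCK_TIMEOUT_SECONDS = 0.01
RETRY_DELAY_MULTIPLIER = 2

# Schematic version of the algorithm: `busy` reports whether long-running
# queries are using the table, `try_lock` attempts the lock with a timeout.
def safely_acquire_lock(table, busy:, try_lock:)
  loop do
    # 1. Wait while long-running queries are using the table.
    if busy.call(table)
      sleep(LOCK_TIMEOUT_SECONDS)
      next
    end

    # 2. Optimistically attempt to lock the table, with a timeout.
    if try_lock.call(table, LOCK_TIMEOUT_SECONDS)
      # 3. Lock acquired: run the given block.
      return yield
    end

    # Lock not acquired: back off, then start again at step 1.
    sleep(RETRY_DELAY_MULTIPLIER * LOCK_TIMEOUT_SECONDS)
  end
end

attempts = 0
result = safely_acquire_lock(
  :users,
  busy: ->(_table) { false },
  try_lock: ->(_table, _timeout) { (attempts += 1) > 1 } # fail once, then succeed
) { :migrated }
# result is :migrated after the second lock attempt
```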

Safely acquire an access exclusive lock for a table.

```ruby
safely_acquire_lock_for_table(:table) do
...
end
```

- Safely acquire a lock for a table in a different mode.
+ Safely acquire a lock on a table in `SHARE` mode.

```ruby
safely_acquire_lock_for_table(:table, mode: :share) do
...
end
```

Safely acquire a lock on multiple tables in `EXCLUSIVE` mode.

```ruby
safely_acquire_lock_for_table(:table_a, :table_b, mode: :exclusive) do
...
end
```

Note:

- We enforce that only one table (or a table and its partitions) can be locked at a time.
- Attempting to acquire a nested lock on a different table will result in an error.
+ We enforce that only one set of tables can be locked at a time.
+ Attempting to acquire a nested lock on a different set of tables will result in an error.
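A minimal sketch of that nesting rule, using a plain `Set` of table names (the gem's actual check operates on its own table collection objects):

```ruby
require "set"

held   = Set[:table_a, :table_b] # tables locked by the outer call
nested = Set[:table_a]           # tables requested by a nested call

# A nested call is allowed only when its tables are a subset of the
# already-locked set; anything else raises in the gem.
ok         = nested.subset?(held)        # :table_a is already locked
disallowed = Set[:table_c].subset?(held) # :table_c is not locked
```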

#### adjust\_lock\_timeout

32 changes: 29 additions & 3 deletions lib/pg_ha_migrations/relation.rb

@@ -14,7 +14,7 @@ def initialize(name, schema, mode=nil)
end

def conflicts_with?(other)
self == other && (
self.eql?(other) && (
mode.nil? || other.mode.nil? || mode.conflicts_with?(other.mode)
)
end
@@ -30,8 +30,12 @@ def present?
name.present? && schema.present?
end

def ==(other)
other.is_a?(Relation) && name == other.name && schema == other.schema
def eql?(other)
**Contributor:** I know this isn't new per se, but I'm wondering again why we don't include mode in equality/hashing (I understand why we include it in a special way in conflicts_with?).

other.is_a?(Relation) && hash == other.hash
end

def hash
[name, schema].hash
end
end
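The change above leans on Ruby's `eql?`/`hash` contract: `Set` and `Hash` deduplicate by calling `#hash` and then `#eql?`, so the two must agree. A standalone illustration, using a `Struct` stand-in rather than the gem's class:

```ruby
require "set"

# Stand-in for the Relation class above: equality is derived from the
# same [name, schema] tuple that #hash uses, so Set/Hash dedup works.
RelationKey = Struct.new(:name, :schema) do
  def eql?(other)
    other.is_a?(RelationKey) && hash == other.hash
  end

  def hash
    [name, schema].hash
  end
end

set = Set[
  RelationKey.new("users", "public"),
  RelationKey.new("users", "public"),   # duplicate: collapses
  RelationKey.new("users", "reporting")
]
# set.size == 2 — the duplicate public.users entry is deduplicated
```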

@@ -152,4 +156,26 @@ def valid?
SQL
end
end

class TableCollection < Set
**Contributor:** Inheriting from Ruby collection classes is usually considered pretty dangerous now (e.g., lots of discussions about some old ActiveSupport classes) because it's easy to end up with an inconsistent API or internally inconsistent data.

For example, I could construct an instance of this class and then add another table to the set and break the assumptions of the #mode method (namely that all modes are the same).

I think it would probably be preferable to just maintain an internal ivar of the set instance and present a very limited external API.

def self.from_table_names(tables, mode=nil)
new(tables) { |table| Table.from_table_name(table, mode) }
end

def mode
first&.mode
end

def to_sql
map(&:fully_qualified_name).join(", ")
end

def with_partitions
tables = flat_map do |table|
table.partitions(include_sub_partitions: true, include_self: true)
end

self.class.new(tables)
end
end
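The reviewer's composition alternative could look roughly like this (an illustrative sketch, not code from the PR): keep the `Set` in a private ivar and expose only the handful of methods callers need, so outside code cannot add a table and break the "all modes match" invariant.

```ruby
require "set"

class TableCollection
  include Enumerable

  # Hash-based stand-ins for the gem's Table objects, for illustration.
  def self.from_table_names(names, mode = nil)
    new(names.map { |name| { name: name, mode: mode } })
  end

  def initialize(tables)
    # Private, frozen Set: no public #add/#<<, so the invariant
    # established at construction time cannot be broken later.
    @tables = Set.new(tables).freeze
  end

  def each(&block)
    @tables.each(&block)
  end

  def mode
    first&.fetch(:mode)
  end
end

tables = TableCollection.from_table_names([:table_a, :table_b], :share)
# tables.mode is :share, and tables does not respond to #add or #<<
```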
end
74 changes: 48 additions & 26 deletions lib/pg_ha_migrations/safe_statements.rb

@@ -3,6 +3,14 @@ def safe_added_columns_without_default_value
@safe_added_columns_without_default_value ||= []
end

# This variable is used to track nested lock acquisition.
# Each element is a PgHaMigrations::TableCollection object.
# The order of the array represents the current call stack,
# where the most recent method call is the last element.
def safely_acquire_lock_for_table_history
@safely_acquire_lock_for_table_history ||= []
**Contributor:** Do we explicitly want to allow nested lock acquisition? If I'm reading the source directly, we didn't explicitly support nested lock acquisition before per se, though we allowed it (as long as it wasn't for the same table).

The nested checks were introduced as a result of #39 to ensure that we didn't upgrade locks, but that was focused on preventing one kind of bug rather than explicitly allowing nested locks. AFAICT it was only incidentally allowed before that.

I can't remember if we need that support for e.g. partitions. But without good justification I'm feeling a bit squeamish about supporting it: it's very easy to get into a bad situation here for multiple reasons:

1. Nested calls have the same problems vis-a-vis lock timeouts as allowing transactional DDL. You end up with nested timeouts, as well, which breaks our guarantees around timing.
2. It's easy to order calls in different ways (even with respect to SQL/DML that's executing externally to the migrations!) that could cause deadlocks.

I'm wondering if we should focus instead on safer APIs (like the multiple table locking one here) that target specific use cases directly.

Note: I'm not sure that the concern about deadlocks (2) is actually prevented by this approach either, because I assume Postgres acquires locks on the relations in order of their presentation here...so maybe that's unavoidable.

end
**Contributor Author:** This seems way cleaner than using a thread variable. Not sure why I did that in the first place...

**Contributor:** Probably the argument is that you don't have to worry about how the method is invoked.

But maybe that means we should actually query Postgres for what locks we already currently hold instead of trying to track them ourselves?
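The stack discipline the history method implements can be sketched with a plain array (illustrative only; the gem stores TableCollection objects, not symbols):

```ruby
# Each nested call pushes its table set on entry and pops it in an
# ensure block, so the last element is always the innermost lock holder.
history = []

history.push([:table_a, :table_b]) # outer safely_acquire_lock_for_table
history.push([:table_a])           # nested call on a subset
innermost = history.last           # the nested call's tables
history.pop                        # nested block finished
outermost = history.last           # back to the outer call's tables
history.pop
# history is empty again once the outer block exits
```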


def safe_create_table(table, options={}, &block)
if options[:force]
raise PgHaMigrations::UnsafeMigrationError.new(":force is NOT SAFE! Explicitly call unsafe_drop_table first if you want to recreate an existing table")
@@ -528,40 +536,54 @@ def exec_migration(conn, direction)
super(conn, direction)
end

def safely_acquire_lock_for_table(table, mode: :access_exclusive, &block)
nested_target_table = Thread.current[__method__]
def safely_acquire_lock_for_table(*tables, mode: :access_exclusive, &block)
# So this variable is always available in the ensure block
successfully_acquired_lock = false

_check_postgres_adapter!

target_table = PgHaMigrations::Table.from_table_name(table, mode)
target_tables = PgHaMigrations::TableCollection.from_table_names(tables, mode)

if nested_target_table
if nested_target_table != target_table
raise PgHaMigrations::InvalidMigrationError, "Nested lock detected! Cannot acquire lock on #{target_table.fully_qualified_name} while #{nested_target_table.fully_qualified_name} is locked."
elsif nested_target_table.mode < target_table.mode
raise PgHaMigrations::InvalidMigrationError, "Lock escalation detected! Cannot change lock level from :#{nested_target_table.mode} to :#{target_table.mode} for #{target_table.fully_qualified_name}."
end
else
Thread.current[__method__] = target_table
end
# Grab the latest locked tables from the call stack.
# This will be nil if we are not in a nested context.
nested_target_tables = safely_acquire_lock_for_table_history.last

# Locking a partitioned table will also lock child tables (including sub-partitions),
# so we need to check for blocking queries on those tables as well
target_tables = target_table.partitions(include_sub_partitions: true, include_self: true)
if nested_target_tables
if !target_tables.subset?(nested_target_tables)
raise PgHaMigrations::InvalidMigrationError,
"Nested lock detected! Cannot acquire lock on #{target_tables.to_sql} " \
"while #{nested_target_tables.to_sql} is locked."
end

successfully_acquired_lock = false
if nested_target_tables.mode < target_tables.mode
raise PgHaMigrations::InvalidMigrationError,
"Lock escalation detected! Cannot change lock level from :#{nested_target_tables.mode} " \
"to :#{target_tables.mode} for #{target_tables.to_sql}."
end
end

until successfully_acquired_lock
while (
loop do
# If in a nested context and all of the above checks have passed,
# we have already acquired the lock so this check is unnecessary.
# In fact, it could actually cause a deadlock if a blocking query
# was executed shortly after the initial lock acquisition.
break if nested_target_tables
**Contributor Author:** This is the bug I was referring to in the PR description

**Contributor:** Does it make sense to run the loop at all in this case? I.e., why break out of the loop when we could make the whole loop conditional?


blocking_transactions = PgHaMigrations::BlockingDatabaseTransactions.find_blocking_transactions("#{PgHaMigrations::LOCK_TIMEOUT_SECONDS} seconds")
blocking_transactions.any? do |query|

# Locking a partitioned table will also lock child tables (including sub-partitions),
# so we need to check for blocking queries on those tables as well
target_tables_with_partitions = target_tables.with_partitions

break unless blocking_transactions.any? do |query|
query.tables_with_locks.any? do |locked_table|
target_tables.any? do |target_table|
target_tables_with_partitions.any? do |target_table|
target_table.conflicts_with?(locked_table)
end
end
end
)

say "Waiting on blocking transactions:"
blocking_transactions.each do |blocking_transaction|
say blocking_transaction.description
@@ -570,16 +592,15 @@ def safely_acquire_lock_for_table(table, mode: :access_exclusive, &block)
end

connection.transaction do
adjust_timeout_method = connection.postgresql_version >= 9_03_00 ? :adjust_lock_timeout : :adjust_statement_timeout
begin
method(adjust_timeout_method).call(PgHaMigrations::LOCK_TIMEOUT_SECONDS) do
connection.execute("LOCK #{target_table.fully_qualified_name} IN #{target_table.mode.to_sql} MODE;")
adjust_statement_timeout(PgHaMigrations::LOCK_TIMEOUT_SECONDS) do
**Contributor Author:** Lock timeout applies to each individual relation in the query, while statement timeout applies to the entire query. So, if we were to use lock timeout here, the upper limit for the query timeout would be LOCK_TIMEOUT_SECONDS * <number of tables being locked> which seems incorrect

**Contributor:** We should add a comment to the code explaining why we don't use the (seemingly) obvious lock_timeout GUC.
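The arithmetic behind that comment, as a quick sketch (the timeout value and table names are arbitrary):

```ruby
LOCK_TIMEOUT_SECONDS = 5
tables = %i[table_a table_b table_c]

# lock_timeout is enforced per relation being locked, so the worst-case
# wait grows with the number of tables in the LOCK statement...
worst_case_lock_timeout = LOCK_TIMEOUT_SECONDS * tables.size

# ...while statement_timeout bounds the whole statement regardless of
# how many relations it locks.
worst_case_statement_timeout = LOCK_TIMEOUT_SECONDS
```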

connection.execute("LOCK #{target_tables.to_sql} IN #{target_tables.mode.to_sql} MODE;")
end
successfully_acquired_lock = true
rescue ActiveRecord::StatementInvalid => e
if e.message =~ /PG::LockNotAvailable.+ lock timeout/ || e.message =~ /PG::QueryCanceled.+ statement timeout/
if e.message =~ /PG::QueryCanceled.+ statement timeout/
sleep_seconds = PgHaMigrations::LOCK_FAILURE_RETRY_DELAY_MULTLIPLIER * PgHaMigrations::LOCK_TIMEOUT_SECONDS
say "Timed out trying to acquire #{target_table.mode.to_sql} lock on the #{target_table.fully_qualified_name} table."
say "Timed out trying to acquire #{target_tables.mode.to_sql} lock on #{target_tables.to_sql}."
say "Sleeping for #{sleep_seconds}s to allow potentially queued up queries to finish before continuing."
sleep(sleep_seconds)

@@ -590,12 +611,13 @@ def safely_acquire_lock_for_table(*tables, mode: :access_exclusive, &block)
end

if successfully_acquired_lock
safely_acquire_lock_for_table_history.push(target_tables)
block.call
end
end
end
ensure
Thread.current[__method__] = nil unless nested_target_table
safely_acquire_lock_for_table_history.pop if successfully_acquired_lock
end

# since rails versions < 7.1 has bug which does not handle symbol for