From b1eeb47d1800e5220118631641e271dc979fda86 Mon Sep 17 00:00:00 2001 From: schcamille <97245253+schcamille@users.noreply.github.com> Date: Tue, 18 Apr 2023 10:44:23 +0200 Subject: [PATCH] Improve the performance of intercept_load (#5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Don’t raise an exception in dry run when the safe type symbol isn't found trusted. * Don’t process symbol table diff for gems, as SAR ignore SQL statements made in gems * Don’t process symbol table diff for already processed "require" code paths * Small performance improvement in the symbol table diff operation * Small fix when using "require_relative" with .so files --- README.md | 2 +- lib/safe_active_record/load.rb | 61 ++++++++++++++++++-- lib/safe_active_record/safe_query_manager.rb | 4 ++ lib/safe_active_record/safe_type.rb | 8 ++- spec/load_spec.rb | 8 +++ spec/loaded_6.so | 0 6 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 spec/loaded_6.so diff --git a/README.md b/README.md index 94c8e5c..42b1a57 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ The method takes in a hash into which a few options can be passed: * `safe_query_mode`: `:strict` or `:lax`; `:lax` mode allows usage of `RiskilyAssumeTrustedString` type. * `dry_run`: true/false, default to false; when set to true, only warnings - will be emitted but otherwise an exception will be raised when unconforming + will be emitted, an exception will not be raised when unconforming types are passed in. * `intercept_load`: true/false, default to false; when set to true, `require`/`load`/`require_relative` will be intercepted in order to diff --git a/lib/safe_active_record/load.rb b/lib/safe_active_record/load.rb index 1b54a9d..81f0f07 100644 --- a/lib/safe_active_record/load.rb +++ b/lib/safe_active_record/load.rb @@ -15,8 +15,48 @@ # limitations under the License. require 'safe_active_record/safe_query_manager' +require 'set' module SafeActiveRecord + @visited = Set.new + @gemlist = Set.new + + def self.init_visited + if $LOADED_FEATURES.is_a? Enumerable + @visited.merge($LOADED_FEATURES) + # Also merge the relative imported paths + $LOADED_FEATURES.each do |path| + # Add the relative path to the visited set, removing the base path from LOAD_PATH and the file extension + $LOAD_PATH.each { |start_path| @visited.add(path[start_path.size + 1..-4]) if path.start_with?(start_path) } + end + end + + # Usually gem path starts with the gem name, exceptions incl. google gems + # TODO: find a better way to do this without hardcoding specific use-cases + @gemlist.merge(Gem::Specification.map { |g| g.name.start_with?('google-') ? 'google' : g.name }) + end + + def self.skip_symbols_diff(args, method) + if args.instance_of?(Array) && args.size == 1 && args[0].instance_of?(String) + # Load is mainly used for development to reload code without restarting the app + return true if @visited.include?(args[0]) && method == 'require' + + # We don't need to process gems as SAR ignore SQL statements made in them + return true if args[0].start_with?(Gem.dir) + + start_path = args[0].split('/', 2)[0] + return true if !start_path.nil? && !start_path.empty? && @gemlist.include?(start_path) + end + false + end + + def self.visit(args, method) + return if method == 'load' + return unless args.instance_of?(Array) && args.size == 1 && args[0].instance_of?(String) + + @visited.add(args[0]) + end + def self.apply_load_patch(safe_query_mgr) # intercept_load should not be used in production if defined?(::Rails.env) && defined?(::Rails.logger.warn) && ::Rails.env.production? @@ -34,13 +74,24 @@ def self.apply_load_patch(safe_query_mgr) undef_method original_method define_method original_method do |*args| - if safe_query_mgr.activated? && safe_query_mgr.intercept_load? + if safe_query_mgr.activated? && safe_query_mgr.intercept_load? && !SafeActiveRecord.skip_symbols_diff(args, original_method.to_s) pre_symbols = Symbol.all_symbols result = method(:"safe_ar_original_#{original_method}").call(*args) - delta = Symbol.all_symbols - pre_symbols - safe_query_mgr.add_safe_queries delta + if result + post_symbols = Symbol.all_symbols + # If the last symbol of the pre table is the same in the post table, this means only additions happened + # (no symbol deletion), so we can get the delta by slicing the post-table using the pre-table size as lower + # boundary + delta = if post_symbols[pre_symbols.size - 1] == pre_symbols[-1] + post_symbols[pre_symbols.size...] + else + post_symbols - pre_symbols + end + safe_query_mgr.add_safe_queries delta if delta.is_a? Enumerable + end + SafeActiveRecord.visit(args, original_method.to_s) else result = method(:"safe_ar_original_#{original_method}").call(*args) end @@ -53,7 +104,7 @@ def self.apply_load_patch(safe_query_mgr) apply_patch.call(:require) apply_patch.call(:load) - # Kernel.reqiure_relative needs special treatment due to relative path rebase. + # Kernel.require_relative needs special treatment due to relative path rebase. unless Kernel.private_method_defined? :safe_ar_original_require_relative Kernel.module_eval do alias_method :safe_ar_original_require_relative, :require_relative @@ -69,7 +120,9 @@ def self.apply_load_patch(safe_query_mgr) # No need to calculate the delta of symbol since `require` is already # decorated. + # Both .rb and .so files can be imported. C extentions use .so files for example. abspath = "#{abspath}.rb" unless File.exist?(abspath) + abspath = "#{abspath[...-3]}.so" unless File.exist?(abspath) require(File.realpath(abspath)) end end diff --git a/lib/safe_active_record/safe_query_manager.rb b/lib/safe_active_record/safe_query_manager.rb index 158abd2..c490058 100644 --- a/lib/safe_active_record/safe_query_manager.rb +++ b/lib/safe_active_record/safe_query_manager.rb @@ -63,6 +63,10 @@ def activate!(options = {}) end + # Add the current loaded dependencies to the visited global set to improve + # intercept load performance + SafeActiveRecord.init_visited if @options[:intercept_load] + add_safe_queries Symbol.all_symbols @safe_queries.freeze unless @options[:intercept_load] diff --git a/lib/safe_active_record/safe_type.rb b/lib/safe_active_record/safe_type.rb index 7740749..40f9def 100644 --- a/lib/safe_active_record/safe_type.rb +++ b/lib/safe_active_record/safe_type.rb @@ -28,8 +28,12 @@ def initialize(str) raise StandardErrors, "SafeQueryManager must be activated before instantiation of #{self.class.name}!" unless mgr.activated? unless mgr.safe_query?(str) - raise ArgumentError, "The symbol isn't found trusted. It could be that it is created dynamically from a string," \ - "or the source where the caller belongs to isn't eager loaded." + exception = ArgumentError.new( + "The symbol isn't found trusted when calling `#{caller_locations.first}`. It could be that it is created " \ + "dynamically from a string, or the source where the caller belongs to isn't eager loaded." + ) + SafeActiveRecord.report_violation(exception) + raise exception unless mgr.dry_run? end @raw_str = str.to_s diff --git a/spec/load_spec.rb b/spec/load_spec.rb index 85ae1f8..5789e1c 100644 --- a/spec/load_spec.rb +++ b/spec/load_spec.rb @@ -89,6 +89,14 @@ def remove_load_patch expect(mgr.safe_query?('relative load static query 1'.to_sym)).to be true end + it 'allows loading relative .so files' do + mgr = SafeActiveRecord::SafeQueryManager.new + SafeActiveRecord.apply_load_patch(mgr) + mgr.activate!({ intercept_load: true }) + + expect { require_relative('loaded_6') }.to raise_error(LoadError) + end + after(:each) do remove_load_patch end diff --git a/spec/loaded_6.so b/spec/loaded_6.so new file mode 100644 index 0000000..e69de29