Skip to content

Commit

Permalink
Added setting `dictionary_file_max_bytes` to configure the maximum bytes…
Browse files Browse the repository at this point in the history
… size

of the yaml file in `dictionary_path` to overcome the 3MB size limit from
SnakeYaml 1.33

Fixed: #96
  • Loading branch information
kaisecheng committed May 9, 2023
1 parent 6590dce commit 7ef4b48
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 15 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 3.4.1
- Fix: YAML dictionary files larger than 3MB could not be loaded; added the `dictionary_file_max_bytes` setting to raise the size limit

## 3.4.0
- Refactor: leverage scheduler mixin [#93](https://github.com/logstash-plugins/logstash-filter-translate/pull/93)

Expand Down
10 changes: 5 additions & 5 deletions lib/logstash/filters/dictionary/file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ class File

include LogStash::Util::Loggable

def self.create(path, refresh_interval, refresh_behaviour, exact, regex)
def self.create(path, refresh_interval, refresh_behaviour, exact, regex, params)
if /\.y[a]?ml$/.match(path)
instance = YamlFile.new(path, refresh_interval, exact, regex)
instance = YamlFile.new(path, refresh_interval, exact, regex, params["dictionary_file_max_bytes"])
elsif path.end_with?(".json")
instance = JsonFile.new(path, refresh_interval, exact, regex)
elsif path.end_with?(".csv")
Expand All @@ -31,15 +31,15 @@ def self.create(path, refresh_interval, refresh_behaviour, exact, regex)

attr_reader :dictionary, :fetch_strategy

def initialize(path, refresh_interval, exact, regex)
def initialize(path, refresh_interval, exact, regex, file_max_bytes = nil)
@dictionary_path = path
@refresh_interval = refresh_interval
@short_refresh = @refresh_interval <= 300
rw_lock = java.util.concurrent.locks.ReentrantReadWriteLock.new
@write_lock = rw_lock.writeLock
@dictionary = Hash.new
@update_method = method(:merge_dictionary)
initialize_for_file_type
initialize_for_file_type(file_max_bytes)
args = [@dictionary, rw_lock]
klass = case
when exact && regex then FetchStrategy::File::ExactRegex
Expand Down Expand Up @@ -68,7 +68,7 @@ def set_update_strategy(method_sym)

protected

def initialize_for_file_type
def initialize_for_file_type(file_max_bytes)
# sub class specific initializer
end

Expand Down
3 changes: 0 additions & 3 deletions lib/logstash/filters/dictionary/json_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ class JsonFile < File

protected

def initialize_for_file_type
end

def read_file_into_dictionary
content = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
@dictionary.update(LogStash::Json.load(content)) unless content.nil? || content.empty?
Expand Down
12 changes: 7 additions & 5 deletions lib/logstash/filters/dictionary/yaml_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,20 @@ class YamlFile < File

protected

def initialize_for_file_type
def initialize_for_file_type(file_max_bytes)
@visitor = YamlVisitor.create

@parser = Psych::Parser.new(Psych::TreeBuilder.new)
@parser.code_point_limit = file_max_bytes
end

def read_file_into_dictionary
# low level YAML read that tries to create as
# few intermediate objects as possible
# this overwrites the value at key
@visitor.accept_with_dictionary(
@dictionary, Psych.parse_stream(
IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
))
yaml_string = IO.read(@dictionary_path, :mode => 'r:bom|utf-8')
@parser.parse(yaml_string, @dictionary_path)
@visitor.accept_with_dictionary(@dictionary, @parser.handler.root)
end
end
end end end
11 changes: 10 additions & 1 deletion lib/logstash/filters/translate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,11 @@ class Translate < LogStash::Filters::Base
# as the original text, and the second column as the replacement.
config :dictionary_path, :validate => :path

# Sets the maximum size, in bytes, of the file in `dictionary_path`. This setting only takes effect for YAML files.
# SnakeYAML 1.33 enforces a default limit of 3MB, and loading a YAML file over the limit raises an exception. JSON and CSV files currently have no such limit.
# If the default limit is too small for your use case, set `dictionary_file_max_bytes` to a larger number to relax the restriction.
config :dictionary_file_max_bytes, :validate => :number, :default => 3_145_728

# When using a dictionary file, this setting will indicate how frequently
# (in seconds) logstash will check the dictionary file for updates.
config :refresh_interval, :validate => :number, :default => 300
Expand Down Expand Up @@ -180,8 +185,12 @@ def register
)
end

if @dictionary_path && @dictionary_file_max_bytes <= 0
raise LogStash::ConfigurationError, "Please set a positive number in `dictionary_file_max_bytes => #{@dictionary_file_max_bytes}`."
end

if @dictionary_path
@lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex)
@lookup = Dictionary::File.create(@dictionary_path, @refresh_interval, @refresh_behaviour, @exact, @regex, params)
else
@lookup = Dictionary::Memory.new(@dictionary, @exact, @regex)
end
Expand Down
3 changes: 2 additions & 1 deletion logstash-filter-translate.gemspec
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|

s.name = 'logstash-filter-translate'
s.version = '3.4.0'
s.version = '3.4.1'
s.licenses = ['Apache License (2.0)']
s.summary = "Replaces field contents based on a hash or YAML file"
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
Expand All @@ -25,6 +25,7 @@ Gem::Specification.new do |s|
s.add_runtime_dependency 'logstash-mixin-validator_support', '~> 1.0'
s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~> 1.0'
s.add_runtime_dependency "logstash-mixin-scheduler", '~> 1.0'
s.add_runtime_dependency "psych", ">= 5.1.0"

s.add_development_dependency 'logstash-devutils'
s.add_development_dependency 'rspec-sequencing'
Expand Down
39 changes: 39 additions & 0 deletions spec/filters/translate_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,45 @@ def self.build_fixture_path(filename)
end
end

# Exercises the `dictionary_file_max_bytes` setting against the YAML fixture `dict.yml`.
# Each context only varies `dictionary_size`, which is fed into the config below.
# NOTE(review): `subject` is defined outside this block — presumably the filter built from `config`; confirm.
describe "when using a yml file with size limit" do
let(:config) do
{
"source" => "status",
"target" => "translation",
"dictionary_path" => dictionary_path,
"dictionary_file_max_bytes" => dictionary_size # the file is 18 bytes
}
end
let(:dictionary_path) { TranslateUtil.build_fixture_path("dict.yml") }
let(:event) { LogStash::Event.new("status" => "a") }

# Limit is one byte below the 18-byte fixture size, so `register` is expected to raise.
# NOTE(review): the matched message presumably originates from the underlying YAML parser — confirm.
context "file is over size limit" do
let(:dictionary_size) { 17 }

it "raises exception" do
expect { subject.register }.to raise_error(/The incoming YAML document exceeds/)
end
end

# Limit equals the fixture size, so loading succeeds and the lookup of "a" is expected to yield 1.
context "file is within size limit" do
let(:dictionary_size) { 18 }

it "returns the exact translation" do
subject.register
subject.filter(event)
expect(event.get("translation")).to eq(1)
end
end

# A zero limit is expected to be rejected with a configuration error during `register`.
context "file size set to zero" do
let(:dictionary_size) { 0 }

it "raises configuration exception" do
expect { subject.register }.to raise_error(LogStash::ConfigurationError, /Please set a positive number/)
end
end
end

context "when using a map tagged yml file" do
let(:dictionary_path) { TranslateUtil.build_fixture_path("tag-map-dict.yml") }
let(:event) { LogStash::Event.new("status" => "six") }
Expand Down

0 comments on commit 7ef4b48

Please sign in to comment.