diff --git a/CHANGELOG.md b/CHANGELOG.md index efaaeb3..33fedf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## next + - Add nested_objects configuration to parse nested object and arrays + ## 3.3.2 - Fix: avoid panic when handling very-large exponent-notation `_@timestamp` values [#71](https://github.com/logstash-plugins/logstash-input-gelf/pull/71) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 2c5036b..dbd6e6e 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -127,6 +127,14 @@ as an "additional" field, beginning with an underscore. e.g. `\_foo` becomes `foo` +[id="plugins-{type}s-{plugin}-nested_objects"] +===== `nested_objects` + + * Value type is <> + * Default value is `false` + +Whether or not to process dots in fields or leave them in place. + diff --git a/lib/logstash/inputs/gelf.rb b/lib/logstash/inputs/gelf.rb index d8b791b..1d78fe0 100644 --- a/lib/logstash/inputs/gelf.rb +++ b/lib/logstash/inputs/gelf.rb @@ -47,6 +47,9 @@ class LogStash::Inputs::Gelf < LogStash::Inputs::Base # config :strip_leading_underscore, :validate => :boolean, :default => true + # Whether or not to process dots in fields or leave them in place. + config :nested_objects, :validate => :boolean, :default => false + RECONNECT_BACKOFF_SLEEP = 5 TIMESTAMP_GELF_FIELD = "timestamp".freeze SOURCE_HOST_FIELD = "source_host".freeze @@ -235,6 +238,7 @@ def self.new_event(json_gelf, host) def process_event(event) remap_gelf(event) if @remap strip_leading_underscore(event) if @strip_leading_underscore + handle_nested_objects(event) if @nested_objects decorate(event) end @@ -293,4 +297,87 @@ def coerce_timestamp_carefully(value) value = BigDecimal(value) if value.kind_of?(String) self.class.coerce_timestamp(value) end + + def handle_nested_objects(event) + base_target = event.to_hash + base_target.keys.each do |key| + next unless key.include? ?. + value = event.get(key) + previous_key = nil + first_key = nil + target = base_target + + keys = key.split(".") + if key =~ /\.$/ + keys.push(""); + end + + keys.each do |sub_key| + if previous_key.nil? + first_key = sub_key + else + #skip first sub_key + unless array_or_hash_has_element?(target, previous_key) + if key_is_number(sub_key) + set_in_array_or_hash(target, previous_key, Array.new) + else + set_in_array_or_hash(target, previous_key, Hash.new) + end + end + new_target = get_in_array_or_hash(target, previous_key) + if new_target.is_a?(Array) and !key_is_number(sub_key) + # key is not an integer, so we need to convert array to hash + new_target = Hash[new_target.map.with_index { |x, i| [i, x] }] + set_in_array_or_hash(target, previous_key, new_target) + end + target = new_target + end + previous_key = sub_key + end + set_in_array_or_hash(target, previous_key, value) + event.remove(key) + event.set(first_key, base_target[first_key]) + end + rescue => e + @logger.warn("Failed to parse nested objects: #{e.message}") + event.tag("_nested_objects_failure") + end + + def key_is_number(key) + key =~ /^\d+$/ + end + + def get_in_array_or_hash(container, key) + if container.is_a?(Array) + container[Integer(key)] + elsif container.is_a?(Hash) + container[key] + else + raise "not an array or hash" + end + end + + def set_in_array_or_hash(container, key, value) + if container.is_a?(Array) + container[Integer(key)] = value + elsif container.is_a?(Hash) + container[key] = value + else + raise "not an array or hash" + end + end + + def array_or_hash_has_element?(container, key) + if container.is_a?(Array) + if !key_is_number(key) + return false + else + !container[Integer(key)].nil? + end + elsif container.is_a?(Hash) + container.key?(key) + else + raise "not an array or hash" + end + end end diff --git a/spec/inputs/gelf_spec.rb b/spec/inputs/gelf_spec.rb index b2e0e03..2518ef0 100644 --- a/spec/inputs/gelf_spec.rb +++ b/spec/inputs/gelf_spec.rb @@ -238,4 +238,66 @@ def client_bootstrap(gelfclient, queue) end end end + + describe "when handling complex messages" do + let(:host) { "127.0.0.1" } + let(:port) { 12211 } + let(:chunksize) { 1420 } + let(:gelfclient) { GELF::Notifier.new(host, port, chunksize) } + + let(:config) { { "port" => port, "host" => host, "nested_objects" => true } } + let(:queue) { Queue.new } + + subject(:gelf_input) { described_class.new(config) } + + before(:each) do + subject.register + @runner = Thread.new { subject.run(queue) } + + client_bootstrap(gelfclient, queue) + end + + after(:each) do + subject.do_stop + @runner.kill + @runner.join + end + + it "should accept hash fields using dot notation" do + gelfclient.notify!("short_message" => "test nested hash", + "_toto.titi" => "objectValue", + "_empty." => "pouet", + ) + e = queue.pop + expect(e.get("toto")["titi"]).to eq("objectValue") + expect(e.get("empty")[""]).to eq("pouet") + end + + it "should accept array fields using dot notation" do + gelfclient.notify!("short_message" => "test nested array", + "_foo.0" => "first", + "_foo.1" => "second", + "_ca.0.titi" => "1", + "_ca.1.titi" => "2", + ) + e = queue.pop + expect(e.get("foo")).to eq(["first", "second"]) + expect(e.get("ca")[0]["titi"]).to eq("1") + expect(e.get("ca")[1]["titi"]).to eq("2") + end + + it "should accept hash looking like an array" do + gelfclient.notify!("short_message" => "test nested array", + "_not_an_array.0" => "bob", + "_not_an_array.1" => "alice", + "_not_an_array.length" => "carol", + ) + e = queue.pop + value = e.get("not_an_array") + expect(value).to be_a(Hash) + expect(value["0"]).to eq("bob") + expect(value["1"]).to eq("alice") + expect(value["length"]).to eq("carol") + end + end end