Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated fix to Convert dots in field names to nested objects #73

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## next
- Add nested_objects configuration to parse nested object and arrays

## 3.3.2
- Fix: avoid panic when handling very-large exponent-notation `_@timestamp` values [#71](https://github.com/logstash-plugins/logstash-input-gelf/pull/71)

Expand Down
8 changes: 8 additions & 0 deletions docs/index.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ as an "additional" field, beginning with an underscore.

e.g. `\_foo` becomes `foo`

[id="plugins-{type}s-{plugin}-nested_objects"]
===== `nested_objects`

* Value type is <<boolean,boolean>>
* Default value is `false`

Whether or not to process dots in fields or leave them in place.




Expand Down
101 changes: 95 additions & 6 deletions lib/logstash/inputs/gelf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class LogStash::Inputs::Gelf < LogStash::Inputs::Base
#
config :strip_leading_underscore, :validate => :boolean, :default => true

# Whether or not to process dots in fields or leave them in place.
config :nested_objects, :validate => :boolean, :default => false

RECONNECT_BACKOFF_SLEEP = 5
TIMESTAMP_GELF_FIELD = "timestamp".freeze
SOURCE_HOST_FIELD = "source_host".freeze
Expand Down Expand Up @@ -149,9 +152,7 @@ def tcp_listener(output_queue)
event = self.class.new_event(data_in, client.peeraddr[3])
next if event.nil?

remap_gelf(event) if @remap
strip_leading_underscore(event) if @strip_leading_underscore
decorate(event)
process_event(event)
output_queue << event
end

Expand Down Expand Up @@ -206,9 +207,7 @@ def udp_listener(output_queue)
event = self.class.new_event(data, client[3])
next if event.nil?

remap_gelf(event) if @remap
strip_leading_underscore(event) if @strip_leading_underscore
decorate(event)
process_event(event)

output_queue << event
end
Expand Down Expand Up @@ -236,6 +235,13 @@ def self.new_event(json_gelf, host)
event
end

def process_event(event)
remap_gelf(event) if @remap
strip_leading_underscore(event) if @strip_leading_underscore
handle_nested_objects(event) if @nested_objects
decorate(event)
end

# transform a given timestamp value into a proper LogStash::Timestamp, preserving microsecond precision
# and work around a JRuby issue with Time.at loosing fractional part with BigDecimal.
# @param timestamp [Numeric] a Numeric (integer, float or bigdecimal) timestampo representation
Expand Down Expand Up @@ -291,4 +297,87 @@ def coerce_timestamp_carefully(value)
value = BigDecimal(value) if value.kind_of?(String)
self.class.coerce_timestamp(value)
end

def handle_nested_objects(event)
base_target = event.to_hash
base_target.keys.each do |key|
next unless key.include? ?.
value = event.get(key)
previous_key = nil
first_key = nil
target = base_target

keys = key.split(".")
if key =~ /\.$/
keys.push("");
end

keys.each do |sub_key|
if previous_key.nil?
first_key = sub_key
else
#skip first sub_key
unless array_or_hash_has_element?(target, previous_key)
if key_is_number(sub_key)
set_in_array_or_hash(target, previous_key, Array.new)
else
set_in_array_or_hash(target, previous_key, Hash.new)
end
end
new_target = get_in_array_or_hash(target, previous_key)
if new_target.is_a?(Array) and !key_is_number(sub_key)
# key is not an integer, so we need to convert array to hash
new_target = Hash[new_target.map.with_index { |x, i| [i, x] }]
set_in_array_or_hash(target, previous_key, new_target)
end
target = new_target
end
previous_key = sub_key
end
set_in_array_or_hash(target, previous_key, value)
event.remove(key)
event.set(first_key, base_target[first_key])
end
rescue => e
@logger.warn("Failed to parse nested objects: #{e.message}")
event.tag("_nested_objects_failure")
end

def key_is_number(key)
key =~ /^\d+$/
end

def get_in_array_or_hash(container, key)
if container.is_a?(Array)
container[Integer(key)]
elsif container.is_a?(Hash)
container[key]
else
raise "not an array or hash"
end
end

def set_in_array_or_hash(container, key, value)
if container.is_a?(Array)
container[Integer(key)] = value
elsif container.is_a?(Hash)
container[key] = value
else
raise "not an array or hash"
end
end

def array_or_hash_has_element?(container, key)
if container.is_a?(Array)
if !key_is_number(key)
return false
else
!container[Integer(key)].nil?
end
elsif container.is_a?(Hash)
container.key?(key)
else
raise "not an array or hash"
end
end
end
62 changes: 62 additions & 0 deletions spec/inputs/gelf_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,66 @@ def client_bootstrap(gelfclient, queue)
end
end
end

describe "when handling complex messages" do
let(:host) { "127.0.0.1" }
let(:port) { 12211 }
let(:chunksize) { 1420 }
let(:gelfclient) { GELF::Notifier.new(host, port, chunksize) }

let(:config) { { "port" => port, "host" => host, "nested_objects" => true } }
let(:queue) { Queue.new }

subject(:gelf_input) { described_class.new(config) }

before(:each) do
subject.register
@runner = Thread.new { subject.run(queue) }

client_bootstrap(gelfclient, queue)
end

after(:each) do
subject.do_stop
@runner.kill
@runner.join
end

it "should accept hash fields using dot notation" do
gelfclient.notify!("short_message" => "test nested hash",
"_toto.titi" => "objectValue",
"_empty." => "pouet",
)
e = queue.pop
expect(e.get("toto")["titi"]).to eq("objectValue")
expect(e.get("empty")[""]).to eq("pouet")
end

it "should accept array fields using dot notation" do
gelfclient.notify!("short_message" => "test nested array",
"_foo.0" => "first",
"_foo.1" => "second",
"_ca.0.titi" => "1",
"_ca.1.titi" => "2",
)
e = queue.pop
expect(e.get("foo")).to eq(["first", "second"])
expect(e.get("ca")[0]["titi"]).to eq("1")
expect(e.get("ca")[1]["titi"]).to eq("2")
end

it "should accept hash looking like an array" do
gelfclient.notify!("short_message" => "test nested array",
"_not_an_array.0" => "bob",
"_not_an_array.1" => "alice",
"_not_an_array.length" => "carol",
)
e = queue.pop
value = e.get("not_an_array")
expect(value).to be_a(Hash)
expect(value["0"]).to eq("bob")
expect(value["1"]).to eq("alice")
expect(value["length"]).to eq("carol")
end
end
end