From cb766737e47dcb276dff31de4963af64b8e10b95 Mon Sep 17 00:00:00 2001 From: kares Date: Thu, 14 Nov 2019 14:51:28 +0100 Subject: [PATCH 1/8] Test: avoid Fixnum deprecation in spec --- spec/filters/grok_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index f989982..a2615b7 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -199,7 +199,7 @@ def pattern_path(path) sample "400 454.33" do insist { subject.get("foo") } == 400 - insist { subject.get("foo") }.is_a?(Fixnum) + insist { subject.get("foo") }.is_a?(Integer) insist { subject.get("bar") } == 454.33 insist { subject.get("bar") }.is_a?(Float) end From c6d491811e26a8c189332ce6d5af7cae6e7e833b Mon Sep 17 00:00:00 2001 From: kares Date: Thu, 14 Nov 2019 15:15:24 +0100 Subject: [PATCH 2/8] Added: support for grouped timeout per group of patterns --- lib/logstash/filters/grok.rb | 47 ++++++++------- lib/logstash/filters/grok/timeout_support.rb | 62 ++++++++++++++++++++ spec/filters/grok_spec.rb | 47 ++++++++++++++- 3 files changed, 134 insertions(+), 22 deletions(-) create mode 100644 lib/logstash/filters/grok/timeout_support.rb diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index ad6dba2..db23b11 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -140,10 +140,14 @@ # `SYSLOGBASE` pattern which itself is defined by other patterns. # # Another option is to define patterns _inline_ in the filter using `pattern_definitions`. - # This is mostly for convenience and allows user to define a pattern which can be used just in that + # This is mostly for convenience and allows user to define a pattern which can be used just in that # filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. # class LogStash::Filters::Grok < LogStash::Filters::Base + require 'logstash/filters/grok/timeout_support' + + include TimeoutSupport + config_name "grok" # A hash of matches of field => value @@ -168,7 +172,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # necessarily need to define this yourself unless you are adding additional # patterns. You can point to multiple pattern directories using this setting. # Note that Grok will read all files in the directory matching the patterns_files_glob - # and assume it's a pattern file (including any tilde backup files). + # and assume it's a pattern file (including any tilde backup files). # [source,ruby] # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"] # @@ -215,6 +219,12 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Set to 0 to disable timeouts config :timeout_millis, :validate => :number, :default => 30000 + # Group timeout for multiple patterns instead of having a timeout per pattern. + # When set to true timeout_millis effectively becomes a timeout over several patterns. + # Default is false due backwards compatibility. + # Has only an effect when timeout_millis > 0 + config :timeout_grouped, :validate => :boolean, :default => false + # Tag to apply if a grok regexp times out. config :tag_on_timeout, :validate => :string, :default => '_groktimeout' @@ -278,10 +288,7 @@ def register @match_counter = metric.counter(:matches) @failure_counter = metric.counter(:failures) - # divide by float to allow fractionnal seconds, the Timeout class timeout value is in seconds but the underlying - # executor resolution is in microseconds so fractionnal second parameter down to microseconds is possible. - # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125 - @timeout_seconds = @timeout_millis / 1000.0 + @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout.new end # def register def filter(event) @@ -334,26 +341,24 @@ def match(groks, field, event) end def match_against_groks(groks, field, input, event) + # Convert anything else to string (number, hash, etc) input = input.to_s matched = false - groks.each do |grok| - # Convert anything else to string (number, hash, etc) - matched = grok_till_timeout(grok, field, input) - if matched - grok.capture(matched) {|field, value| handle(field, value, event)} - break if @break_on_match + + context = GrokContext.new + with_timeout_if(@timeout_grouped, context) do + groks.each do |grok| + context.update(grok, field, input) + + matched = with_timeout_if(!@timeout_grouped, context) { grok.execute(input) } + if matched + grok.capture(matched) { |field, value| handle(field, value, event) } + break if @break_on_match + end end end - - matched - end - def grok_till_timeout(grok, field, value) - begin - @timeout_seconds > 0.0 ? Timeout.timeout(@timeout_seconds, TimeoutError) { grok.execute(value) } : grok.execute(value) - rescue TimeoutError - raise GrokTimeoutException.new(grok, field, value) - end + matched end def handle(field, value, event) diff --git a/lib/logstash/filters/grok/timeout_support.rb b/lib/logstash/filters/grok/timeout_support.rb new file mode 100644 index 0000000..fb33833 --- /dev/null +++ b/lib/logstash/filters/grok/timeout_support.rb @@ -0,0 +1,62 @@ +# encoding: utf-8 + +require 'timeout' + +# NOTE: use nested module structure so that `TimeoutError` is picked from parent (`Grok`). +module LogStash module Filters class Grok + # Helper module to manage the timeout helper pieces. + module TimeoutSupport + + def with_timeout_if(condition, context, &block) + if condition + with_timeout(context, &block) + else + yield + end + end + + def with_timeout(context, &block) + @timeout.exec(&block) + rescue TimeoutError => error + handle_timeout(context, error) + end + + def handle_timeout(context, error) + raise GrokTimeoutException.new(context.grok, context.field, context.input) + end + + GrokContext = Struct.new(:grok, :field, :input) do + + def update(grok, field, input) + self.grok = grok + self.field = field + self.input = input + end + + end + + class NoopTimeout + + def exec + yield + end + + end + + class RubyTimeout + + def initialize(timeout_millis) + # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying + # executor resolution is in microseconds so fractional second parameter down to microseconds is possible. + # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125 + @timeout_seconds = timeout_millis / 1000.0 + end + + def exec(&block) + Timeout.timeout(@timeout_seconds, TimeoutError, &block) + end + + end + + end # TimeoutSupport +end; end; end diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index a2615b7..6a3c72d 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -412,7 +412,7 @@ def pattern_path(path) filter { grok { match => { - message => "(.*a){30}" + "message" => "(.*a){30}" } timeout_millis => 100 } @@ -425,6 +425,51 @@ def pattern_path(path) end end + describe "no timeout on failure with multiple patterns (when timeout not grouped)" do + config <<-CONFIG + filter { + grok { + match => { + "message" => [ + "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}", + "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", + "(.*a){20}" + ] + } + timeout_millis => 500 + timeout_grouped => false + } + } + CONFIG + + sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do + insist { subject.get("tags") }.nil? + end + end + + describe "timeout on grouped (multi-pattern) failure" do + config <<-CONFIG + filter { + grok { + match => { + "message" => [ + "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}", + "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", + "(.*a){20}" + ] + } + timeout_millis => 500 + timeout_grouped => true + } + } + CONFIG + + sample( 'b' * 10 + 'c' * 10 + 'd' * 10 + 'e' * 10 + ' ' + 'a' * 20 ) do + expect(subject.get("tags")).to include("_groktimeout") + expect(subject.get("tags")).not_to include("_grokparsefailure") + end + end + describe "tagging on failure" do config <<-CONFIG filter { From ed14a5f26350e5592e0e8b9f8fddbc65b99d469f Mon Sep 17 00:00:00 2001 From: kares Date: Fri, 15 Nov 2019 15:47:59 +0100 Subject: [PATCH 3/8] Refactor: extract timeout switch in method to classes this is motivated by reduced performance for the default case since JRuby does not yet inline blocks the additional 'dummy' block passing (due timeout) had a small impact on performance. --- lib/logstash/filters/grok.rb | 53 ++++++++++++++++---- lib/logstash/filters/grok/timeout_support.rb | 15 ++++-- 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index db23b11..2664f76 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -289,6 +289,7 @@ def register @failure_counter = metric.counter(:failures) @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout.new + @matcher = @timeout_grouped ? GlobalMatcher.new(self) : DefaultMatcher.new(self) end # def register def filter(event) @@ -342,23 +343,56 @@ def match(groks, field, event) def match_against_groks(groks, field, input, event) # Convert anything else to string (number, hash, etc) - input = input.to_s - matched = false + context = GrokContext.new(field, input.to_s) + @matcher.match(context, groks, event, @break_on_match) + end + + # Internal (base) helper to handle the global timeout switch. + # @private + class Matcher + + def initialize(filter) + @filter = filter + end + + def match(context, groks, event, break_on_match) + matched = false - context = GrokContext.new - with_timeout_if(@timeout_grouped, context) do groks.each do |grok| - context.update(grok, field, input) + context.set_grok(grok) - matched = with_timeout_if(!@timeout_grouped, context) { grok.execute(input) } + matched = execute(context, grok) if matched - grok.capture(matched) { |field, value| handle(field, value, event) } - break if @break_on_match + grok.capture(matched) { |field, value| @filter.handle(field, value, event) } + break if break_on_match end end + + matched + end + + protected + + def execute(context, grok) + grok.execute(context.input) + end + + end + + # @private + class GlobalMatcher < Matcher + # @override + def match(context, groks, event, break_on_match) + @filter.with_timeout(context) { super } end + end - matched + # @private + class DefaultMatcher < Matcher + # @override + def execute(context, grok) + @filter.with_timeout(context) { super } + end end def handle(field, value, event) @@ -383,6 +417,7 @@ def handle(field, value, event) end end end + public :handle def patterns_files_from_paths(paths, glob) patternfiles = [] diff --git a/lib/logstash/filters/grok/timeout_support.rb b/lib/logstash/filters/grok/timeout_support.rb index fb33833..6e6013b 100644 --- a/lib/logstash/filters/grok/timeout_support.rb +++ b/lib/logstash/filters/grok/timeout_support.rb @@ -25,12 +25,17 @@ def handle_timeout(context, error) raise GrokTimeoutException.new(context.grok, context.field, context.input) end - GrokContext = Struct.new(:grok, :field, :input) do + class GrokContext - def update(grok, field, input) - self.grok = grok - self.field = field - self.input = input + attr_reader :grok, :field, :input + + def initialize(field = nil, input = nil) + @field = field + @input = input + end + + def set_grok(grok) + @grok = grok end end From 36f2eee8840caa2ff5bef13957c7280334af669e Mon Sep 17 00:00:00 2001 From: kares Date: Fri, 15 Nov 2019 16:50:22 +0100 Subject: [PATCH 4/8] Refactor: use timeout_scope as the new config name --- lib/logstash/filters/grok.rb | 13 +++++++------ spec/filters/grok_spec.rb | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 2664f76..c9db9c5 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -219,11 +219,12 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Set to 0 to disable timeouts config :timeout_millis, :validate => :number, :default => 30000 - # Group timeout for multiple patterns instead of having a timeout per pattern. - # When set to true timeout_millis effectively becomes a timeout over several patterns. + # Timeouts are per pattern matched by default but for multiple matches over a + # particular field it's usually better to scope the timeout for the whole event. + # The `timeout_millis` than effectively becomes a timeout over several matches. # Default is false due backwards compatibility. # Has only an effect when timeout_millis > 0 - config :timeout_grouped, :validate => :boolean, :default => false + config :timeout_scope, :validate => %w(pattern event), :default => "pattern" # Tag to apply if a grok regexp times out. config :tag_on_timeout, :validate => :string, :default => '_groktimeout' @@ -289,7 +290,7 @@ def register @failure_counter = metric.counter(:failures) @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout.new - @matcher = @timeout_grouped ? GlobalMatcher.new(self) : DefaultMatcher.new(self) + @matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self) end # def register def filter(event) @@ -380,7 +381,7 @@ def execute(context, grok) end # @private - class GlobalMatcher < Matcher + class EventTimeoutMatcher < Matcher # @override def match(context, groks, event, break_on_match) @filter.with_timeout(context) { super } @@ -388,7 +389,7 @@ def match(context, groks, event, break_on_match) end # @private - class DefaultMatcher < Matcher + class PatternTimeoutMatcher < Matcher # @override def execute(context, grok) @filter.with_timeout(context) { super } diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 6a3c72d..5d7164d 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -437,7 +437,7 @@ def pattern_path(path) ] } timeout_millis => 500 - timeout_grouped => false + timeout_scope => 'pattern' } } CONFIG @@ -459,7 +459,7 @@ def pattern_path(path) ] } timeout_millis => 500 - timeout_grouped => true + timeout_scope => 'event' } } CONFIG From c93ed12670dff5cede279378c8c6b6e43cb7486c Mon Sep 17 00:00:00 2001 From: kares Date: Fri, 15 Nov 2019 16:53:45 +0100 Subject: [PATCH 5/8] Chore: update changelog + version for release --- CHANGELOG.md | 3 +++ logstash-filter-grok.gemspec | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a04bc9..33cc273 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.1.2 + - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153) + ## 4.1.1 - Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148) diff --git a/logstash-filter-grok.gemspec b/logstash-filter-grok.gemspec index 9c3c37e..72b4227 100644 --- a/logstash-filter-grok.gemspec +++ b/logstash-filter-grok.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-filter-grok' - s.version = '4.1.1' + s.version = '4.2.0' s.licenses = ['Apache License (2.0)'] s.summary = "Parses unstructured event data into fields" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" From a45470fa91fbcf4026d5e778de521c01b3821ce4 Mon Sep 17 00:00:00 2001 From: kares Date: Fri, 15 Nov 2019 17:04:42 +0100 Subject: [PATCH 6/8] Refactor: review and move timeout pieces back --- lib/logstash/filters/grok.rb | 54 ++++++++++++++-- lib/logstash/filters/grok/timeout_support.rb | 67 -------------------- 2 files changed, 49 insertions(+), 72 deletions(-) delete mode 100644 lib/logstash/filters/grok/timeout_support.rb diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index c9db9c5..b13c897 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -144,10 +144,6 @@ # filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. # class LogStash::Filters::Grok < LogStash::Filters::Base - require 'logstash/filters/grok/timeout_support' - - include TimeoutSupport - config_name "grok" # A hash of matches of field => value @@ -289,7 +285,7 @@ def register @match_counter = metric.counter(:matches) @failure_counter = metric.counter(:failures) - @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout.new + @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE @matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self) end # def register @@ -479,4 +475,52 @@ def trunc_value end end end + + def with_timeout(context, &block) + @timeout.exec(&block) + rescue TimeoutError => error + handle_timeout(context, error) + end + public :with_timeout + + def handle_timeout(context, error) + raise GrokTimeoutException.new(context.grok, context.field, context.input) + end + + # @private + class GrokContext + attr_reader :grok, :field, :input + + def initialize(field, input) + @field = field + @input = input + end + + def set_grok(grok) + @grok = grok + end + end + + # @private + class NoopTimeout + INSTANCE = new + + def exec + yield + end + end + + # @private + class RubyTimeout + def initialize(timeout_millis) + # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying + # executor resolution is in microseconds so fractional second parameter down to microseconds is possible. + # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125 + @timeout_seconds = timeout_millis / 1000.0 + end + + def exec(&block) + Timeout.timeout(@timeout_seconds, TimeoutError, &block) + end + end end # class LogStash::Filters::Grok diff --git a/lib/logstash/filters/grok/timeout_support.rb b/lib/logstash/filters/grok/timeout_support.rb deleted file mode 100644 index 6e6013b..0000000 --- a/lib/logstash/filters/grok/timeout_support.rb +++ /dev/null @@ -1,67 +0,0 @@ -# encoding: utf-8 - -require 'timeout' - -# NOTE: use nested module structure so that `TimeoutError` is picked from parent (`Grok`). -module LogStash module Filters class Grok - # Helper module to manage the timeout helper pieces. - module TimeoutSupport - - def with_timeout_if(condition, context, &block) - if condition - with_timeout(context, &block) - else - yield - end - end - - def with_timeout(context, &block) - @timeout.exec(&block) - rescue TimeoutError => error - handle_timeout(context, error) - end - - def handle_timeout(context, error) - raise GrokTimeoutException.new(context.grok, context.field, context.input) - end - - class GrokContext - - attr_reader :grok, :field, :input - - def initialize(field = nil, input = nil) - @field = field - @input = input - end - - def set_grok(grok) - @grok = grok - end - - end - - class NoopTimeout - - def exec - yield - end - - end - - class RubyTimeout - - def initialize(timeout_millis) - # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying - # executor resolution is in microseconds so fractional second parameter down to microseconds is possible. - # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125 - @timeout_seconds = timeout_millis / 1000.0 - end - - def exec(&block) - Timeout.timeout(@timeout_seconds, TimeoutError, &block) - end - - end - - end # TimeoutSupport -end; end; end From 0224744975830b2345d95a09c88a65cafb10dade Mon Sep 17 00:00:00 2001 From: kares Date: Fri, 15 Nov 2019 17:25:52 +0100 Subject: [PATCH 7/8] Docs: update plugin docs with timeout_scope config --- docs/index.asciidoc | 17 +++++++++++++++++ lib/logstash/filters/grok.rb | 13 ++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index abe7129..f60dbd6 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -195,6 +195,7 @@ This plugin supports the following configuration options plus the <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No |======================================================================= Also see <> for a list of options supported by all @@ -356,6 +357,22 @@ This will never timeout early, but may take a little longer to timeout. Actual timeout is approximate based on a 250ms quantization. Set to 0 to disable timeouts +[id="plugins-{type}s-{plugin}-timeout_scope"] +===== `timeout_scope` + + * Value type is <> + * Default value is `"pattern"` + * Supported values are `"pattern"` and `"event"` + +When multiple patterns are provided to <>, +the timeout has historically applied to _each_ pattern, incurring overhead +for each and every pattern that is attempted; when the grok filter is +configured with `timeout_scope => event`, the plugin instead enforces +a single timeout across all attempted matches on the event, so it can +achieve similar safeguard against runaway matchers with significantly +less overhead. + +It's usually better to scope the timeout for the whole event. [id="plugins-{type}s-{plugin}-common-options"] diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index b13c897..2771a94 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -215,11 +215,14 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Set to 0 to disable timeouts config :timeout_millis, :validate => :number, :default => 30000 - # Timeouts are per pattern matched by default but for multiple matches over a - # particular field it's usually better to scope the timeout for the whole event. - # The `timeout_millis` than effectively becomes a timeout over several matches. - # Default is false due backwards compatibility. - # Has only an effect when timeout_millis > 0 + # When multiple patterns are provided to `match`, + # the timeout has historically applied to _each_ pattern, incurring overhead + # for each and every pattern that is attempted; when the grok filter is + # configured with `timeout_scope => 'event'`, the plugin instead enforces + # a single timeout across all attempted matches on the event, so it can + # achieve similar safeguard against runaway matchers with significantly + # less overhead. + # It's usually better to scope the timeout for the whole event. config :timeout_scope, :validate => %w(pattern event), :default => "pattern" # Tag to apply if a grok regexp times out. From 944652e41ae2eb98b77eebf6bf622fe7135f86b9 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Sat, 16 Nov 2019 07:25:14 -0500 Subject: [PATCH 8/8] Chore: forgot to update version in changelog Co-Authored-By: Ry Biesemeyer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 33cc273..32b0cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 4.1.2 +## 4.2.0 - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153) ## 4.1.1