diff --git a/Gemfile b/Gemfile index 32cc6fb..edbbcec 100644 --- a/Gemfile +++ b/Gemfile @@ -9,3 +9,9 @@ if Dir.exist?(logstash_path) && use_logstash_source gem 'logstash-core', :path => "#{logstash_path}/logstash-core" gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" end + +group :test do + gem 'rspec-benchmark', :require => false if RUBY_VERSION >= '2.3' + gem 'logstash-input-generator', :require => false + gem 'logstash-output-null', :require => false +end diff --git a/spec/filters/grok_performance_spec.rb b/spec/filters/grok_performance_spec.rb new file mode 100644 index 0000000..3c17f8d --- /dev/null +++ b/spec/filters/grok_performance_spec.rb @@ -0,0 +1,150 @@ +# encoding: utf-8 +require_relative "../spec_helper" + +begin + require "rspec-benchmark" +rescue LoadError # due to testing against LS 5.x +end +RSpec.configure do |config| + config.include RSpec::Benchmark::Matchers if defined? RSpec::Benchmark::Matchers +end + +require "logstash/filters/grok" + +describe LogStash::Filters::Grok do + + subject do + described_class.new(config).tap { |filter| filter.register } + end + + EVENT_COUNT = 300_000 + + describe "base-line performance", :performance => true do + + EXPECTED_MIN_RATE = 15_000 # per second + # NOTE: based on Travis CI (docker) numbers : + # logstash_1_d010d1d29244 | LogStash::Filters::Grok + # logstash_1_d010d1d29244 | base-line performance + # logstash_1_d010d1d29244 | filters/grok parse rate: 14464/sec, elapsed: 20.740866999999998s + # logstash_1_d010d1d29244 | filters/grok parse rate: 29957/sec, elapsed: 10.014199s + # logstash_1_d010d1d29244 | filters/grok parse rate: 32932/sec, elapsed: 9.109601999999999s + + let(:config) do + { 'match' => { "message" => "%{SYSLOGLINE}" }, 'overwrite' => [ "message" ] } + end + + it "matches at least #{EXPECTED_MIN_RATE} events/second" do + max_duration = EVENT_COUNT / EXPECTED_MIN_RATE + message = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from 
camomile.cloud9.net[168.100.1.3]" + expect do + duration = measure do + EVENT_COUNT.times { subject.filter(LogStash::Event.new("message" => message)) } + end + puts "filters/grok parse rate: #{"%02.0f/sec" % (EVENT_COUNT / duration)}, elapsed: #{duration}s" + end.to perform_under(max_duration).warmup(1).sample(2).times + end + + end + + describe "timeout", :performance => true do + + ACCEPTED_TIMEOUT_DEGRADATION = 100 # in % (compared to timeout-less run) + # TODO: with more real-world (pipeline) setup this usually gets below 10% on average + + MATCH_PATTERNS = { + "message" => [ + "foo0: %{NUMBER:bar}", "foo1: %{NUMBER:bar}", "foo2: %{NUMBER:bar}", "foo3: %{NUMBER:bar}", "foo4: %{NUMBER:bar}", + "foo5: %{NUMBER:bar}", "foo6: %{NUMBER:bar}", "foo7: %{NUMBER:bar}", "foo8: %{NUMBER:bar}", "foo9: %{NUMBER:bar}", + "%{SYSLOGLINE}" + ] + } + + SAMPLE_MESSAGE = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from aaaaaaaa.aaaaaa.net[111.111.11.1]".freeze + + TIMEOUT_MILLIS = 5_000 + + let(:config_wout_timeout) do + { + 'match' => MATCH_PATTERNS, + 'timeout_scope' => "event", + 'timeout_millis' => 0 # 0 - disabled timeout + } + end + + let(:config_with_timeout) do + { + 'match' => MATCH_PATTERNS, + 'timeout_scope' => "event", + 'timeout_millis' => TIMEOUT_MILLIS + } + end + + SAMPLE_COUNT = 2 + + it "has less than #{ACCEPTED_TIMEOUT_DEGRADATION}% overhead" do + filter_wout_timeout = LogStash::Filters::Grok.new(config_wout_timeout).tap(&:register) + wout_timeout_duration = do_sample_filter(filter_wout_timeout) # warmup + puts "filters/grok(timeout => 0) warmed up in #{wout_timeout_duration}" + before_sample! 
+ no_timeout_durations = Array.new(SAMPLE_COUNT).map do + duration = do_sample_filter(filter_wout_timeout) + puts "filters/grok(timeout => 0) took #{duration}" + duration + end + + expected_duration = avg(no_timeout_durations) + expected_duration += (expected_duration / 100) * ACCEPTED_TIMEOUT_DEGRADATION + puts "expected_duration #{expected_duration}" + + filter_with_timeout = LogStash::Filters::Grok.new(config_with_timeout).tap(&:register) + with_timeout_duration = do_sample_filter(filter_with_timeout) # warmup + puts "filters/grok(timeout_scope => event) warmed up in #{with_timeout_duration}" + + before_sample! + expect do + duration = do_sample_filter(filter_with_timeout) + puts "filters/grok(timeout_scope => event) took #{duration}" + duration + end.to perform_under(expected_duration).sample(SAMPLE_COUNT).times + end + + @private + + def do_sample_filter(filter) + sample_event = { "message" => SAMPLE_MESSAGE } + measure do + for _ in (1..EVENT_COUNT) do # EVENT_COUNT.times without the block cost + filter.filter(LogStash::Event.new(sample_event)) + end + end + end + + def sample_event(hash) + LogStash::Event.new("message" => SAMPLE_MESSAGE) + end + + end + + @private + + def measure + start = Time.now + yield + Time.now - start + end + + def avg(ary) + ary.inject(0) { |m, i| m + i } / ary.size.to_f + end + + def before_sample! 
+ 2.times { JRuby.gc } + sleep TIMEOUT_MILLIS / 1000 + end + + def sleep(seconds) + puts "sleeping for #{seconds} seconds (redundant - potential timeout propagation)" + Kernel.sleep(seconds) + end + +end \ No newline at end of file diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 5d7164d..6fc7f43 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -1,22 +1,5 @@ # encoding: utf-8 -require "logstash/devutils/rspec/spec_helper" -require "stud/temporary" - -module LogStash::Environment - # running the grok code outside a logstash package means - # LOGSTASH_HOME will not be defined, so let's set it here - # before requiring the grok filter - unless self.const_defined?(:LOGSTASH_HOME) - LOGSTASH_HOME = File.expand_path("../../../", __FILE__) - end - - # also :pattern_path method must exist so we define it too - unless self.method_defined?(:pattern_path) - def pattern_path(path) - ::File.join(LOGSTASH_HOME, "patterns", path) - end - end -end +require_relative "../spec_helper" require "logstash/filters/grok" @@ -517,38 +500,6 @@ def pattern_path(path) end end - describe "performance test", :performance => true do - event_count = 100000 - min_rate = 2000 - - max_duration = event_count / min_rate - input = "Nov 24 01:29:01 -0800" - config <<-CONFIG - input { - generator { - count => #{event_count} - message => "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" - } - } - filter { - grok { - match => { "message" => "%{SYSLOGLINE}" } - overwrite => [ "message" ] - } - } - output { null { } } - CONFIG - - 2.times do - start = Time.now - agent do - duration = (Time.now - start) - puts "filters/grok parse rate: #{"%02.0f/sec" % (event_count / duration)}, elapsed: #{duration}s" - insist { duration } < max_duration - end - end - end - describe "single value match with duplicate-named fields in pattern" do config <<-CONFIG filter { diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 
100644 index 0000000..dc5abcf --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,19 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "stud/temporary" + +module LogStash::Environment + # running the grok code outside a logstash package means + # LOGSTASH_HOME will not be defined, so let's set it here + # before requiring the grok filter + unless self.const_defined?(:LOGSTASH_HOME) + LOGSTASH_HOME = File.expand_path("../../../", __FILE__) + end + + # also :pattern_path method must exist so we define it too + unless self.method_defined?(:pattern_path) + def pattern_path(path) + ::File.join(LOGSTASH_HOME, "patterns", path) + end + end +end