Skip to content

Commit

Permalink
Test: resurrect + add performance (regression) test
Browse files Browse the repository at this point in the history
A follow-up on GH-153.
  • Loading branch information
kares committed Dec 2, 2019
1 parent a317df8 commit 9c9c66f
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 50 deletions.
6 changes: 6 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ if Dir.exist?(logstash_path) && use_logstash_source
gem 'logstash-core', :path => "#{logstash_path}/logstash-core"
gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api"
end

# Test-only gems (not required at runtime).
group :test do
  # NOTE: compare via Gem::Version - a plain String #>= is lexicographic and
  # would order "2.10" before "2.3"; rspec-benchmark itself needs Ruby >= 2.3
  gem 'rspec-benchmark', :require => false if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3')
  gem 'logstash-input-generator', :require => false
  gem 'logstash-output-null', :require => false
end
150 changes: 150 additions & 0 deletions spec/filters/grok_performance_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# encoding: utf-8
require_relative "../spec_helper"

begin
require "rspec-benchmark"
rescue LoadError # ignore - e.g. due to testing against LS 5.x, where the gem is unavailable (Ruby < 2.3)
end
# only mix the benchmark matchers (perform_under etc.) into examples when the gem actually loaded
RSpec.configure do |config|
config.include RSpec::Benchmark::Matchers if defined? RSpec::Benchmark::Matchers
end

require "logstash/filters/grok"

describe LogStash::Filters::Grok do

  subject do
    described_class.new(config).tap { |filter| filter.register }
  end

  # events pushed through the filter per measured sample
  EVENT_COUNT = 300_000

  describe "base-line performance", :performance => true do

    EXPECTED_MIN_RATE = 15_000 # per second
    # NOTE: based on Travis CI (docker) numbers :
    # logstash_1_d010d1d29244 | LogStash::Filters::Grok
    # logstash_1_d010d1d29244 | base-line performance
    # logstash_1_d010d1d29244 | filters/grok parse rate: 14464/sec, elapsed: 20.740866999999998s
    # logstash_1_d010d1d29244 | filters/grok parse rate: 29957/sec, elapsed: 10.014199s
    # logstash_1_d010d1d29244 | filters/grok parse rate: 32932/sec, elapsed: 9.109601999999999s

    let(:config) do
      { 'match' => { "message" => "%{SYSLOGLINE}" }, 'overwrite' => [ "message" ] }
    end

    it "matches at least #{EXPECTED_MIN_RATE} events/second" do
      max_duration = EVENT_COUNT / EXPECTED_MIN_RATE
      message = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"
      expect do
        duration = measure do
          EVENT_COUNT.times { subject.filter(LogStash::Event.new("message" => message)) }
        end
        puts "filters/grok parse rate: #{"%02.0f/sec" % (EVENT_COUNT / duration)}, elapsed: #{duration}s"
      end.to perform_under(max_duration).warmup(1).sample(2).times
    end

  end

  describe "timeout", :performance => true do

    ACCEPTED_TIMEOUT_DEGRADATION = 100 # in % (compared to timeout-less run)
    # TODO: with more real-world (pipeline) setup this usually gets below 10% on average

    # a chain of non-matching patterns before the matching one, to exercise
    # the (timeout-guarded) matching loop
    MATCH_PATTERNS = {
      "message" => [
        "foo0: %{NUMBER:bar}", "foo1: %{NUMBER:bar}", "foo2: %{NUMBER:bar}", "foo3: %{NUMBER:bar}", "foo4: %{NUMBER:bar}",
        "foo5: %{NUMBER:bar}", "foo6: %{NUMBER:bar}", "foo7: %{NUMBER:bar}", "foo8: %{NUMBER:bar}", "foo9: %{NUMBER:bar}",
        "%{SYSLOGLINE}"
      ]
    }

    SAMPLE_MESSAGE = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from aaaaaaaa.aaaaaa.net[111.111.11.1]".freeze

    TIMEOUT_MILLIS = 5_000

    let(:config_wout_timeout) do
      {
        'match' => MATCH_PATTERNS,
        'timeout_scope' => "event",
        'timeout_millis' => 0 # 0 - disabled timeout
      }
    end

    let(:config_with_timeout) do
      {
        'match' => MATCH_PATTERNS,
        'timeout_scope' => "event",
        'timeout_millis' => TIMEOUT_MILLIS
      }
    end

    SAMPLE_COUNT = 2

    it "has less than #{ACCEPTED_TIMEOUT_DEGRADATION}% overhead" do
      # establish the timeout-less base line first ...
      filter_wout_timeout = LogStash::Filters::Grok.new(config_wout_timeout).tap(&:register)
      wout_timeout_duration = do_sample_filter(filter_wout_timeout) # warmup
      puts "filters/grok(timeout => 0) warmed up in #{wout_timeout_duration}"
      before_sample!
      no_timeout_durations = Array.new(SAMPLE_COUNT).map do
        duration = do_sample_filter(filter_wout_timeout)
        puts "filters/grok(timeout => 0) took #{duration}"
        duration
      end

      # ... then allow the timeout-enabled run the configured degradation margin
      expected_duration = avg(no_timeout_durations)
      expected_duration += (expected_duration / 100) * ACCEPTED_TIMEOUT_DEGRADATION
      puts "expected_duration #{expected_duration}"

      filter_with_timeout = LogStash::Filters::Grok.new(config_with_timeout).tap(&:register)
      with_timeout_duration = do_sample_filter(filter_with_timeout) # warmup
      puts "filters/grok(timeout_scope => event) warmed up in #{with_timeout_duration}"

      before_sample!
      expect do
        duration = do_sample_filter(filter_with_timeout)
        puts "filters/grok(timeout_scope => event) took #{duration}"
        duration
      end.to perform_under(expected_duration).sample(SAMPLE_COUNT).times
    end

    # @private (spec-internal helper; a bare `@private` here was a no-op ivar read)

    # Pushes EVENT_COUNT copies of SAMPLE_MESSAGE through the given filter,
    # returning the elapsed time in seconds.
    def do_sample_filter(filter)
      sample_event = { "message" => SAMPLE_MESSAGE }
      measure do
        for _ in (1..EVENT_COUNT) do # EVENT_COUNT.times without the block cost
          filter.filter(LogStash::Event.new(sample_event))
        end
      end
    end

  end

  # @private (spec-internal helpers; a bare `@private` here was a no-op ivar read)

  # Yields the block and returns its elapsed time in (float) seconds.
  # Uses the monotonic clock so wall-clock adjustments can not skew samples.
  def measure
    start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    yield
    Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
  end

  # Arithmetic mean of an array of numbers (always a Float).
  def avg(ary)
    ary.inject(0) { |m, i| m + i } / ary.size.to_f
  end

  # Quiesce the runtime before taking a sample: force a GC and pause long
  # enough for any pending (event-scoped) timeout interrupts to propagate.
  def before_sample!
    2.times { JRuby.gc }
    sleep TIMEOUT_MILLIS / 1000
  end

  # Verbose wrapper around Kernel#sleep so sample pauses show up in the output.
  def sleep(seconds)
    puts "sleeping for #{seconds} seconds (redundant - potential timeout propagation)"
    Kernel.sleep(seconds)
  end

end
51 changes: 1 addition & 50 deletions spec/filters/grok_spec.rb
Original file line number Diff line number Diff line change
@@ -1,22 +1,5 @@
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "stud/temporary"

module LogStash::Environment
# running the grok code outside a logstash package means
# LOGSTASH_HOME will not be defined, so let's set it here
# before requiring the grok filter
unless self.const_defined?(:LOGSTASH_HOME)
LOGSTASH_HOME = File.expand_path("../../../", __FILE__)
end

# also :pattern_path method must exist so we define it too
unless self.method_defined?(:pattern_path)
def pattern_path(path)
::File.join(LOGSTASH_HOME, "patterns", path)
end
end
end
require_relative "../spec_helper"

require "logstash/filters/grok"

Expand Down Expand Up @@ -517,38 +500,6 @@ def pattern_path(path)
end
end

describe "performance test", :performance => true do
event_count = 100000
min_rate = 2000

max_duration = event_count / min_rate
input = "Nov 24 01:29:01 -0800"
config <<-CONFIG
input {
generator {
count => #{event_count}
message => "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]"
}
}
filter {
grok {
match => { "message" => "%{SYSLOGLINE}" }
overwrite => [ "message" ]
}
}
output { null { } }
CONFIG

2.times do
start = Time.now
agent do
duration = (Time.now - start)
puts "filters/grok parse rate: #{"%02.0f/sec" % (event_count / duration)}, elapsed: #{duration}s"
insist { duration } < max_duration
end
end
end

describe "single value match with duplicate-named fields in pattern" do
config <<-CONFIG
filter {
Expand Down
19 changes: 19 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "stud/temporary"

module LogStash::Environment
  # running the grok code outside a logstash package means
  # LOGSTASH_HOME will not be defined, so let's set it here
  # before requiring the grok filter
  unless self.const_defined?(:LOGSTASH_HOME)
    # this helper lives directly under spec/, so the project root is two
    # path segments up ("../../" - the first ".." cancels the file name);
    # the previous "../../../" was copied from spec/filters/grok_spec.rb and
    # resolved one directory ABOVE the project root from this location
    LOGSTASH_HOME = File.expand_path("../../", __FILE__)
  end

  # also :pattern_path method must exist so we define it too
  unless self.method_defined?(:pattern_path)
    # Resolves a grok pattern file name against the project's patterns/ dir.
    def pattern_path(path)
      ::File.join(LOGSTASH_HOME, "patterns", path)
    end
  end
end

0 comments on commit 9c9c66f

Please sign in to comment.