From 2c1ee061a55e6aaac4ca7f6384649e85f2f92357 Mon Sep 17 00:00:00 2001 From: Karol Bucek Date: Thu, 18 Feb 2021 07:38:22 +0100 Subject: [PATCH] Feat: ECS compatibility support (#162) - support (ecs) pattern selector for core patterns - also made LS::Environment's `pattern_path` optional resolves #157 Co-authored-by: Karen Metts <35154725+karenzone@users.noreply.github.com> --- CHANGELOG.md | 9 +++++-- docs/index.asciidoc | 30 +++++++++++++++++++++ lib/logstash/filters/grok.rb | 51 ++++++++++++++++++++++-------------- logstash-filter-grok.gemspec | 6 ++--- spec/filters/grok_spec.rb | 10 +++++++ 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7dbed5..3689114 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,13 @@ +## 4.4.0 + - Feat: ECS compatibility support [#162](https://github.com/logstash-plugins/logstash-filter-grok/pull/162) + + The filter supports using (built-in) pattern definitions that are fully Elastic Common Schema compliant. + ## 4.3.0 - - Added: added target support [#156](https://github.com/logstash-plugins/logstash-filter-grok/pull/156) + - Added: added target support [#156](https://github.com/logstash-plugins/logstash-filter-grok/pull/156) ## 4.2.0 - - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153) + - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153) ## 4.1.1 - Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 322403c..9d5a99f 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -175,6 +175,21 @@ Another option is to define patterns _inline_ in the filter using `pattern_defin This is mostly for convenience and allows user to define a pattern which can be used just in that filter. 
This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. +[id="plugins-{type}s-{plugin}-ecs"] +==== Migrating to Elastic Common Schema (ECS) + +To ease migration to the {ecs-ref}[Elastic Common Schema (ECS)], the filter +plugin offers a new set of ECS-compliant patterns in addition to the existing +patterns. The new ECS pattern definitions capture event field names that are +compliant with the schema. + +The ECS pattern set has all of the pattern definitions from the legacy set, and is +a drop-in replacement. Use the <<plugins-{type}s-{plugin}-ecs_compatibility>> +setting to switch modes. + +New features and enhancements will be added to the ECS-compliant files. The +legacy patterns may still receive bug fixes which are backwards compatible. + [id="plugins-{type}s-{plugin}-options"] ==== Grok Filter Configuration Options @@ -185,6 +200,7 @@ This plugin supports the following configuration options plus the <> |<>|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -213,6 +229,20 @@ Break on first match. The first successful match by grok will result in the filter being finished. If you want grok to try all patterns (maybe you are parsing different things), then set this to false. +[id="plugins-{type}s-{plugin}-ecs_compatibility"] +===== `ecs_compatibility` + +* Value type is <> +* Supported values are: +** `disabled`: the plugin will load legacy (built-in) pattern definitions +** `v1`: all patterns provided by the plugin will use ECS compliant captures +* Default value depends on which version of Logstash is running: +** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default +** Otherwise, the default value is `disabled`. + +Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)]. +The value of this setting affects extracted event field names when a composite pattern (such as `HTTPD_COMMONLOG`) is matched. 
+ [id="plugins-{type}s-{plugin}-keep_empty_captures"] ===== `keep_empty_captures` diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 7a32ad8..ebf45dc 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -3,8 +3,8 @@ require "logstash/namespace" require "logstash/environment" require "logstash/patterns/core" + require 'logstash/plugin_mixins/ecs_compatibility_support' require "grok-pure" # rubygem 'jls-grok' - require "set" require "timeout" # Parse arbitrary text and structure it. @@ -144,6 +144,8 @@ # filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. # class LogStash::Filters::Grok < LogStash::Filters::Base + include LogStash::PluginMixins::ECSCompatibilitySupport + config_name "grok" # A hash of matches of field => value @@ -250,22 +252,14 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # will be parsed and `hello world` will overwrite the original message. config :overwrite, :validate => :array, :default => [] - # Register default pattern paths - @@patterns_path ||= Set.new - @@patterns_path += [ - LogStash::Patterns::Core.path, - LogStash::Environment.pattern_path("*") - ] - def register # a cache of capture name handler methods. @handlers = {} @patternfiles = [] - - # Have @@patterns_path show first. Last-in pattern definitions win; this - # will let folks redefine built-in patterns at runtime. - @patternfiles += patterns_files_from_paths(@@patterns_path.to_a, "*") + # Have (default) patterns_path show first. 
Last-in pattern definitions win; + # this will let folks redefine built-in patterns at runtime + @patternfiles += patterns_files_from_paths(patterns_path, "*") @patternfiles += patterns_files_from_paths(@patterns_dir, @patterns_files_glob) @patterns = Hash.new { |h,k| h[k] = [] } @@ -278,11 +272,11 @@ def register patterns = [patterns] if patterns.is_a?(String) @metric_match_fields.gauge(field, patterns.length) - @logger.trace("Grok compile", :field => field, :patterns => patterns) + @logger.trace? && @logger.trace("Grok compile", :field => field, :patterns => patterns) patterns.each do |pattern| - @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) + @logger.debug? && @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) grok = Grok.new - grok.logger = @logger unless @logger.nil? + grok.logger = @logger add_patterns_from_files(@patternfiles, grok) add_patterns_from_inline_definition(@pattern_definitions, grok) grok.compile(pattern, @named_captures_only) @@ -301,15 +295,14 @@ def register def filter(event) matched = false - @logger.debug? and @logger.debug("Running grok filter", :event => event) + @logger.debug? && @logger.debug("Running grok filter", :event => event.to_hash) @patterns.each do |field, groks| if match(groks, field, event) matched = true break if @break_on_match end - #break if done - end # @patterns.each + end if matched @match_counter.increment(1) @@ -319,7 +312,7 @@ def filter(event) @tag_on_failure.each {|tag| event.tag(tag)} end - @logger.debug? and @logger.debug("Event now: ", :event => event) + @logger.debug? && @logger.debug("Event now: ", :event => event.to_hash) rescue GrokTimeoutException => e @logger.warn(e.message) metric.increment(:timeouts) @@ -331,6 +324,24 @@ def close private + # The default pattern paths, depending on environment. 
+ def patterns_path + patterns_path = [] + case ecs_compatibility + when :disabled + patterns_path << LogStash::Patterns::Core.path # :legacy + when :v1 + patterns_path << LogStash::Patterns::Core.path('ecs-v1') + else + fail(NotImplementedError, "ECS #{ecs_compatibility} is not supported by this plugin.") + end + # allow plugin to be instantiated outside the LS environment (in tests) + if defined? LogStash::Environment.pattern_path + patterns_path << LogStash::Environment.pattern_path("*") + end + patterns_path + end + def match(groks, field, event) input = event.get(field) if input.is_a?(Array) @@ -343,7 +354,7 @@ def match(groks, field, event) match_against_groks(groks, field, input, event) end rescue StandardError => e - @logger.warn("Grok regexp threw exception", :exception => e.message, :backtrace => e.backtrace, :class => e.class.name) + @logger.warn("Grok regexp threw exception", :message => e.message, :exception => e.class, :backtrace => e.backtrace) return false end diff --git a/logstash-filter-grok.gemspec b/logstash-filter-grok.gemspec index 708da2c..e78bf26 100644 --- a/logstash-filter-grok.gemspec +++ b/logstash-filter-grok.gemspec @@ -1,7 +1,6 @@ Gem::Specification.new do |s| - s.name = 'logstash-filter-grok' - s.version = '4.3.0' + s.version = '4.4.0' s.licenses = ['Apache License (2.0)'] s.summary = "Parses unstructured event data into fields" s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. 
This gem is not a stand-alone program" @@ -22,10 +21,11 @@ Gem::Specification.new do |s| # Gem dependencies s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" s.add_runtime_dependency "logstash-core", ">= 5.6.0" + s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.0' s.add_runtime_dependency 'jls-grok', '~> 0.11.3' s.add_runtime_dependency 'stud', '~> 0.0.22' - s.add_runtime_dependency 'logstash-patterns-core' + s.add_runtime_dependency 'logstash-patterns-core', '>= 4.3.0', '< 5' s.add_development_dependency 'logstash-devutils' end diff --git a/spec/filters/grok_spec.rb b/spec/filters/grok_spec.rb index 58d70d9..df2eee5 100644 --- a/spec/filters/grok_spec.rb +++ b/spec/filters/grok_spec.rb @@ -38,6 +38,16 @@ def self.sample(message, &block) expect( event.get("pid") ).to eql "1713" end + context 'in ecs mode' do + let(:config) { super.merge('ecs_compatibility' => 'v1') } + + it "matches pattern" do + expect( event.get("host") ).to eql "hostname"=>"evita" + expect( event.get("process") ).to eql "name"=>"postfix/smtpd", "pid"=>1713 + expect( event.get("message") ).to eql "connect from camomile.cloud9.net[168.100.1.3]" + end + end + context 'with target' do let(:config) { { "match" => { "message" => "%{SYSLOGLINE}" }, "target" => "grok" } }