From 82c5574c4857d33be6a1f419a29594064ad48bc9 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Sat, 2 Dec 2023 16:01:02 +0000
Subject: [PATCH 01/16] bugfix - get_hits_terms: split compound qk on
 commaspace

otherwise, a space gets prefixed to the value
---
 elastalert/elastalert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py
index d7a9db71..2b890973 100755
--- a/elastalert/elastalert.py
+++ b/elastalert/elastalert.py
@@ -466,7 +466,7 @@ def get_hits_count(self, rule, starttime, endtime, index):
     def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=None):
         rule_filter = copy.copy(rule['filter'])
         if qk:
-            qk_list = qk.split(",")
+            qk_list = qk.split(", ")
             end = '.keyword'
 
             if len(qk_list) == 1:

From 27666d5689ff507f67ab22df15b184b15d351a0b Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Sat, 2 Dec 2023 16:14:12 +0000
Subject: [PATCH 02/16] bugfix - get_hits_terms `key` scope

The `raw_count_keys` handler now tests the intended value while iterating
over `compound_query_key` terms.  Previously, it was checking the
method param `key` and erroneously skipping the add_keyword_postfix()
conditional
---
 elastalert/elastalert.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py
index 2b890973..7d1e8be7 100755
--- a/elastalert/elastalert.py
+++ b/elastalert/elastalert.py
@@ -477,11 +477,10 @@ def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=Non
                 rule_filter.extend([{'term': {filter_key: qk}}])
             else:
                 filter_keys = rule['compound_query_key']
-                for i in range(len(filter_keys)):
-                    key_with_postfix = filter_keys[i]
-                    if rule.get('raw_count_keys', True) and not key.endswith(end):
-                        key_with_postfix = add_keyword_postfix(key_with_postfix)
-                    rule_filter.extend([{'term': {key_with_postfix: qk_list[i]}}])
+                for i,filter_key in enumerate(filter_keys):
+                    if rule.get('raw_count_keys', True) and not filter_key.endswith(end):
+                        filter_key = add_keyword_postfix(filter_key)
+                    rule_filter.extend([{'term': {filter_key: qk_list[i]}}])
 
         base_query = self.get_query(
             rule_filter,

From 2520b18bfca4bff42485a8c69d5e18d004471ca9 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Sat, 2 Dec 2023 17:44:42 -0800
Subject: [PATCH 03/16] .keyword query_key with raw_count_keys=false

allow finding of qk values from matches when using raw_count_keys=false

hard-coding '.keyword' is probably wrong.  Not sure where to get the string_multi_field_name from in this context
---
 elastalert/util.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/elastalert/util.py b/elastalert/util.py
index e8a9dc34..66d4de40 100644
--- a/elastalert/util.py
+++ b/elastalert/util.py
@@ -66,6 +66,12 @@ def _find_es_dict_by_key(lookup_dict, term):
     """
     if term in lookup_dict:
         return lookup_dict, term
+
+    if term.endswith('.keyword'):
+        term, _, _ = term.partition('.keyword')
+        if term in lookup_dict:
+            return lookup_dict, term
+
     # If the term does not match immediately, perform iterative lookup:
     # 1. Split the search term into tokens
     # 2. Recurrently concatenate these together to traverse deeper into the dictionary,

From c0ac74280d16608d0eab5483c61024e986014b78 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Tue, 5 Dec 2023 01:18:15 +0000
Subject: [PATCH 04/16] setup.py - require python >= 3.9

To use string.removesuffix()

and f-strings (>=3.6)

and type-hints for builtin collections

and because it looks like the project is standardizing on 3.12 anyway
---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 825d06cf..8154534f 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@
                             'elastalert=elastalert.elastalert:main']},
     packages=find_packages(exclude=["tests"]),
     package_data={'elastalert': ['schema.yaml', 'es_mappings/**/*.json']},
+    python_requires='>=3.9',
     install_requires=[
         'apscheduler>=3.10.4,<4.0',
         'aws-requests-auth>=0.4.3',

From c7c28dca254c0e111a04e7fce0db656ea4f0da7d Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Tue, 5 Dec 2023 01:20:40 +0000
Subject: [PATCH 05/16] _find_es_dict_by_key: .keyword will never be in _source

So, always remove it
---
 elastalert/util.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/elastalert/util.py b/elastalert/util.py
index 66d4de40..a504d820 100644
--- a/elastalert/util.py
+++ b/elastalert/util.py
@@ -44,7 +44,7 @@ def new_get_event_ts(ts_field):
     return lambda event: lookup_es_key(event[0], ts_field)
 
 
-def _find_es_dict_by_key(lookup_dict, term):
+def _find_es_dict_by_key(lookup_dict: dict, term: str, string_multi_field_name: str = ".keyword") -> tuple[dict, str]:
     """ Performs iterative dictionary search based upon the following conditions:
 
     1. Subkeys may either appear behind a full stop (.) or at one lookup_dict level lower in the tree.
@@ -64,14 +64,16 @@ def _find_es_dict_by_key(lookup_dict, term):
     element which is the last subkey used to access the target specified by the term. None is
     returned for both if the key can not be found.
     """
+
+    # matching documents will not have .keyword fieldnames, even if a .keyword
+    # fieldname was used as a term in the search
+    # e.g. {"term": {"description.keyword": "Target Description Here"}}
+    # will return a document with {"_source": {"description": "Target Description Here"}}
+    term = term.removesuffix(string_multi_field_name)
+
     if term in lookup_dict:
         return lookup_dict, term
 
-    if term.endswith('.keyword'):
-        term, _, _ = term.partition('.keyword')
-        if term in lookup_dict:
-            return lookup_dict, term
-
     # If the term does not match immediately, perform iterative lookup:
     # 1. Split the search term into tokens
     # 2. Recurrently concatenate these together to traverse deeper into the dictionary,

From df03531f9adabb168ca425836bc05a679be23929 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Tue, 5 Dec 2023 01:21:46 +0000
Subject: [PATCH 06/16] tests/util_test.py - include .keyword subtests for
 lookups

---
 tests/util_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/util_test.py b/tests/util_test.py
index dbb0e8f1..32dcbc26 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -117,6 +117,7 @@ def test_looking_up_nested_keys(ea):
     }
 
     assert lookup_es_key(record, 'Fields.ts') == expected
+    assert lookup_es_key(record, 'Fields.ts.keyword') == expected
 
 
 def test_looking_up_nested_composite_keys(ea):
@@ -131,6 +132,7 @@ def test_looking_up_nested_composite_keys(ea):
     }
 
     assert lookup_es_key(record, 'Fields.ts.value') == expected
+    assert lookup_es_key(record, 'Fields.ts.value.keyword') == expected
 
 
 def test_looking_up_arrays(ea):
@@ -148,10 +150,14 @@ def test_looking_up_arrays(ea):
     assert lookup_es_key(record, 'flags[0]') == 1
     assert lookup_es_key(record, 'flags[1]') == 2
     assert lookup_es_key(record, 'objects[0]foo') == 'bar'
+    assert lookup_es_key(record, 'objects[0]foo.keyword') == 'bar'
     assert lookup_es_key(record, 'objects[1]foo[0]bar') == 'baz'
     assert lookup_es_key(record, 'objects[2]foo.bar') == 'baz'
+    assert lookup_es_key(record, 'objects[2]foo.bar.keyword') == 'baz'
     assert lookup_es_key(record, 'objects[1]foo[1]bar') is None
+    assert lookup_es_key(record, 'objects[1]foo[1]bar.keyword') is None
     assert lookup_es_key(record, 'objects[1]foo[0]baz') is None
+    assert lookup_es_key(record, 'objects[1]foo[0]baz.keyword') is None
     assert lookup_es_key(record, 'nested.foo[0]') == 'bar'
     assert lookup_es_key(record, 'nested.foo[1]') == 'baz'
 

From 52fa1ff33f4dca8b980ba96803ca5750bb3738c6 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 4 Dec 2023 18:22:49 -0800
Subject: [PATCH 07/16] refactor query_key term generation

---
 elastalert/elastalert.py | 41 ++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py
index 7d1e8be7..1033801d 100755
--- a/elastalert/elastalert.py
+++ b/elastalert/elastalert.py
@@ -463,24 +463,33 @@ def get_hits_count(self, rule, starttime, endtime, index):
         )
         return {endtime: res['count']}
 
+    @staticmethod
+    def query_key_filters(rule: dict, qk_value_csv: str) -> dict:
+        if qk_value_csv is None:
+            return
+
+        qk_values = qk_value_csv.split(", ")
+        end = '.keyword'
+
+        query_keys = []
+        try:
+            query_keys = rule['compound_query_key']
+        except KeyError:
+            query_key = rule.get('query_key')
+            if query_key is not None:
+                query_keys.append(query_key)
+
+        for key, value in zip(query_keys, qk_values):
+            if rule.get('raw_count_keys', True):
+                if not key.endswith(end):
+                    key += end
+            yield {'term': {key: value}}
+
     def get_hits_terms(self, rule, starttime, endtime, index, key, qk=None, size=None):
         rule_filter = copy.copy(rule['filter'])
-        if qk:
-            qk_list = qk.split(", ")
-            end = '.keyword'
-
-            if len(qk_list) == 1:
-                qk = qk_list[0]
-                filter_key = rule['query_key']
-                if rule.get('raw_count_keys', True) and not rule['query_key'].endswith(end):
-                    filter_key = add_keyword_postfix(filter_key)
-                rule_filter.extend([{'term': {filter_key: qk}}])
-            else:
-                filter_keys = rule['compound_query_key']
-                for i,filter_key in enumerate(filter_keys):
-                    if rule.get('raw_count_keys', True) and not filter_key.endswith(end):
-                        filter_key = add_keyword_postfix(filter_key)
-                    rule_filter.extend([{'term': {filter_key: qk_list[i]}}])
+
+        for filter in self.query_key_filters(rule=rule, qk_value_csv=qk):
+            rule_filter.append(filter)
 
         base_query = self.get_query(
             rule_filter,

From 3ce569f2fd008ea47c4087d24aa725c2c00fcf39 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Sun, 10 Dec 2023 10:06:18 -0800
Subject: [PATCH 08/16] unitest - get_hits_terms (happy path)

---
 tests/hits_terms_test.py | 68 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 tests/hits_terms_test.py

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
new file mode 100644
index 00000000..637e8f8b
--- /dev/null
+++ b/tests/hits_terms_test.py
@@ -0,0 +1,68 @@
+import pytest
+from datetime import datetime,timedelta
+
+from elastalert.util import dt_to_ts
+
+
+@pytest.fixture
+def example_agg_response():
+    res = {
+        'took': 1,
+        'timed_out': False,
+        '_shards': {'total': 3, 'successful': 3, 'skipped': 0, 'failed': 0},
+        'hits': {
+            'total': {'value': 9, 'relation': 'eq'},
+            'max_score': None,
+            'hits': []},
+        'aggregations':{
+            'counts': {
+                'doc_count_error_upper_bound': 0,
+                'sum_other_doc_count': 0,
+                'buckets': [{'key': '10.0.4.174', 'doc_count': 2},
+                            {'key': '10.0.4.241', 'doc_count': 2},
+                            {'key': '10.0.4.76', 'doc_count': 1},
+                            {'key': '10.0.4.123', 'doc_count': 1},
+                            {'key': '10.0.4.156', 'doc_count': 1},
+                            {'key': '10.0.4.231', 'doc_count': 1},
+                            {'key': '10.0.4.248', 'doc_count': 1}]}}
+    }
+    return res
+
+
+def test_query_key_filter_happy_path(ea, example_agg_response):
+
+    ea.rules[0]['compound_query_key'] = ['server_ip', 'service_name']
+    qk = ('172.16.1.10', '/api/v1/endpoint-foo')
+    qk_csv = ", ".join(qk)
+    index = 'foo-2023-13-13' #lousy Smarch weather
+    top_term_key = 'client_ip'
+
+    endtime = datetime.now()
+    starttime = endtime - timedelta(hours=1)
+    ea.thread_data.current_es.search.return_value = example_agg_response
+
+    hit_terms = ea.get_hits_terms(
+        rule=ea.rules[0],
+        starttime=starttime,
+        endtime=endtime,
+        index=index,
+        key=top_term_key,
+        qk = qk_csv,
+        size=None
+    )
+    assert endtime in hit_terms
+    assert hit_terms[endtime] == example_agg_response['aggregations']['counts']['buckets']
+
+    expected_filters = [
+        {'range': {'@timestamp': {
+            'gt': dt_to_ts(starttime),
+            'lte': dt_to_ts(endtime)}}},
+        {'term': {f'{ea.rules[0]['compound_query_key'][0]}.keyword': qk[0]}},
+        {'term': {f'{ea.rules[0]['compound_query_key'][1]}.keyword': qk[1]}}
+    ]
+    expected_query = {
+        'query': {'bool': {'filter': {'bool': {'must': expected_filters}}}},
+         # 50 harded coded in get_hits_terms as a default for size=None
+        'aggs': {'counts': {'terms': {'field': top_term_key, 'size': 50, 'min_doc_count': 1}}}
+    }
+    ea.thread_data.current_es.search.assert_called_with(index=index,body=expected_query, size=0, ignore_unavailable=True)
\ No newline at end of file

From 985df30c259406190f4c72f12007c32922ea83d4 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Sun, 10 Dec 2023 19:16:15 -0800
Subject: [PATCH 09/16] hits_terms_test - parameterized senarios

---
 tests/hits_terms_test.py | 87 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 78 insertions(+), 9 deletions(-)

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
index 637e8f8b..74b18d49 100644
--- a/tests/hits_terms_test.py
+++ b/tests/hits_terms_test.py
@@ -29,11 +29,70 @@ def example_agg_response():
     return res
 
 
-def test_query_key_filter_happy_path(ea, example_agg_response):
+def _mock_query_key_option_loader(rule):
+    '''
+    So, some copypasta from loaders.load_options,
 
-    ea.rules[0]['compound_query_key'] = ['server_ip', 'service_name']
-    qk = ('172.16.1.10', '/api/v1/endpoint-foo')
-    qk_csv = ", ".join(qk)
+    if query_key is a string:
+        no compound_query_key is created
+    if query_key is a list:
+        if len() > 1:
+            compound_query_key is created
+            query_key is replaced with ",".join() of the original query_key values
+        if len() == 1:
+            the query_key list with one string is normalilzed back to just a string
+        if len() == 0:
+            somehow it was an empty list and query_keys is silently dropped from the config
+    '''
+    raw_query_key = rule.get('query_key')
+    if isinstance(raw_query_key, list):
+        if len(raw_query_key) > 1:
+            rule['compound_query_key'] = raw_query_key
+            rule['query_key'] = ','.join(raw_query_key)
+        elif len(raw_query_key) == 1:
+            rule['query_key'] = raw_query_key[0]
+        else:
+            del(rule['query_key'])
+
+@pytest.mark.parametrize(
+    ["qk", "query_key"],
+
+    #scenario A: 3 query keys
+    [ ( ['172.16.1.10', '/api/v1/endpoint-foo', 'us-east-2'],
+        ['server_ip', 'service_name', 'region'] ),
+
+    #scenario B: 2 query keys
+      ( ['172.16.1.10', '/api/v1/endpoint-foo'],
+        ['server_ip', 'service_name'] ),
+
+    #scenario C: 1 query key, but it was given as a list of one fieldname in the rule options
+      ( ['172.16.1.10'],
+        ['server_ip'] ),
+
+    #scenario D: 1 query key, given as a string
+      ( ['172.16.1.10'],
+        'server_ip' ),
+
+    #scenario E: no query key
+      ( None,
+        None )
+   ],
+)
+def test_query_key_filter_happy_path(ea, example_agg_response, qk, query_key):
+
+    if query_key is not None:
+        ea.rules[0]['query_key'] = query_key
+
+    # emulate the rule['compound_query_key'] creation logic which prob should be
+    # factored out of loaders.load_options() instead of copypasta'd for the test
+    _mock_query_key_option_loader(ea.rules[0])
+
+    try:
+        # todo: why are these values being passed as a commaspace-join and not a
+        # comma-join like everything else?
+        qk_csv = ", ".join(qk)
+    except TypeError:
+        qk_csv = None
     index = 'foo-2023-13-13' #lousy Smarch weather
     top_term_key = 'client_ip'
 
@@ -54,12 +113,22 @@ def test_query_key_filter_happy_path(ea, example_agg_response):
     assert hit_terms[endtime] == example_agg_response['aggregations']['counts']['buckets']
 
     expected_filters = [
-        {'range': {'@timestamp': {
-            'gt': dt_to_ts(starttime),
-            'lte': dt_to_ts(endtime)}}},
-        {'term': {f'{ea.rules[0]['compound_query_key'][0]}.keyword': qk[0]}},
-        {'term': {f'{ea.rules[0]['compound_query_key'][1]}.keyword': qk[1]}}
+        {'range': {'@timestamp': { 'gt': dt_to_ts(starttime), 'lte': dt_to_ts(endtime)}}}
     ]
+    try:
+        cqk = ea.rules[0]['compound_query_key']
+        for fieldname, value in zip(cqk, qk):
+            filter = {'term': {f'{fieldname}.keyword': value}}
+            expected_filters.append(filter)
+    except KeyError:
+        #not a compound, eh?  it must be a string of a single filedname
+        try:
+            fieldname = ea.rules[0]['query_key']
+            filter = {'term': {f'{fieldname}.keyword': qk[0]}}
+            expected_filters.append(filter)
+        except KeyError:
+            pass # maybe the rule never had a query_key, or it was an empty list and purged
+
     expected_query = {
         'query': {'bool': {'filter': {'bool': {'must': expected_filters}}}},
          # 50 harded coded in get_hits_terms as a default for size=None

From 483001618948157d9716b9dcead60de83a000dba Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 18:58:52 -0800
Subject: [PATCH 10/16] test AlastAlerter.query_key_filters

---
 tests/hits_terms_test.py | 47 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
index 74b18d49..bd81c2eb 100644
--- a/tests/hits_terms_test.py
+++ b/tests/hits_terms_test.py
@@ -2,6 +2,7 @@
 from datetime import datetime,timedelta
 
 from elastalert.util import dt_to_ts
+from elastalert.elastalert import ElastAlerter
 
 
 @pytest.fixture
@@ -134,4 +135,48 @@ def test_query_key_filter_happy_path(ea, example_agg_response, qk, query_key):
          # 50 harded coded in get_hits_terms as a default for size=None
         'aggs': {'counts': {'terms': {'field': top_term_key, 'size': 50, 'min_doc_count': 1}}}
     }
-    ea.thread_data.current_es.search.assert_called_with(index=index,body=expected_query, size=0, ignore_unavailable=True)
\ No newline at end of file
+    ea.thread_data.current_es.search.assert_called_with(index=index,body=expected_query, size=0, ignore_unavailable=True)
+
+
+def test_query_key_filters_single_query_key():
+    rule = { 'query_key': 'a_single_key_as_a_string' }
+    qk_value_csv = 'a single value'
+    filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
+    expected_filters = [{'term': {f'{rule['query_key']}.keyword': qk_value_csv}}]
+    assert filters == expected_filters
+
+def test_query_key_filters_compound_query_key():
+    rule = { 'query_key': 'compound,key',
+             'compound_query_key': ['compound', 'key'] }
+    qk_value_csv = 'combined value, by commaspace'
+    filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
+    expected_filters = [
+        {'term': {'compound.keyword': 'combined value'}},
+        {'term': {'key.keyword': 'by commaspace'}},
+    ]
+    assert filters == expected_filters
+
+def test_query_key_filters_brittle_query_key_value_logs_warning(caplog):
+    rule = { 'query_key': 'university,state',
+             'compound_query_key': ['university', 'state'] }
+    #uh oh, a commaspace we didn't expect
+    qk_value_csv = 'California State University, San Bernardino, California'
+    filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
+    log = caplog.records[0]
+    assert log.levelname == "WARNING"
+    assert 'Received 3 value(s) for 2 key(s).' in log.message
+
+def test_query_key_filters_none_values():
+    rule = { 'query_key': 'something'}
+    qk_value_csv = None
+    filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
+    assert len(filters) == 0
+
+def test_query_key_filters_unexpected_passed_values_for_a_rule_without_query_keys(caplog):
+    rule = { }
+    qk_value_csv = 'value'
+    filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
+    assert len(filters) == 0
+    log = caplog.records[0]
+    assert log.levelname == "WARNING"
+    assert 'Received 1 value(s) for 0 key(s).' in log.message
\ No newline at end of file

From 30820c9895d737707d48a3c29dad2d1a12923cd0 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 18:59:52 -0800
Subject: [PATCH 11/16] reword hits_terms_test.py

---
 tests/hits_terms_test.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
index bd81c2eb..e834e8c2 100644
--- a/tests/hits_terms_test.py
+++ b/tests/hits_terms_test.py
@@ -56,7 +56,7 @@ def _mock_query_key_option_loader(rule):
             del(rule['query_key'])
 
 @pytest.mark.parametrize(
-    ["qk", "query_key"],
+    ["qk_value", "query_key"],
 
     #scenario A: 3 query keys
     [ ( ['172.16.1.10', '/api/v1/endpoint-foo', 'us-east-2'],
@@ -67,6 +67,7 @@ def _mock_query_key_option_loader(rule):
         ['server_ip', 'service_name'] ),
 
     #scenario C: 1 query key, but it was given as a list of one fieldname in the rule options
+    #as of this writing, 707b2a5 shouldn't allow this to happen, but here is a test regardless
       ( ['172.16.1.10'],
         ['server_ip'] ),
 
@@ -79,7 +80,7 @@ def _mock_query_key_option_loader(rule):
         None )
    ],
 )
-def test_query_key_filter_happy_path(ea, example_agg_response, qk, query_key):
+def test_get_hits_terms_with_factored_out_filters(ea, example_agg_response, qk_value, query_key):
 
     if query_key is not None:
         ea.rules[0]['query_key'] = query_key
@@ -89,9 +90,9 @@ def test_query_key_filter_happy_path(ea, example_agg_response, qk, query_key):
     _mock_query_key_option_loader(ea.rules[0])
 
     try:
-        # todo: why are these values being passed as a commaspace-join and not a
-        # comma-join like everything else?
-        qk_csv = ", ".join(qk)
+        # ElastAlert.process_hits() is expected to insert the filedname values
+        # from _hits as a commaspace csv
+        qk_csv = ", ".join(qk_value)
     except TypeError:
         qk_csv = None
     index = 'foo-2023-13-13' #lousy Smarch weather
@@ -118,14 +119,14 @@ def test_query_key_filter_happy_path(ea, example_agg_response, qk, query_key):
     ]
     try:
         cqk = ea.rules[0]['compound_query_key']
-        for fieldname, value in zip(cqk, qk):
+        for fieldname, value in zip(cqk, qk_value):
             filter = {'term': {f'{fieldname}.keyword': value}}
             expected_filters.append(filter)
     except KeyError:
         #not a compound, eh?  it must be a string of a single filedname
         try:
             fieldname = ea.rules[0]['query_key']
-            filter = {'term': {f'{fieldname}.keyword': qk[0]}}
+            filter = {'term': {f'{fieldname}.keyword': qk_value[0]}}
             expected_filters.append(filter)
         except KeyError:
             pass # maybe the rule never had a query_key, or it was an empty list and purged

From 1c98f893c7c1a3adb46e0d3163a9ebe50dab97aa Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 19:09:19 -0800
Subject: [PATCH 12/16] test query_key_filters handling of CSV flavors

---
 tests/hits_terms_test.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
index e834e8c2..c3097da7 100644
--- a/tests/hits_terms_test.py
+++ b/tests/hits_terms_test.py
@@ -80,7 +80,8 @@ def _mock_query_key_option_loader(rule):
         None )
    ],
 )
-def test_get_hits_terms_with_factored_out_filters(ea, example_agg_response, qk_value, query_key):
+@pytest.mark.parametrize("query_key_values_separator", [",", ", ", ",      ", ",\t"])
+def test_get_hits_terms_with_factored_out_filters(ea, example_agg_response, qk_value, query_key, query_key_values_separator):
 
     if query_key is not None:
         ea.rules[0]['query_key'] = query_key
@@ -92,7 +93,7 @@ def test_get_hits_terms_with_factored_out_filters(ea, example_agg_response, qk_v
     try:
         # ElastAlert.process_hits() is expected to insert the filedname values
         # from _hits as a commaspace csv
-        qk_csv = ", ".join(qk_value)
+        qk_csv = query_key_values_separator.join(qk_value)
     except TypeError:
         qk_csv = None
     index = 'foo-2023-13-13' #lousy Smarch weather
@@ -146,10 +147,11 @@ def test_query_key_filters_single_query_key():
     expected_filters = [{'term': {f'{rule['query_key']}.keyword': qk_value_csv}}]
     assert filters == expected_filters
 
-def test_query_key_filters_compound_query_key():
+@pytest.mark.parametrize("query_key_values_separator", [",", ", ", ",      ", ",\t"])
+def test_query_key_filters_compound_query_key(query_key_values_separator):
     rule = { 'query_key': 'compound,key',
              'compound_query_key': ['compound', 'key'] }
-    qk_value_csv = 'combined value, by commaspace'
+    qk_value_csv = query_key_values_separator.join( ['combined value', 'by commaspace'] )
     filters = list(ElastAlerter.query_key_filters(rule,qk_value_csv))
     expected_filters = [
         {'term': {'compound.keyword': 'combined value'}},

From 33e669922527da3aa1be7d02a1e747a0625377b6 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 19:12:24 -0800
Subject: [PATCH 13/16] Handle qk_values with various CSV styles

---
 elastalert/elastalert.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/elastalert/elastalert.py b/elastalert/elastalert.py
index 1033801d..b8224aae 100755
--- a/elastalert/elastalert.py
+++ b/elastalert/elastalert.py
@@ -6,6 +6,7 @@
 import logging
 import os
 import random
+import re
 import signal
 import sys
 import threading
@@ -468,7 +469,10 @@ def query_key_filters(rule: dict, qk_value_csv: str) -> dict:
         if qk_value_csv is None:
             return
 
-        qk_values = qk_value_csv.split(", ")
+        # Split on comma followed by zero or more whitespace characters. It's
+        # expected to be commaspace separated. However 76ab593 suggests there
+        # are cases when it is only comma and not commaspace
+        qk_values = re.split(r',\s*',qk_value_csv)
         end = '.keyword'
 
         query_keys = []
@@ -479,6 +483,12 @@ def query_key_filters(rule: dict, qk_value_csv: str) -> dict:
             if query_key is not None:
                 query_keys.append(query_key)
 
+        if len(qk_values) != len(query_keys):
+            msg = ( f"Received {len(qk_values)} value(s) for {len(query_keys)} key(s)."
+                    f" Did '{qk_value_csv}' have a value with a comma?"
+                    " See https://github.com/jertel/elastalert2/pull/1330#issuecomment-1849962106" )
+            elastalert_logger.warning( msg )
+
         for key, value in zip(query_keys, qk_values):
             if rule.get('raw_count_keys', True):
                 if not key.endswith(end):

From 86dd97f3782c0ac4d2515bf02507d68e1c75a323 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 19:16:36 -0800
Subject: [PATCH 14/16] _find_es_dict_by_key - handle "keywords" added by
 process_hits()

---
 elastalert/util.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/elastalert/util.py b/elastalert/util.py
index a504d820..04c0f274 100644
--- a/elastalert/util.py
+++ b/elastalert/util.py
@@ -65,12 +65,27 @@ def _find_es_dict_by_key(lookup_dict: dict, term: str, string_multi_field_name:
     returned for both if the key can not be found.
     """
 
-    # matching documents will not have .keyword fieldnames, even if a .keyword
-    # fieldname was used as a term in the search
+    # For compound fieldnames added by ElastAlert.process_hits()
+    #
+    # For example, when query_key is a list of fieldnames it will insert a term
+    #     'key_1,other_fieldname,a_third_name'
+    # and if the rule is set for raw_query_keys, the query_key values may end
+    # with .keyword it will insert instead something like
+    #     'key_1_ip,other_fieldname_number,a_third_name.keyword'
+    # and we need to check for that synthentic compound fielname, including the
+    # .keyword suffix before contnuing
+    #
+    # Of course, it also handles happy path, non-ambuiguous fieldnames like
+    # 'ip_address' and 'src_displayname' that don't have . or [] characters
+    if term in lookup_dict:
+        return lookup_dict, term
+
+    # If not synthetically added by ElastAlert, matching documents will not have
+    # .keyword fieldnames, even if a .keyword fieldname was used as a term in
+    # the search
     # e.g. {"term": {"description.keyword": "Target Description Here"}}
     # will return a document with {"_source": {"description": "Target Description Here"}}
     term = term.removesuffix(string_multi_field_name)
-
     if term in lookup_dict:
         return lookup_dict, term
 

From 5c34551c266997a8168962cd72ce8d363838e3f3 Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 19:56:19 -0800
Subject: [PATCH 15/16] fixup: appease the linter

---
 tests/hits_terms_test.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/hits_terms_test.py b/tests/hits_terms_test.py
index c3097da7..0f683779 100644
--- a/tests/hits_terms_test.py
+++ b/tests/hits_terms_test.py
@@ -1,9 +1,12 @@
 import pytest
-from datetime import datetime,timedelta
+from datetime import datetime, timedelta
 
 from elastalert.util import dt_to_ts
 from elastalert.elastalert import ElastAlerter
 
+# I like the dictionary whitespace the way it is, thank you
+# but I'm not going to tag all the lines with #noqa: E201
+# flake8: noqa
 
 @pytest.fixture
 def example_agg_response():
@@ -15,7 +18,7 @@ def example_agg_response():
             'total': {'value': 9, 'relation': 'eq'},
             'max_score': None,
             'hits': []},
-        'aggregations':{
+        'aggregations': {
             'counts': {
                 'doc_count_error_upper_bound': 0,
                 'sum_other_doc_count': 0,
@@ -53,29 +56,30 @@ def _mock_query_key_option_loader(rule):
         elif len(raw_query_key) == 1:
             rule['query_key'] = raw_query_key[0]
         else:
-            del(rule['query_key'])
+            del rule['query_key']
+
 
 @pytest.mark.parametrize(
     ["qk_value", "query_key"],
 
-    #scenario A: 3 query keys
+    # scenario A: 3 query keys
     [ ( ['172.16.1.10', '/api/v1/endpoint-foo', 'us-east-2'],
         ['server_ip', 'service_name', 'region'] ),
 
-    #scenario B: 2 query keys
+    # scenario B: 2 query keys
       ( ['172.16.1.10', '/api/v1/endpoint-foo'],
         ['server_ip', 'service_name'] ),
 
-    #scenario C: 1 query key, but it was given as a list of one fieldname in the rule options
-    #as of this writing, 707b2a5 shouldn't allow this to happen, but here is a test regardless
+    # scenario C: 1 query key, but it was given as a list of one fieldname in the rule options
+    # as of this writing, 707b2a5 shouldn't allow this to happen, but here is a test regardless
       ( ['172.16.1.10'],
         ['server_ip'] ),
 
-    #scenario D: 1 query key, given as a string
+    # scenario D: 1 query key, given as a string
       ( ['172.16.1.10'],
         'server_ip' ),
 
-    #scenario E: no query key
+    # scenario E: no query key
       ( None,
         None )
    ],

From e78a4b74c149eb9dde02bbc487684d7b3f0fc02b Mon Sep 17 00:00:00 2001
From: James Macdonell <jmacdone@csusb.edu>
Date: Mon, 11 Dec 2023 20:07:31 -0800
Subject: [PATCH 16/16] CHANGELOG.md PR #1330

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f0d62f38..1cba9b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@
 - [Docs] Extend FAQ / troubleshooting section with information on Elasticsearch RBAC - [#1324](https://github.com/jertel/elastalert2/pull/1324) - @chr-b
 - Upgrade to Python 3.12 - [#1327](https://github.com/jertel/elastalert2/pull/1327) - @jertel
 - Correction in IRIS and GELF alerter [#1331](https://github.com/jertel/elastalert2/pull/1331) - @malinkinsa
+- Fix handing of compound_query_key values - [#1330](https://github.com/jertel/elastalert2/pull/1330) - @jmacdone
+- Fix handing raw_query_key and query_key values ending with .keyword- [#1330](https://github.com/jertel/elastalert2/pull/1330) - @jmacdone
 - [Docs] Fix broken search function caused by sphinx upgrade a few releases ago - [#1332](https://github.com/jertel/elastalert2/pull/1332) - @jertel
 - [Docs] Fix mismatch for parameter iris_customer_id - [1334](https://github.com/jertel/elastalert2/pull/1334) @malinkinsa
 - [IRIS] Make parameter iris_customer_id optional with default value - [1334](https://github.com/jertel/elastalert2/pull/1334) @malinkinsa