Improve run times for large projects by reusing connections by default (

#1109) * add performance test for auth methods * update default setting for reuse_connections to True * update unit tests to reflect the new default value for reuse_connections * update the full default on reuse_connections to a conditional default based on client_session_keep_alive * fix unit test expected result * update unit test to account for __post_init__ * rerun auth method analysis
dbt-labs · Jul 12, 2024 · 6857e6b · 6857e6b
1 parent 1193a79
commit 6857e6b
Show file tree

Hide file tree

Showing 5 changed files with 169 additions and 13 deletions.
diff --git a/.changes/unreleased/Features-20240709-194316.yaml b/.changes/unreleased/Features-20240709-194316.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Improve run times for large projects by reusing connections by default
+time: 2024-07-09T19:43:16.489649-04:00
+custom:
+  Author: mikealfare amardatar
+  Issue: "1082"
diff --git a/dbt/adapters/snowflake/connections.py b/dbt/adapters/snowflake/connections.py
@@ -113,6 +113,7 @@ class SnowflakeCredentials(Credentials):
     retry_on_database_errors: bool = False
     retry_all: bool = False
     insecure_mode: Optional[bool] = False
+    # this needs to default to `None` so that we can tell if the user set it; see `__post_init__()`
     reuse_connections: Optional[bool] = None
 
     def __post_init__(self):
@@ -143,6 +144,11 @@ def __post_init__(self):
 
         self.account = self.account.replace("_", "-")
 
+        # only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive`
+        # having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior
+        if self.client_session_keep_alive is False and self.reuse_connections is None:
+            self.reuse_connections = True
+
     @property
     def type(self):
         return "snowflake"

diff --git a/tests/performance/README.md b/tests/performance/README.md
@@ -0,0 +1,6 @@
+# Performance testing
+
+These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed.
+We often get requests for reducing processing times, researching why a particular component is taking longer to run than expected, etc.
+In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made).
+It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date.
diff --git a/tests/performance/test_auth_methods.py b/tests/performance/test_auth_methods.py
@@ -0,0 +1,132 @@
+"""
+Results:
+
+| method        | project_size | reuse_connections | unsafe_skip_rsa_key_validation | duration |
+|---------------|--------------|-------------------|--------------------------------|----------|
+| User Password |        1,000 | False             | -                              |  234.09s |
+| User Password |        1,000 | True              | -                              |   78.34s |
+| Key Pair      |        1,000 | False             | False                          |  271.47s |
+| Key Pair      |        1,000 | False             | True                           |  275.73s |
+| Key Pair      |        1,000 | True              | False                          |   63.69s |
+| Key Pair      |        1,000 | True              | True                           |   73.45s |
+
+Notes:
+- run locally on MacOS, single threaded
+- `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method
+- `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration
+- since the models are all views, time differences should be viewed as absolute differences, e.g.:
+    - this: (271.47s - 63.69s) / 1,000 models = 208ms improvement
+    - NOT this: 1 - (63.69s / 271.47s) = 76.7% improvement
+"""
+
+from datetime import datetime
+import os
+
+from dbt.tests.util import run_dbt
+import pytest
+
+
+SEED = """
+id,value
+1,a
+2,b
+3,c
+""".strip()
+
+
+MODEL = """
+select * from {{ ref("my_seed") }}
+"""
+
+
+class Scenario:
+    """
+    Runs a full load test. The test can be configured to run an arbitrary number of models.
+
+    To use this test, configure the test by setting `project_size` and/or `expected_duration`.
+    """
+
+    auth_method: str
+    project_size: int = 1
+    reuse_connections: bool = False
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"my_seed.csv": SEED}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {f"my_model_{i}.sql": MODEL for i in range(self.project_size)}
+
+    @pytest.fixture(scope="class", autouse=True)
+    def setup(self, project):
+        run_dbt(["seed"])
+
+        start = datetime.now()
+        yield
+        end = datetime.now()
+
+        duration = (end - start).total_seconds()
+        print(f"Run took: {duration} seconds")
+
+    @pytest.fixture(scope="class")
+    def dbt_profile_target(self, auth_params):
+        yield {
+            "type": "snowflake",
+            "threads": 4,
+            "account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"),
+            "database": os.getenv("SNOWFLAKE_TEST_DATABASE"),
+            "warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"),
+            "user": os.getenv("SNOWFLAKE_TEST_USER"),
+            "reuse_connections": self.reuse_connections,
+            **auth_params,
+        }
+
+    @pytest.fixture(scope="class")
+    def auth_params(self):
+
+        if self.auth_method == "user_password":
+            yield {"password": os.getenv("SNOWFLAKE_TEST_PASSWORD")}
+
+        elif self.auth_method == "key_pair":
+            """
+            This connection method uses key pair auth.
+            Follow the instructions here to setup key pair authentication for your test user:
+            https://docs.snowflake.com/en/user-guide/key-pair-auth
+            """
+            yield {
+                "private_key": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY"),
+                "private_key_passphrase": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE"),
+            }
+
+        else:
+            raise ValueError(
+                f"`auth_method` must be one of `user_password` or `key_pair`, received: {self.auth_method}"
+            )
+
+    def test_scenario(self, project):
+        run_dbt(["run"])
+
+
+class TestUserPasswordAuth(Scenario):
+    auth_method = "user_password"
+    project_size = 1_000
+    reuse_connections = False
+
+
+class TestUserPasswordAuthReuseConnections(Scenario):
+    auth_method = "user_password"
+    project_size = 1_000
+    reuse_connections = True
+
+
+class TestKeyPairAuth(Scenario):
+    auth_method = "key_pair"
+    project_size = 1_000
+    reuse_connections = False
+
+
+class TestKeyPairAuthReuseConnections(Scenario):
+    auth_method = "key_pair"
+    project_size = 1_000
+    reuse_connections = True
diff --git a/tests/unit/test_snowflake_adapter.py b/tests/unit/test_snowflake_adapter.py
@@ -290,13 +290,19 @@ def test_client_session_keep_alive_false_by_default(self):
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 ),
             ]
         )
 
     def test_client_session_keep_alive_true(self):
-        self.config.credentials = self.config.credentials.replace(client_session_keep_alive=True)
+        self.config.credentials = self.config.credentials.replace(
+            client_session_keep_alive=True,
+            # this gets defaulted via `__post_init__` when `client_session_keep_alive` comes in as `False`
+            # then when `replace` is called, `__post_init__` cannot set it back to `None` since it cannot
+            # tell the difference between set by user and set by `__post_init__`
+            reuse_connections=None,
+        )
         self.adapter = SnowflakeAdapter(self.config, get_context("spawn"))
         conn = self.adapter.connections.set_connection_name(name="new_connection_with_new_config")
 
@@ -339,7 +345,7 @@ def test_client_has_query_tag(self):
                     role=None,
                     schema="public",
                     user="test_user",
-                    reuse_connections=None,
+                    reuse_connections=True,
                     warehouse="test_warehouse",
                     private_key=None,
                     application="dbt",
@@ -379,7 +385,7 @@ def test_user_pass_authentication(self):
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -413,7 +419,7 @@ def test_authenticator_user_pass_authentication(self):
                     client_store_temporary_credential=True,
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -443,7 +449,7 @@ def test_authenticator_externalbrowser_authentication(self):
                     client_store_temporary_credential=True,
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -477,7 +483,7 @@ def test_authenticator_oauth_authentication(self):
                     client_store_temporary_credential=True,
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -511,7 +517,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key):
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -545,7 +551,7 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -577,7 +583,7 @@ def test_authenticator_jwt_authentication(self):
                     client_store_temporary_credential=True,
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -607,7 +613,7 @@ def test_query_tag(self):
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={"QUERY_TAG": "test_query_tag"},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -670,7 +676,7 @@ def test_authenticator_private_key_string_authentication(self, mock_get_private_
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )
@@ -706,7 +712,7 @@ def test_authenticator_private_key_string_authentication_no_passphrase(
                     application="dbt",
                     insecure_mode=False,
                     session_parameters={},
-                    reuse_connections=None,
+                    reuse_connections=True,
                 )
             ]
         )