Skip to content

Commit

Permalink
Improve run times for large projects by reusing connections by default (
Browse files Browse the repository at this point in the history
#1109)

* add performance test for auth methods
* update default setting for reuse_connections to True
* update unit tests to reflect the new default value for reuse_connections
* update the full default on reuse_connections to a conditional default based on client_session_keep_alive
* fix unit test expected result
* update unit test to account for __post_init__
* rerun auth method analysis
  • Loading branch information
mikealfare authored Jul 12, 2024
1 parent 1193a79 commit 6857e6b
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 13 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240709-194316.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Improve run times for large projects by reusing connections by default
time: 2024-07-09T19:43:16.489649-04:00
custom:
Author: mikealfare amardatar
Issue: "1082"
6 changes: 6 additions & 0 deletions dbt/adapters/snowflake/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class SnowflakeCredentials(Credentials):
retry_on_database_errors: bool = False
retry_all: bool = False
insecure_mode: Optional[bool] = False
# this needs to default to `None` so that we can tell if the user set it; see `__post_init__()`
reuse_connections: Optional[bool] = None

def __post_init__(self):
Expand Down Expand Up @@ -143,6 +144,11 @@ def __post_init__(self):

self.account = self.account.replace("_", "-")

# only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive`
# having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior
if self.client_session_keep_alive is False and self.reuse_connections is None:
self.reuse_connections = True

@property
def type(self):
return "snowflake"
Expand Down
6 changes: 6 additions & 0 deletions tests/performance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Performance testing

These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed.
We often get requests for reducing processing times, researching why a particular component is taking longer to run than expected, etc.
In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made).
It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date.
132 changes: 132 additions & 0 deletions tests/performance/test_auth_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
Results:
| method | project_size | reuse_connections | unsafe_skip_rsa_key_validation | duration |
|---------------|--------------|-------------------|--------------------------------|----------|
| User Password | 1,000 | False | - | 234.09s |
| User Password | 1,000 | True | - | 78.34s |
| Key Pair | 1,000 | False | False | 271.47s |
| Key Pair | 1,000 | False | True | 275.73s |
| Key Pair | 1,000 | True | False | 63.69s |
| Key Pair | 1,000 | True | True | 73.45s |
Notes:
- run locally on MacOS, single threaded
- `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method
- `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration
- since the models are all views, time differences should be viewed as absolute differences, e.g.:
- this: (271.47s - 63.69s) / 1,000 models = 208ms improvement
- NOT this: 1 - (63.69s / 271.47s) = 76.7% improvement
"""

from datetime import datetime
import os

from dbt.tests.util import run_dbt
import pytest


SEED = """
id,value
1,a
2,b
3,c
""".strip()


MODEL = """
select * from {{ ref("my_seed") }}
"""


class Scenario:
"""
Runs a full load test. The test can be configured to run an arbitrary number of models.
To use this test, configure the test by setting `project_size` and/or `expected_duration`.
"""

auth_method: str
project_size: int = 1
reuse_connections: bool = False

@pytest.fixture(scope="class")
def seeds(self):
return {"my_seed.csv": SEED}

@pytest.fixture(scope="class")
def models(self):
return {f"my_model_{i}.sql": MODEL for i in range(self.project_size)}

@pytest.fixture(scope="class", autouse=True)
def setup(self, project):
run_dbt(["seed"])

start = datetime.now()
yield
end = datetime.now()

duration = (end - start).total_seconds()
print(f"Run took: {duration} seconds")

@pytest.fixture(scope="class")
def dbt_profile_target(self, auth_params):
yield {
"type": "snowflake",
"threads": 4,
"account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"),
"database": os.getenv("SNOWFLAKE_TEST_DATABASE"),
"warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"),
"user": os.getenv("SNOWFLAKE_TEST_USER"),
"reuse_connections": self.reuse_connections,
**auth_params,
}

@pytest.fixture(scope="class")
def auth_params(self):

if self.auth_method == "user_password":
yield {"password": os.getenv("SNOWFLAKE_TEST_PASSWORD")}

elif self.auth_method == "key_pair":
"""
This connection method uses key pair auth.
Follow the instructions here to setup key pair authentication for your test user:
https://docs.snowflake.com/en/user-guide/key-pair-auth
"""
yield {
"private_key": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY"),
"private_key_passphrase": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE"),
}

else:
raise ValueError(
f"`auth_method` must be one of `user_password` or `key_pair`, received: {self.auth_method}"
)

def test_scenario(self, project):
run_dbt(["run"])


class TestUserPasswordAuth(Scenario):
auth_method = "user_password"
project_size = 1_000
reuse_connections = False


class TestUserPasswordAuthReuseConnections(Scenario):
auth_method = "user_password"
project_size = 1_000
reuse_connections = True


class TestKeyPairAuth(Scenario):
auth_method = "key_pair"
project_size = 1_000
reuse_connections = False


class TestKeyPairAuthReuseConnections(Scenario):
auth_method = "key_pair"
project_size = 1_000
reuse_connections = True
32 changes: 19 additions & 13 deletions tests/unit/test_snowflake_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,19 @@ def test_client_session_keep_alive_false_by_default(self):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
),
]
)

def test_client_session_keep_alive_true(self):
self.config.credentials = self.config.credentials.replace(client_session_keep_alive=True)
self.config.credentials = self.config.credentials.replace(
client_session_keep_alive=True,
# this gets defaulted via `__post_init__` when `client_session_keep_alive` comes in as `False`
# then when `replace` is called, `__post_init__` cannot set it back to `None` since it cannot
# tell the difference between set by user and set by `__post_init__`
reuse_connections=None,
)
self.adapter = SnowflakeAdapter(self.config, get_context("spawn"))
conn = self.adapter.connections.set_connection_name(name="new_connection_with_new_config")

Expand Down Expand Up @@ -339,7 +345,7 @@ def test_client_has_query_tag(self):
role=None,
schema="public",
user="test_user",
reuse_connections=None,
reuse_connections=True,
warehouse="test_warehouse",
private_key=None,
application="dbt",
Expand Down Expand Up @@ -379,7 +385,7 @@ def test_user_pass_authentication(self):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -413,7 +419,7 @@ def test_authenticator_user_pass_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -443,7 +449,7 @@ def test_authenticator_externalbrowser_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -477,7 +483,7 @@ def test_authenticator_oauth_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -511,7 +517,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -545,7 +551,7 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -577,7 +583,7 @@ def test_authenticator_jwt_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -607,7 +613,7 @@ def test_query_tag(self):
application="dbt",
insecure_mode=False,
session_parameters={"QUERY_TAG": "test_query_tag"},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -670,7 +676,7 @@ def test_authenticator_private_key_string_authentication(self, mock_get_private_
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -706,7 +712,7 @@ def test_authenticator_private_key_string_authentication_no_passphrase(
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down

0 comments on commit 6857e6b

Please sign in to comment.