Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve run times for large projects by reusing connections by default #1109

Merged
merged 11 commits into from
Jul 12, 2024
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240709-194316.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Improve run times for large projects by reusing connections by default
time: 2024-07-09T19:43:16.489649-04:00
custom:
Author: mikealfare amardatar
Issue: "1082"
6 changes: 6 additions & 0 deletions dbt/adapters/snowflake/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class SnowflakeCredentials(Credentials):
retry_on_database_errors: bool = False
retry_all: bool = False
insecure_mode: Optional[bool] = False
# this needs to default to `None` so that we can tell if the user set it; see `__post_init__()`
reuse_connections: Optional[bool] = None

def __post_init__(self):
Expand Down Expand Up @@ -143,6 +144,11 @@ def __post_init__(self):

self.account = self.account.replace("_", "-")

# only default `reuse_connections` to `True` if the user has not turned on `client_session_keep_alive`
# having both of these set to `True` could lead to hanging open connections, so it should be opt-in behavior
if self.client_session_keep_alive is False and self.reuse_connections is None:
self.reuse_connections = True

@property
def type(self):
return "snowflake"
Expand Down
6 changes: 6 additions & 0 deletions tests/performance/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Performance testing

These tests are not meant to run on a regular basis; instead, they are tools for measuring performance impacts of changes as needed.
We often get requests for reducing processing times, researching why a particular component is taking longer to run than expected, etc.
In the past we have performed one-off analyses to address these requests and documented the results in the relevant PR (when a change is made).
It is more useful to document those analyses in the form of performance tests so that we can easily rerun the analysis at a later date.
132 changes: 132 additions & 0 deletions tests/performance/test_auth_methods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
Results:

| method | project_size | reuse_connections | unsafe_skip_rsa_key_validation | duration |
|---------------|--------------|-------------------|--------------------------------|----------|
| User Password | 1,000 | False | - | 234.09s |
| User Password | 1,000 | True | - | 78.34s |
| Key Pair | 1,000 | False | False | 271.47s |
mikealfare marked this conversation as resolved.
Show resolved Hide resolved
| Key Pair | 1,000 | False | True | 275.73s |
| Key Pair | 1,000 | True | False | 63.69s |
| Key Pair | 1,000 | True | True | 73.45s |

Notes:
- run locally on MacOS, single threaded
- `unsafe_skip_rsa_key_validation` only applies to the Key Pair auth method
- `unsafe_skip_rsa_key_validation=True` was tested by updating the relevant `cryptography` calls directly as it is not a user configuration
- since the models are all views, time differences should be viewed as absolute differences, e.g.:
- this: (271.47s - 63.69s) / 1,000 models = 208ms improvement
- NOT this: 1 - (63.69s / 271.47s) = 76.7% improvement
"""

from datetime import datetime
import os

from dbt.tests.util import run_dbt
import pytest


SEED = """
id,value
1,a
2,b
3,c
""".strip()


MODEL = """
select * from {{ ref("my_seed") }}
"""


class Scenario:
"""
Runs a full load test. The test can be configured to run an arbitrary number of models.

To use this test, configure the test by setting `project_size` and/or `expected_duration`.
"""

auth_method: str
project_size: int = 1
reuse_connections: bool = False

@pytest.fixture(scope="class")
def seeds(self):
return {"my_seed.csv": SEED}

@pytest.fixture(scope="class")
def models(self):
return {f"my_model_{i}.sql": MODEL for i in range(self.project_size)}

@pytest.fixture(scope="class", autouse=True)
def setup(self, project):
run_dbt(["seed"])

start = datetime.now()
yield
end = datetime.now()

duration = (end - start).total_seconds()
print(f"Run took: {duration} seconds")

@pytest.fixture(scope="class")
def dbt_profile_target(self, auth_params):
yield {
"type": "snowflake",
"threads": 4,
"account": os.getenv("SNOWFLAKE_TEST_ACCOUNT"),
"database": os.getenv("SNOWFLAKE_TEST_DATABASE"),
"warehouse": os.getenv("SNOWFLAKE_TEST_WAREHOUSE"),
"user": os.getenv("SNOWFLAKE_TEST_USER"),
"reuse_connections": self.reuse_connections,
**auth_params,
}

@pytest.fixture(scope="class")
def auth_params(self):

if self.auth_method == "user_password":
yield {"password": os.getenv("SNOWFLAKE_TEST_PASSWORD")}

elif self.auth_method == "key_pair":
"""
This connection method uses key pair auth.
Follow the instructions here to setup key pair authentication for your test user:
https://docs.snowflake.com/en/user-guide/key-pair-auth
"""
yield {
"private_key": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY"),
"private_key_passphrase": os.getenv("SNOWFLAKE_TEST_PRIVATE_KEY_PASSPHRASE"),
}

else:
raise ValueError(
f"`auth_method` must be one of `user_password` or `key_pair`, received: {self.auth_method}"
)

def test_scenario(self, project):
run_dbt(["run"])
VersusFacit marked this conversation as resolved.
Show resolved Hide resolved


class TestUserPasswordAuth(Scenario):
auth_method = "user_password"
project_size = 1_000
reuse_connections = False


class TestUserPasswordAuthReuseConnections(Scenario):
auth_method = "user_password"
project_size = 1_000
reuse_connections = True


class TestKeyPairAuth(Scenario):
auth_method = "key_pair"
project_size = 1_000
reuse_connections = False


class TestKeyPairAuthReuseConnections(Scenario):
auth_method = "key_pair"
project_size = 1_000
reuse_connections = True
32 changes: 19 additions & 13 deletions tests/unit/test_snowflake_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,13 +290,19 @@ def test_client_session_keep_alive_false_by_default(self):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
),
]
)

def test_client_session_keep_alive_true(self):
self.config.credentials = self.config.credentials.replace(client_session_keep_alive=True)
self.config.credentials = self.config.credentials.replace(
client_session_keep_alive=True,
# this gets defaulted via `__post_init__` when `client_session_keep_alive` comes in as `False`
# then when `replace` is called, `__post_init__` cannot set it back to `None` since it cannot
# tell the difference between set by user and set by `__post_init__`
reuse_connections=None,
)
self.adapter = SnowflakeAdapter(self.config, get_context("spawn"))
conn = self.adapter.connections.set_connection_name(name="new_connection_with_new_config")

Expand Down Expand Up @@ -339,7 +345,7 @@ def test_client_has_query_tag(self):
role=None,
schema="public",
user="test_user",
reuse_connections=None,
reuse_connections=True,
warehouse="test_warehouse",
private_key=None,
application="dbt",
Expand Down Expand Up @@ -379,7 +385,7 @@ def test_user_pass_authentication(self):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -413,7 +419,7 @@ def test_authenticator_user_pass_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -443,7 +449,7 @@ def test_authenticator_externalbrowser_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -477,7 +483,7 @@ def test_authenticator_oauth_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -511,7 +517,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key):
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -545,7 +551,7 @@ def test_authenticator_private_key_authentication_no_passphrase(self, mock_get_p
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -577,7 +583,7 @@ def test_authenticator_jwt_authentication(self):
client_store_temporary_credential=True,
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -607,7 +613,7 @@ def test_query_tag(self):
application="dbt",
insecure_mode=False,
session_parameters={"QUERY_TAG": "test_query_tag"},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -670,7 +676,7 @@ def test_authenticator_private_key_string_authentication(self, mock_get_private_
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down Expand Up @@ -706,7 +712,7 @@ def test_authenticator_private_key_string_authentication_no_passphrase(
application="dbt",
insecure_mode=False,
session_parameters={},
reuse_connections=None,
reuse_connections=True,
)
]
)
Expand Down
Loading