diff --git a/.github/workflows/airbyte-ci-release.yml b/.github/workflows/airbyte-ci-release.yml index 1d42e96e7d3b..aad8f7629559 100644 --- a/.github/workflows/airbyte-ci-release.yml +++ b/.github/workflows/airbyte-ci-release.yml @@ -6,8 +6,6 @@ concurrency: on: push: - branches: - - master paths: - "airbyte-ci/connectors/pipelines/**" workflow_dispatch: diff --git a/.github/workflows/airbyte-ci-tests.yml b/.github/workflows/airbyte-ci-tests.yml index 6069ea917a70..7089532b095d 100644 --- a/.github/workflows/airbyte-ci-tests.yml +++ b/.github/workflows/airbyte-ci-tests.yml @@ -13,8 +13,9 @@ on: - synchronize jobs: run-airbyte-ci-tests: + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: Run Airbyte CI tests - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-test-large-dagger-0-6-4" steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml new file mode 100644 index 000000000000..234a038558e0 --- /dev/null +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -0,0 +1,50 @@ +name: Approve and Merge Demo Command Dispatch + +# Note: We have a two stage dispatch so that we can wait for the formatters to run before approving and merging. +on: + repository_dispatch: + types: [approve-and-merge-demo-command] + +jobs: + checkFormat: + runs-on: ubuntu-latest + steps: + - name: Wait for formatters to succeed + id: wait-for-formatters + uses: lewagon/wait-on-check-action@v1.3.1 + with: + ref: ${{ github.event.client_payload.pull_request.head.ref }} + check-name: "Apply All Formatting Rules" + repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + wait-interval: 30 + - name: Comment if formatters failed + if: failure() + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: Formatters failed. Ensure formatting is passing before using approve-and-merge. 
+ + approveAndMergeDispatch: + runs-on: ubuntu-latest + needs: [checkFormat] + steps: + - name: Auto Approve Slash Command Dispatch + uses: peter-evans/slash-command-dispatch@v3 + id: scd + with: + token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + permission: write + issue-type: pull-request + repository: airbytehq/airbyte-cloud + dispatch-type: repository + commands: | + approve-and-merge + + - name: Edit comment with error message + if: steps.scd.outputs.error-message + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: ${{ steps.scd.outputs.error-message }} diff --git a/.github/workflows/cat-tests.yml b/.github/workflows/cat-tests.yml index 97de1c1fda89..e341407dd8f1 100644 --- a/.github/workflows/cat-tests.yml +++ b/.github/workflows/cat-tests.yml @@ -16,7 +16,7 @@ on: jobs: run-cat-unit-tests: name: Run CAT unit tests - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-test-large-dagger-0-6-4" steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index c7f7eb7dddf5..6b9d5d6ce5fa 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -8,19 +8,19 @@ on: inputs: runs-on: type: string - default: conn-nightly-xlarge-runner + default: ci-runner-connector-nightly-xlarge-dagger-0-6-4 required: true test-connectors-options: default: --concurrency=5 --support-level=certified required: true -run-name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" +run-name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" jobs: test_connectors: - name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" + name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" timeout-minutes: 720 # 12 hours - runs-on: ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_tests.yml b/.github/workflows/connectors_tests.yml index 610e4fc94ad1..c371735c35bb 100644 --- a/.github/workflows/connectors_tests.yml +++ b/.github/workflows/connectors_tests.yml @@ -19,7 +19,7 @@ on: default: "--modified" runner: description: "The runner to use for this job" - default: "conn-prod-xlarge-runner" + default: "ci-runner-connector-test-large-dagger-0-6-4" pull_request: types: - opened @@ -29,7 +29,7 @@ jobs: connectors_ci: name: Connectors CI timeout-minutes: 1440 # 24 hours - runs-on: ${{ inputs.runner || 'conn-prod-xlarge-runner'}} + runs-on: ${{ inputs.runner || 'ci-runner-connector-test-large-dagger-0-6-4'}} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_weekly_build.yml b/.github/workflows/connectors_weekly_build.yml index ccf1f0b52199..aa96a832b9b8 100644 --- a/.github/workflows/connectors_weekly_build.yml +++ 
b/.github/workflows/connectors_weekly_build.yml @@ -8,19 +8,19 @@ on: inputs: runs-on: type: string - default: conn-nightly-xlarge-runner + default: ci-runner-connector-nightly-xlarge-dagger-0-6-4 required: true test-connectors-options: default: --concurrency=3 --support-level=community required: true -run-name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" +run-name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" jobs: test_connectors: - name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" + name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" timeout-minutes: 8640 # 6 days - runs-on: ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/format_check.yml b/.github/workflows/format_check.yml index 290eb5662fd9..97530ad424e7 100644 --- a/.github/workflows/format_check.yml +++ b/.github/workflows/format_check.yml @@ -10,7 +10,7 @@ on: - master jobs: format-check: - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-format-medium-dagger-0-6-4" name: "Check for formatting errors on ${{ github.head_ref }}" timeout-minutes: 40 steps: diff --git a/.github/workflows/format_fix.yml b/.github/workflows/format_fix.yml index b1b2c31562aa..58761fef432e 100644 --- a/.github/workflows/format_fix.yml +++ b/.github/workflows/format_fix.yml @@ -10,7 +10,8 @@ on: pull_request: jobs: format-fix: - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-format-medium-dagger-0-6-4" + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: "Apply All Formatting Rules" timeout-minutes: 40 steps: diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 13fa98a6acaa..695ccde96fa1 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -54,6 +54,7 @@ jobs: # In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest. 
needs: start-check-runner # required to start the main job when the runner is ready runs-on: ${{ needs.start-check-runner.outputs.label }} # run the job on the newly created runner + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: Gradle Check timeout-minutes: 30 steps: diff --git a/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml b/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml index 0da841726893..9550a5b1635c 100644 --- a/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml +++ b/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml @@ -10,7 +10,7 @@ on: jobs: connector_metadata_service_deploy_orchestrator: name: Connector metadata service deploy orchestrator - runs-on: medium-runner + runs-on: ci-runner-connector-test-large-dagger-0-6-4 steps: - name: Checkout Airbyte uses: actions/checkout@v2 diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 5fdc8dfcde60..ded1f9fb11ae 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -16,12 +16,12 @@ on: default: "--pre-release" runs-on: type: string - default: conn-prod-xlarge-runner + default: ci-runner-connector-publish-large-dagger-0-6-4 required: true jobs: publish_connectors: name: Publish connectors - runs-on: ${{ inputs.runs-on || 'conn-prod-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-publish-large-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 477db6af6d6a..05deefcb7845 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -5,12 +5,12 @@ on: inputs: runs-on: type: string - default: conn-prod-xlarge-runner + default: ci-runner-connector-publish-large-dagger-0-6-4 required: true jobs: no-op: name: No-op - runs-on: ${{ inputs.runs-on || 'conn-prod-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-publish-large-dagger-0-6-4' }} steps: - run: echo 'hi!' diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index 22029f06baba..8df9fb7d3342 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -15,9 +15,9 @@ jobs: echo ref="$(echo $pr_info | jq -r '.head.ref')" >> $GITHUB_OUTPUT echo repo="$(echo $pr_info | jq -r '.head.repo.full_name')" >> $GITHUB_OUTPUT - - name: Slash Command Dispatch + - name: Slash Command Dispatch (Workflow) id: scd - uses: peter-evans/slash-command-dispatch@v2 + uses: peter-evans/slash-command-dispatch@v3 with: token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }} permission: write @@ -35,6 +35,16 @@ jobs: comment-id=${{ github.event.comment.id }} dispatch-type: workflow + - name: Slash Command Dispatch (Repository) + id: scdr + uses: peter-evans/slash-command-dispatch@v3 + with: + token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }} + permission: write + commands: | + approve-and-merge-demo + dispatch-type: repository + - name: Edit comment with error message if: steps.scd.outputs.error-message uses: peter-evans/create-or-update-comment@v1 diff --git a/LICENSE b/LICENSE index 814fd88f57f3..0df58b4829be 100644 --- a/LICENSE +++ b/LICENSE @@ -1,14 +1,17 @@ Airbyte monorepo uses multiple licenses. The license for a particular work is defined with following prioritized rules: + 1. License directly present in the file 2. 
LICENSE file in the same directory as the work -3. First LICENSE found when exploring parent directories up to the project top level directory -4. Defaults to Elastic License 2.0 +3. A `license` property defined in the `metadata.yaml` configuration file found when exploring parent directories (most connectors) +4. First LICENSE found when exploring parent directories up to the project top level directory +5. Defaults to Elastic License 2.0 If you have any question regarding licenses, just visit our [FAQ](https://airbyte.io/license-faq) or [contact us](mailto:license@airbyte.io). ------------------------------------------------------------------------------------- +--- + MIT License Copyright (c) 2020 Airbyte, Inc. @@ -31,7 +34,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------------- +--- + Elastic License 2.0 (ELv2) **Acceptance** @@ -65,16 +69,16 @@ If you use the software in violation of these terms, such use is not licensed, a As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim. **Definitions** -The *licensor* is the entity offering these terms, and the *software* is the software the licensor makes available under these terms, including any portion of it. +The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it. -*you* refers to the individual or entity agreeing to these terms. +_you_ refers to the individual or entity agreeing to these terms. -*your company* is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. *control* means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. +_your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. -*your licenses* are all the licenses granted to you for the software under these terms. +_your licenses_ are all the licenses granted to you for the software under these terms. -*use* means anything you do with the software requiring one of your licenses. +_use_ means anything you do with the software requiring one of your licenses. -*trademark* means trademarks, service marks, and similar rights. +_trademark_ means trademarks, service marks, and similar rights. 
------------------------------------------------------------------------------------- +--- diff --git a/airbyte-cdk/java/airbyte-cdk/README.md b/airbyte-cdk/java/airbyte-cdk/README.md index 26b1f6dc8378..f51f376715bf 100644 --- a/airbyte-cdk/java/airbyte-cdk/README.md +++ b/airbyte-cdk/java/airbyte-cdk/README.md @@ -156,6 +156,7 @@ MavenLocal debugging steps: | Version | Date | Pull Request | Subject | | :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.5.0 | 2023-11-22 | [\#32656](https://github.com/airbytehq/airbyte/pull/32656) | Introduce TestDatabase test fixture, refactor database source test base classes. | | 0.4.11 | 2023-11-14 | [\#32526](https://github.com/airbytehq/airbyte/pull/32526) | Clean up memory manager logs. | | 0.4.10 | 2023-11-13 | [\#32285](https://github.com/airbytehq/airbyte/pull/32285) | Fix UUID codec ordering for MongoDB connector | | 0.4.9 | 2023-11-13 | [\#32468](https://github.com/airbytehq/airbyte/pull/32468) | Further error grouping improvements for DV2 connectors | diff --git a/airbyte-cdk/java/airbyte-cdk/core/build.gradle b/airbyte-cdk/java/airbyte-cdk/core/build.gradle index 5dccda8a8d05..38c9c3e24b29 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/build.gradle +++ b/airbyte-cdk/java/airbyte-cdk/core/build.gradle @@ -77,6 +77,7 @@ dependencies { testImplementation libs.testcontainers.jdbc testImplementation libs.testcontainers.mysql testImplementation libs.testcontainers.postgresql + testImplementation libs.testcontainers.mssqlserver implementation 'org.codehaus.plexus:plexus-utils:3.4.2' // bouncycastle is pinned to version-match the transitive dependency from kubernetes client-java diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java deleted file mode 100644 index 0ae1829e93aa..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.cdk.db; - -import com.google.common.annotations.VisibleForTesting; -import java.io.IOException; -import org.testcontainers.containers.MySQLContainer; - -public class MySqlUtils { - - @VisibleForTesting - public static Certificate getCertificate(final MySQLContainer container, - final boolean useAllCertificates) - throws IOException, InterruptedException { - // add root and server certificates to config file - container.execInContainer("sh", "-c", "sed -i '31 a ssl' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '32 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '33 a ssl-cert=/var/lib/mysql/server-cert.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '34 a ssl-key=/var/lib/mysql/server-key.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '35 a require_secure_transport=ON' /etc/my.cnf"); - // add client certificates to config file - if (useAllCertificates) { - container.execInContainer("sh", "-c", "sed -i '39 a [client]' /etc/mysql/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '40 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '41 a ssl-cert=/var/lib/mysql/client-cert.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '42 a ssl-key=/var/lib/mysql/client-key.pem' /etc/my.cnf"); - } - // copy root certificate and client certificates - var caCert = container.execInContainer("sh", "-c", "cat /var/lib/mysql/ca.pem").getStdout().trim(); - - if (useAllCertificates) { - var clientKey = container.execInContainer("sh", "-c", "cat /var/lib/mysql/client-key.pem").getStdout().trim(); - var clientCert = container.execInContainer("sh", "-c", "cat /var/lib/mysql/client-cert.pem").getStdout().trim(); - return new Certificate(caCert, clientCert, clientKey); - } else { - return new Certificate(caCert); - } - } - - public static class Certificate { - - private final String caCertificate; - private final String clientCertificate; - private final String clientKey; - - public Certificate(final String caCertificate) { - this.caCertificate = caCertificate; - this.clientCertificate = null; - this.clientKey = null; - } - - public Certificate(final String caCertificate, final String clientCertificate, final String clientKey) { - this.caCertificate = caCertificate; - this.clientCertificate = clientCertificate; - this.clientKey = clientKey; - } - - public String getCaCertificate() { - return caCertificate; - } - - public String getClientCertificate() { - return clientCertificate; - } - - public String getClientKey() { - return clientKey; - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java index 8781369cb77d..0b16eb5fed00 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java @@ -5,14 +5,11 @@ package io.airbyte.cdk.db; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; import java.sql.SQLException; import java.util.List; -import org.testcontainers.containers.PostgreSQLContainer; public class PostgresUtils { @@ -26,74 +23,4 @@ public 
static PgLsn getLsn(final JdbcDatabase database) throws SQLException { return PgLsn.fromPgString(jsonNodes.get(0).get("pg_current_wal_lsn").asText()); } - @VisibleForTesting - public static Certificate getCertificate(final PostgreSQLContainer container) throws IOException, InterruptedException { - container.execInContainer("su", "-c", "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\""); - container.execInContainer("su", "-c", "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\""); - container.execInContainer("su", "-c", "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\""); - - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out ca.key"); - container.execInContainer("su", "-c", "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\""); - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out server.key"); - container.execInContainer("su", "-c", "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\""); - container.execInContainer("su", "-c", - "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256"); - container.execInContainer("su", "-c", "cp server.key /etc/ssl/private/"); - container.execInContainer("su", "-c", "cp server.crt /etc/ssl/private/"); - container.execInContainer("su", "-c", "cp ca.crt /etc/ssl/private/"); - container.execInContainer("su", "-c", "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*"); - container.execInContainer("su", "-c", "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt"); - container.execInContainer("su", "-c", "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "mkdir root/.postgresql"); - container.execInContainer("su", "-c", - "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf"); - - final var caCert = container.execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); - - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out client.key"); - container.execInContainer("su", "-c", "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\""); - container.execInContainer("su", "-c", - "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256"); - container.execInContainer("su", "-c", "cp client.crt ~/.postgresql/postgresql.crt"); - container.execInContainer("su", "-c", "cp client.key ~/.postgresql/postgresql.key"); - container.execInContainer("su", "-c", "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key"); - container.execInContainer("su", "-c", "cp ca.crt root/.postgresql/ca.crt"); - container.execInContainer("su", "-c", "chown postgres:postgres ~/.postgresql/ca.crt"); - - container.execInContainer("su", "-c", "psql -U test -c \"SELECT pg_reload_conf();\""); - - final var clientKey = container.execInContainer("su", "-c", 
"cat client.key").getStdout().trim(); - final var clientCert = container.execInContainer("su", "-c", "cat client.crt").getStdout().trim(); - return new Certificate(caCert, clientCert, clientKey); - } - - public static class Certificate { - - private final String caCertificate; - private final String clientCertificate; - private final String clientKey; - - public Certificate(final String caCertificate, final String clientCertificate, final String clientKey) { - this.caCertificate = caCertificate; - this.clientCertificate = clientCertificate; - this.clientKey = clientKey; - } - - public String getCaCertificate() { - return caCertificate; - } - - public String getClientCertificate() { - return clientCertificate; - } - - public String getClientKey() { - return clientKey; - } - - } - } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java index 38837ac5ef35..c03b6fb7a89b 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java @@ -11,8 +11,10 @@ import com.zaxxer.hikari.HikariDataSource; import java.io.Closeable; import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.time.temporal.TemporalUnit; import java.util.Map; -import java.util.Objects; +import java.util.Optional; import javax.sql.DataSource; /** @@ -188,10 +190,10 @@ private static class DataSourceBuilder { private DataSourceBuilder() {} /** - * Retrieves connectionTimeout value from connection properties in seconds, default minimum timeout + * Retrieves connectionTimeout value from connection properties in millis, default minimum timeout * is 60 seconds since Hikari default of 30 seconds is not enough for acceptance tests. In the case * the value is 0, pass the value along as Hikari and Postgres use default max value for 0 timeout - * value + * value. 
* * NOTE: HikariCP uses milliseconds for all time values: * https://github.com/brettwooldridge/HikariCP#gear-configuration-knobs-baby whereas Postgres is @@ -203,27 +205,32 @@ private DataSourceBuilder() {} * @return DataSourceBuilder class used to create dynamic fields for DataSource */ private static long getConnectionTimeoutMs(final Map connectionProperties, String driverClassName) { - // TODO: the usage of CONNECT_TIMEOUT is Postgres specific, may need to extend for other databases - if (driverClassName.equals(DatabaseDriver.POSTGRESQL.getDriverClassName())) { - final String pgPropertyConnectTimeout = CONNECT_TIMEOUT.getName(); - // If the PGProperty.CONNECT_TIMEOUT was set by the user, then take its value, if not take the - // default - if (connectionProperties.containsKey(pgPropertyConnectTimeout) - && (Long.parseLong(connectionProperties.get(pgPropertyConnectTimeout)) >= 0)) { - return Duration.ofSeconds(Long.parseLong(connectionProperties.get(pgPropertyConnectTimeout))).toMillis(); - } else { - return Duration.ofSeconds(Long.parseLong(Objects.requireNonNull(CONNECT_TIMEOUT.getDefaultValue()))).toMillis(); - } + final Optional parsedConnectionTimeout = switch (DatabaseDriver.findByDriverClassName(driverClassName)) { + case POSTGRESQL -> maybeParseDuration(connectionProperties.get(CONNECT_TIMEOUT.getName()), ChronoUnit.SECONDS) + .or(() -> maybeParseDuration(CONNECT_TIMEOUT.getDefaultValue(), ChronoUnit.SECONDS)); + case MYSQL -> maybeParseDuration(connectionProperties.get("connectTimeout"), ChronoUnit.MILLIS); + case MSSQLSERVER -> maybeParseDuration(connectionProperties.get("loginTimeout"), ChronoUnit.SECONDS); + default -> maybeParseDuration(connectionProperties.get(CONNECT_TIMEOUT_KEY), ChronoUnit.SECONDS) + // Enforce minimum timeout duration for unspecified data sources. + .filter(d -> d.compareTo(CONNECT_TIMEOUT_DEFAULT) >= 0); + }; + return parsedConnectionTimeout.orElse(CONNECT_TIMEOUT_DEFAULT).toMillis(); + } + + private static Optional maybeParseDuration(final String stringValue, TemporalUnit unit) { + if (stringValue == null) { + return Optional.empty(); + } + final long number; + try { + number = Long.parseLong(stringValue); + } catch (NumberFormatException __) { + return Optional.empty(); } - final Duration connectionTimeout; - connectionTimeout = - connectionProperties.containsKey(CONNECT_TIMEOUT_KEY) ? Duration.ofSeconds(Long.parseLong(connectionProperties.get(CONNECT_TIMEOUT_KEY))) - : CONNECT_TIMEOUT_DEFAULT; - if (connectionTimeout.getSeconds() == 0) { - return connectionTimeout.toMillis(); - } else { - return (connectionTimeout.compareTo(CONNECT_TIMEOUT_DEFAULT) > 0 ? 
connectionTimeout : CONNECT_TIMEOUT_DEFAULT).toMillis(); + if (number < 0) { + return Optional.empty(); } + return Optional.of(Duration.of(number, unit)); } public DataSourceBuilder withConnectionProperties(final Map connectionProperties) { diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java index c04c5ccc0907..07a1786f60dd 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java @@ -21,7 +21,7 @@ import org.testcontainers.containers.Network; import org.testcontainers.images.builder.ImageFromDockerfile; -public class SshBastionContainer { +public class SshBastionContainer implements AutoCloseable { private static final String SSH_USER = "sshuser"; private static final String SSH_PASSWORD = "secret"; @@ -36,21 +36,27 @@ public void initAndStartBastion(final Network network) { bastion.start(); } + public JsonNode getTunnelMethod(final SshTunnel.TunnelMethod tunnelMethod, + final boolean innerAddress) + throws IOException, InterruptedException { + final var containerAddress = innerAddress ? getInnerContainerAddress(bastion) : getOuterContainerAddress(bastion); + return Jsons.jsonNode(ImmutableMap.builder() + .put("tunnel_host", + Objects.requireNonNull(containerAddress.left)) + .put("tunnel_method", tunnelMethod) + .put("tunnel_port", containerAddress.right) + .put("tunnel_user", SSH_USER) + .put("tunnel_user_password", tunnelMethod.equals(SSH_PASSWORD_AUTH) ? SSH_PASSWORD : "") + .put("ssh_key", tunnelMethod.equals(SSH_KEY_AUTH) ? bastion.execInContainer("cat", "var/bastion/id_rsa").getStdout() : "") + .build()); + } + public JsonNode getTunnelConfig(final SshTunnel.TunnelMethod tunnelMethod, final ImmutableMap.Builder builderWithSchema, final boolean innerAddress) throws IOException, InterruptedException { - final var containerAddress = innerAddress ? getInnerContainerAddress(bastion) : getOuterContainerAddress(bastion); return Jsons.jsonNode(builderWithSchema - .put("tunnel_method", Jsons.jsonNode(ImmutableMap.builder() - .put("tunnel_host", - Objects.requireNonNull(containerAddress.left)) - .put("tunnel_method", tunnelMethod) - .put("tunnel_port", containerAddress.right) - .put("tunnel_user", SSH_USER) - .put("tunnel_user_password", tunnelMethod.equals(SSH_PASSWORD_AUTH) ? SSH_PASSWORD : "") - .put("ssh_key", tunnelMethod.equals(SSH_KEY_AUTH) ? 
bastion.execInContainer("cat", "var/bastion/id_rsa").getStdout() : "") - .build())) + .put("tunnel_method", getTunnelMethod(tunnelMethod, innerAddress)) .build()); } @@ -83,6 +89,11 @@ public void stopAndClose() { bastion.close(); } + @Override + public void close() { + stopAndClose(); + } + public GenericContainer getContainer() { return bastion; } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index 6c39f216d22b..c720ecde1c21 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.4.11 +version=0.5.0 \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java b/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java index c53d9624b66b..a8af1eb4abee 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java @@ -17,6 +17,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.testcontainers.containers.MSSQLServerContainer; import org.testcontainers.containers.MySQLContainer; /** @@ -80,7 +81,7 @@ void testCreatingMySQLDataSourceWithConnectionTimeoutSetBelowDefault() { try (MySQLContainer mySQLContainer = new MySQLContainer<>("mysql:8.0")) { mySQLContainer.start(); final Map connectionProperties = Map.of( - CONNECT_TIMEOUT, "30"); + CONNECT_TIMEOUT, "5000"); final DataSource dataSource = DataSourceFactory.create( mySQLContainer.getUsername(), mySQLContainer.getPassword(), @@ -89,7 +90,23 @@ void testCreatingMySQLDataSourceWithConnectionTimeoutSetBelowDefault() { connectionProperties); assertNotNull(dataSource); assertEquals(HikariDataSource.class, dataSource.getClass()); - assertEquals(60000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); + assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); + } + } + + @Test + void testCreatingMsSQLServerDataSourceWithConnectionTimeoutSetBelowDefault() { + try (var mssqlServerContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense()) { + mssqlServerContainer.start(); + final DataSource dataSource = DataSourceFactory.create( + mssqlServerContainer.getUsername(), + mssqlServerContainer.getPassword(), + mssqlServerContainer.getDriverClassName(), + mssqlServerContainer.getJdbcUrl(), + Map.of("loginTimeout", "5")); + assertNotNull(dataSource); + assertEquals(HikariDataSource.class, dataSource.getClass()); + assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java index e292f6629e2a..49f7afa14a6b 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java 
@@ -32,7 +32,6 @@ import io.debezium.engine.DebeziumEngine; import java.time.Duration; import java.time.Instant; -import java.util.Collections; import java.util.Optional; import java.util.OptionalInt; import java.util.Properties; @@ -57,18 +56,20 @@ public class AirbyteDebeziumHandler { private final JsonNode config; private final CdcTargetPosition targetPosition; private final boolean trackSchemaHistory; - private final Duration firstRecordWaitTime; + private final Duration firstRecordWaitTime, subsequentRecordWaitTime; private final OptionalInt queueSize; public AirbyteDebeziumHandler(final JsonNode config, final CdcTargetPosition targetPosition, final boolean trackSchemaHistory, final Duration firstRecordWaitTime, + final Duration subsequentRecordWaitTime, final OptionalInt queueSize) { this.config = config; this.targetPosition = targetPosition; this.trackSchemaHistory = trackSchemaHistory; this.firstRecordWaitTime = firstRecordWaitTime; + this.subsequentRecordWaitTime = subsequentRecordWaitTime; this.queueSize = queueSize; } @@ -97,7 +98,8 @@ public AutoCloseableIterator getSnapshotIterators( targetPosition, tableSnapshotPublisher::hasClosed, new DebeziumShutdownProcedure<>(queue, tableSnapshotPublisher::close, tableSnapshotPublisher::hasClosed), - firstRecordWaitTime); + firstRecordWaitTime, + subsequentRecordWaitTime); return AutoCloseableIterators.concatWithEagerClose(AutoCloseableIterators .transform( @@ -108,10 +110,6 @@ public AutoCloseableIterator getSnapshotIterators( .fromIterator(MoreIterators.singletonIteratorFromSupplier(cdcStateHandler::saveStateAfterCompletionOfSnapshotOfNewStreams))); } - /** - * In the default case here, we don't know for sure whether the Debezium Engine will produce records - * or not. We therefore pass {@link canShortCircuitDebeziumEngine} = false. - */ public AutoCloseableIterator getIncrementalIterators(final ConfiguredAirbyteCatalog catalog, final CdcSavedInfoFetcher cdcSavedInfoFetcher, final CdcStateHandler cdcStateHandler, @@ -120,32 +118,6 @@ public AutoCloseableIterator getIncrementalIterators(final Confi final DebeziumPropertiesManager.DebeziumConnectorType debeziumConnectorType, final Instant emittedAt, final boolean addDbNameToState) { - return getIncrementalIterators( - catalog, - cdcSavedInfoFetcher, - cdcStateHandler, - cdcMetadataInjector, - connectorProperties, - debeziumConnectorType, - emittedAt, addDbNameToState, - false); - } - - /** - * - * @param canShortCircuitDebeziumEngine This argument may be set to true in cases where we already - * know that the Debezium Engine is not going to be producing any change events. In this - * case, this method skips provisioning a Debezium Engine altogether. 
- */ - public AutoCloseableIterator getIncrementalIterators(final ConfiguredAirbyteCatalog catalog, - final CdcSavedInfoFetcher cdcSavedInfoFetcher, - final CdcStateHandler cdcStateHandler, - final CdcMetadataInjector cdcMetadataInjector, - final Properties connectorProperties, - final DebeziumPropertiesManager.DebeziumConnectorType debeziumConnectorType, - final Instant emittedAt, - final boolean addDbNameToState, - final boolean canShortCircuitDebeziumEngine) { LOGGER.info("Using CDC: {}", true); LOGGER.info("Using DBZ version: {}", DebeziumEngine.class.getPackage().getImplementationVersion()); final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState( @@ -157,23 +129,18 @@ public AutoCloseableIterator getIncrementalIterators(final Confi cdcStateHandler.compressSchemaHistoryForState()) : Optional.empty(); - final AutoCloseableIterator eventIterator; - if (!canShortCircuitDebeziumEngine) { - final var publisher = new DebeziumRecordPublisher( - connectorProperties, config, catalog, offsetManager, schemaHistoryManager, debeziumConnectorType); - final var queue = new LinkedBlockingQueue>(queueSize.orElse(QUEUE_CAPACITY)); - publisher.start(queue); - // handle state machine around pub/sub logic. - eventIterator = new DebeziumRecordIterator<>( - queue, - targetPosition, - publisher::hasClosed, - new DebeziumShutdownProcedure<>(queue, publisher::close, publisher::hasClosed), - firstRecordWaitTime); - } else { - LOGGER.info("Short-circuiting Debezium Engine: nothing of interest in target replication stream interval."); - eventIterator = AutoCloseableIterators.fromIterator(Collections.emptyIterator()); - } + final var publisher = new DebeziumRecordPublisher( + connectorProperties, config, catalog, offsetManager, schemaHistoryManager, debeziumConnectorType); + final var queue = new LinkedBlockingQueue>(queueSize.orElse(QUEUE_CAPACITY)); + publisher.start(queue); + // handle state machine around pub/sub logic. + final AutoCloseableIterator eventIterator = new DebeziumRecordIterator<>( + queue, + targetPosition, + publisher::hasClosed, + new DebeziumShutdownProcedure<>(queue, publisher::close, publisher::hasClosed), + firstRecordWaitTime, + subsequentRecordWaitTime); final Duration syncCheckpointDuration = config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null ? 
Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong()) diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java index 6255acefaa80..a599e0086ff3 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java @@ -39,13 +39,11 @@ public class DebeziumRecordIterator extends AbstractIterator, Field> heartbeatEventSourceField; private final LinkedBlockingQueue> queue; private final CdcTargetPosition targetPosition; private final Supplier publisherStatusSupplier; - private final Duration firstRecordWaitTime; + private final Duration firstRecordWaitTime, subsequentRecordWaitTime; private final DebeziumShutdownProcedure> debeziumShutdownProcedure; private boolean receivedFirstRecord; @@ -59,12 +57,14 @@ public DebeziumRecordIterator(final LinkedBlockingQueue targetPosition, final Supplier publisherStatusSupplier, final DebeziumShutdownProcedure> debeziumShutdownProcedure, - final Duration firstRecordWaitTime) { + final Duration firstRecordWaitTime, + final Duration subsequentRecordWaitTime) { this.queue = queue; this.targetPosition = targetPosition; this.publisherStatusSupplier = publisherStatusSupplier; this.debeziumShutdownProcedure = debeziumShutdownProcedure; this.firstRecordWaitTime = firstRecordWaitTime; + this.subsequentRecordWaitTime = subsequentRecordWaitTime; this.heartbeatEventSourceField = new HashMap<>(1); this.receivedFirstRecord = false; @@ -90,7 +90,7 @@ protected ChangeEventWithMetadata computeNext() { while (!MoreBooleans.isTruthy(publisherStatusSupplier.get()) || !queue.isEmpty()) { final ChangeEvent next; - final Duration waitTime = receivedFirstRecord ? SUBSEQUENT_RECORD_WAIT_TIME : this.firstRecordWaitTime; + final Duration waitTime = receivedFirstRecord ? 
this.subsequentRecordWaitTime : this.firstRecordWaitTime; try { next = queue.poll(waitTime.getSeconds(), TimeUnit.SECONDS); } catch (final InterruptedException e) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java similarity index 77% rename from airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java rename to airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java index 74c426f35029..4bcec783a70b 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java @@ -10,13 +10,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class FirstRecordWaitTimeUtil { +public class RecordWaitTimeUtil { - private static final Logger LOGGER = LoggerFactory.getLogger(FirstRecordWaitTimeUtil.class); + private static final Logger LOGGER = LoggerFactory.getLogger(RecordWaitTimeUtil.class); public static final Duration MIN_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(2); public static final Duration MAX_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(20); public static final Duration DEFAULT_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(5); + public static final Duration DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME = Duration.ofMinutes(1); public static void checkFirstRecordWaitTime(final JsonNode config) { // we need to skip the check because in tests, we set initial_waiting_seconds @@ -59,6 +60,18 @@ public static Duration getFirstRecordWaitTime(final JsonNode config) { return firstRecordWaitTime; } + public static Duration getSubsequentRecordWaitTime(final JsonNode config) { + Duration subsequentRecordWaitTime = DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME; + final boolean isTest = config.has("is_test") && config.get("is_test").asBoolean(); + final Optional firstRecordWaitSeconds = getFirstRecordWaitSeconds(config); + if (isTest && firstRecordWaitSeconds.isPresent()) { + // In tests, reuse the initial_waiting_seconds property to speed things up. 
+ subsequentRecordWaitTime = Duration.ofSeconds(firstRecordWaitSeconds.get()); + } + LOGGER.info("Subsequent record waiting time: {} seconds", subsequentRecordWaitTime.getSeconds()); + return subsequentRecordWaitTime; + } + public static Optional getFirstRecordWaitSeconds(final JsonNode config) { final JsonNode replicationMethod = config.get("replication_method"); if (replicationMethod != null && replicationMethod.has("initial_waiting_seconds")) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java index 938fd11e903e..174c03893fa2 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java @@ -124,48 +124,6 @@ public void commitLSNToPostgresDatabase(final JsonNode jdbcConfig, } } - public boolean maybeReplicationStreamIntervalHasRecords(final JsonNode jdbcConfig, - final String slotName, - final String publicationName, - final String plugin, - final long startOffset, - final long endOffset) { - try (final BaseConnection pgConnection = (BaseConnection) PostgresReplicationConnection.createConnection(jdbcConfig)) { - ChainedLogicalStreamBuilder streamBuilder = pgConnection - .getReplicationAPI() - .replicationStream() - .logical() - .withSlotName("\"" + slotName + "\"") - .withStartPosition(LogSequenceNumber.valueOf(startOffset)); - streamBuilder = addSlotOption(publicationName, plugin, pgConnection, streamBuilder); - - try (final PGReplicationStream stream = streamBuilder.start()) { - LogSequenceNumber current = stream.getLastReceiveLSN(); - final LogSequenceNumber end = LogSequenceNumber.valueOf(endOffset); - // Attempt to read from the stream. - // This will advance the stream past any bookkeeping entries, until: - // - either the end of the stream is reached, - // - or a meaningful entry is read. - // In the first case, we can update the current position and conclude that the stream contains - // nothing of - // interest to us between the starting position and the current position. - final var msg = stream.readPending(); - if (msg == null) { - current = stream.getLastReceiveLSN(); - } - if (current.compareTo(end) >= 0) { - // If we've reached or gone past the end of the interval which interests us, - // then there's nothing in it that we could possibly care about. - return false; - } - } - } catch (SQLException e) { - throw new RuntimeException(e); - } - // In all other cases, we can't draw any conclusions as to the contents of the stream interval. 
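The DebeziumRecordIterator and RecordWaitTimeUtil changes above replace the hard-coded SUBSEQUENT_RECORD_WAIT_TIME with a configurable value (one minute by default, reusing initial_waiting_seconds when is_test is set). A minimal sketch of the two-tier polling idea, using plain java.util.concurrent types rather than the CDK's Debezium classes:

```java
import java.time.Duration;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;

public class TwoTierPollSketch {

  // Wait longer for the very first event, then switch to the shorter steady-state timeout.
  static String pollNext(final LinkedBlockingQueue<String> queue,
                         final boolean receivedFirstRecord,
                         final Duration firstRecordWaitTime,
                         final Duration subsequentRecordWaitTime) throws InterruptedException {
    final Duration waitTime = receivedFirstRecord ? subsequentRecordWaitTime : firstRecordWaitTime;
    // poll returns null when the timeout expires without an event, just as in the iterator.
    return queue.poll(waitTime.getSeconds(), TimeUnit.SECONDS);
  }

  public static void main(final String[] args) throws InterruptedException {
    final LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
    queue.add("first-change-event");
    // First poll uses the long initial wait; the second uses the short steady-state wait.
    System.out.println(pollNext(queue, false, Duration.ofMinutes(5), Duration.ofSeconds(1)));
    System.out.println(pollNext(queue, true, Duration.ofMinutes(5), Duration.ofSeconds(1)));
  }
}
```

In this sketch the second call times out after one second and returns null, which is the same empty-poll signal the real iterator reacts to when no further change events arrive.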
- return true; - } - private ChainedLogicalStreamBuilder addSlotOption(final String publicationName, final String plugin, final BaseConnection pgConnection, diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java index c1ef4f83de75..e386b100c647 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java @@ -36,6 +36,7 @@ public Long extractPositionFromHeartbeatOffset(final Map sourceOffset }, () -> false, mock(DebeziumShutdownProcedure.class), + Duration.ZERO, Duration.ZERO); final Long lsn = debeziumRecordIterator.getHeartbeatPosition(new ChangeEvent() { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java deleted file mode 100644 index 01c5d2ea47c5..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.debezium.internals; - -import static io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil.MAX_FIRST_RECORD_WAIT_TIME; -import static io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil.MIN_FIRST_RECORD_WAIT_TIME; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.commons.json.Jsons; -import java.time.Duration; -import java.util.Collections; -import java.util.Map; -import java.util.Optional; -import org.junit.jupiter.api.Test; - -public class FirstRecordWaitTimeUtilTest { - - @Test - void testGetFirstRecordWaitTime() { - final JsonNode emptyConfig = Jsons.jsonNode(Collections.emptyMap()); - assertDoesNotThrow(() -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(emptyConfig)); - assertEquals(Optional.empty(), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(emptyConfig)); - assertEquals(FirstRecordWaitTimeUtil.DEFAULT_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(emptyConfig)); - - final JsonNode normalConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", 500))); - assertDoesNotThrow(() -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(normalConfig)); - assertEquals(Optional.of(500), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(normalConfig)); - assertEquals(Duration.ofSeconds(500), FirstRecordWaitTimeUtil.getFirstRecordWaitTime(normalConfig)); - - final int tooShortTimeout = (int) MIN_FIRST_RECORD_WAIT_TIME.getSeconds() - 1; - final JsonNode tooShortConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", tooShortTimeout))); - assertThrows(IllegalArgumentException.class, () -> 
FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(tooShortConfig)); - assertEquals(Optional.of(tooShortTimeout), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(tooShortConfig)); - assertEquals(MIN_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(tooShortConfig)); - - final int tooLongTimeout = (int) MAX_FIRST_RECORD_WAIT_TIME.getSeconds() + 1; - final JsonNode tooLongConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", tooLongTimeout))); - assertThrows(IllegalArgumentException.class, () -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(tooLongConfig)); - assertEquals(Optional.of(tooLongTimeout), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(tooLongConfig)); - assertEquals(MAX_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(tooLongConfig)); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java index d504c6dd3dfa..280d0ac2709e 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java @@ -26,7 +26,6 @@ import java.util.OptionalLong; import java.util.Properties; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -146,10 +145,9 @@ public void emptyState() { Assertions.assertTrue(savedOffsetAfterReplicationSlotLSN); } - @ParameterizedTest - @Disabled - @ValueSource(strings = {"pgoutput", "wal2json"}) - public void LsnCommitTest(final String plugin) throws SQLException { + @Test + public void LsnCommitTest() throws SQLException { + final String plugin = "pgoutput"; final DockerImageName myImage = DockerImageName.parse("debezium/postgres:13-alpine").asCompatibleSubstituteFor("postgres"); final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); final String fullReplicationSlot = "debezium_slot" + "_" + dbName; @@ -200,45 +198,6 @@ public void LsnCommitTest(final String plugin) throws SQLException { Assertions.assertEquals(targetLsn, lsnAfterCommit.asLong()); Assertions.assertNotEquals(slotStateAtTheBeginning, slotStateAfterCommit); - // Now check that maybeReplicationStreamIntervalHasRecords behaves as expected. 
- - final long lsnBeforeBookkeepingStatements = PostgresUtils.getLsn(database).asLong(); - - database.execute("SELECT txid_current();"); - database.execute("CHECKPOINT"); - final long lsnAfterBookkeepingStatements = PostgresUtils.getLsn(database).asLong(); - Assertions.assertNotEquals(lsnBeforeBookkeepingStatements, lsnAfterBookkeepingStatements); - - Assertions.assertFalse(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnBeforeBookkeepingStatements, - lsnAfterBookkeepingStatements)); - - database.execute("INSERT INTO public.test_table VALUES (3, 'baz');"); - final long lsnAfterMeaningfulStatement = PostgresUtils.getLsn(database).asLong(); - Assertions.assertNotEquals(lsnBeforeBookkeepingStatements, lsnAfterMeaningfulStatement); - - Assertions.assertTrue(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnBeforeBookkeepingStatements, - lsnAfterMeaningfulStatement)); - Assertions.assertTrue(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnAfterBookkeepingStatements, - lsnAfterMeaningfulStatement)); - - final var slotStateAtTheEnd = getReplicationSlot(database, fullReplicationSlot, plugin, dbName); - Assertions.assertEquals(slotStateAfterCommit, slotStateAtTheEnd); - container.stop(); } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java new file mode 100644 index 000000000000..64701dd40668 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.cdk.integrations.debezium.internals; + +import static io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil.MAX_FIRST_RECORD_WAIT_TIME; +import static io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil.MIN_FIRST_RECORD_WAIT_TIME; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import java.time.Duration; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import org.junit.jupiter.api.Test; + +public class RecordWaitTimeUtilTest { + + @Test + void testGetFirstRecordWaitTime() { + final JsonNode emptyConfig = Jsons.jsonNode(Collections.emptyMap()); + assertDoesNotThrow(() -> RecordWaitTimeUtil.checkFirstRecordWaitTime(emptyConfig)); + assertEquals(Optional.empty(), RecordWaitTimeUtil.getFirstRecordWaitSeconds(emptyConfig)); + assertEquals(RecordWaitTimeUtil.DEFAULT_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(emptyConfig)); + + final JsonNode normalConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", 500))); + assertDoesNotThrow(() -> RecordWaitTimeUtil.checkFirstRecordWaitTime(normalConfig)); + assertEquals(Optional.of(500), RecordWaitTimeUtil.getFirstRecordWaitSeconds(normalConfig)); + assertEquals(Duration.ofSeconds(500), RecordWaitTimeUtil.getFirstRecordWaitTime(normalConfig)); + + final int tooShortTimeout = (int) MIN_FIRST_RECORD_WAIT_TIME.getSeconds() - 1; + final JsonNode tooShortConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", tooShortTimeout))); + assertThrows(IllegalArgumentException.class, () -> RecordWaitTimeUtil.checkFirstRecordWaitTime(tooShortConfig)); + assertEquals(Optional.of(tooShortTimeout), RecordWaitTimeUtil.getFirstRecordWaitSeconds(tooShortConfig)); + assertEquals(MIN_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(tooShortConfig)); + + final int tooLongTimeout = (int) MAX_FIRST_RECORD_WAIT_TIME.getSeconds() + 1; + final JsonNode tooLongConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", tooLongTimeout))); + assertThrows(IllegalArgumentException.class, () -> RecordWaitTimeUtil.checkFirstRecordWaitTime(tooLongConfig)); + assertEquals(Optional.of(tooLongTimeout), RecordWaitTimeUtil.getFirstRecordWaitSeconds(tooLongConfig)); + assertEquals(MAX_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(tooLongConfig)); + } + +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java index f7356a00bc7f..15bca65f35a1 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java @@ -16,87 +16,62 @@ import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.util.HostPortResolver; -import 
io.airbyte.cdk.testutils.PostgreSQLContainerHelper; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.io.IOs; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; import java.sql.JDBCType; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Stream; +import org.jooq.SQLDialect; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.MountableFile; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; /** * Runs the acceptance tests in the source-jdbc test module. We want this module to run these tests * itself as a sanity check. The trade off here is that this class is duplicated from the one used * in source-postgres. */ -@ExtendWith(SystemStubsExtension.class) -class DefaultJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +class DefaultJdbcSourceAcceptanceTest + extends JdbcSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - private static PostgreSQLContainer PSQL_DB; - - private JsonNode config; - private String dbName; + private static PostgreSQLContainer PSQL_CONTAINER; @BeforeAll static void init() { - PSQL_DB = new PostgreSQLContainer<>("postgres:13-alpine"); - PSQL_DB.start(); + PSQL_CONTAINER = new PostgreSQLContainer<>("postgres:13-alpine"); + PSQL_CONTAINER.start(); CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s BIT(3) NOT NULL);"; INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(B'101');"; } - @BeforeEach - public void setup() throws Exception { - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, PSQL_DB.getHost()) - .put(JdbcUtils.PORT_KEY, PSQL_DB.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, PSQL_DB.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, PSQL_DB.getPassword()) - .build()); - - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - - final String initScriptName = "init_" + dbName.concat(".sql"); - final String tmpFilePath = IOs.writeFileToRandomTmpDir(initScriptName, "CREATE DATABASE " + dbName + ";"); - PostgreSQLContainerHelper.runSqlScript(MountableFile.forHostPath(tmpFilePath), PSQL_DB); - - super.setup(); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder().build(); } @Override - public boolean supportsSchemas() { - return true; + protected PostgresTestSource source() { + final var source = new PostgresTestSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public AbstractJdbcSource getJdbcSource() { - return new PostgresTestSource(); + protected BareBonesTestDatabase createTestDatabase() { + return new BareBonesTestDatabase(PSQL_CONTAINER).initialized(); } @Override - 
public JsonNode getConfig() { - return config; + public boolean supportsSchemas() { + return true; } public JsonNode getConfigWithConnectionProperties(final PostgreSQLContainer psqlDb, final String dbName, final String additionalParameters) { @@ -111,11 +86,6 @@ public JsonNode getConfigWithConnectionProperties(final PostgreSQLContainer p .build()); } - @Override - public String getDriverClass() { - return PostgresTestSource.DRIVER_CLASS; - } - @Override protected boolean supportsPerStream() { return true; @@ -123,10 +93,10 @@ protected boolean supportsPerStream() { @AfterAll static void cleanUp() { - PSQL_DB.close(); + PSQL_CONTAINER.close(); } - private static class PostgresTestSource extends AbstractJdbcSource implements Source { + public static class PostgresTestSource extends AbstractJdbcSource implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(PostgresTestSource.class); @@ -171,10 +141,63 @@ public static void main(final String[] args) throws Exception { } + static protected class BareBonesTestDatabase + extends TestDatabase, BareBonesTestDatabase, BareBonesTestDatabase.BareBonesConfigBuilder> { + + public BareBonesTestDatabase(PostgreSQLContainer container) { + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + final var sql = Stream.of( + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER %s PASSWORD '%s'", getUserName(), getPassword()), + String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", getDatabaseName(), getUserName()), + String.format("ALTER USER %s WITH SUPERUSER", getUserName())); + return Stream.of(Stream.concat( + Stream.of("psql", + "-d", getContainer().getDatabaseName(), + "-U", getContainer().getUsername(), + "-v", "ON_ERROR_STOP=1", + "-a"), + sql.flatMap(stmt -> Stream.of("-c", stmt)))); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.POSTGRES; + } + + @Override + public BareBonesConfigBuilder configBuilder() { + return new BareBonesConfigBuilder(this); + } + + static protected class BareBonesConfigBuilder extends TestDatabase.ConfigBuilder { + + private BareBonesConfigBuilder(BareBonesTestDatabase testDatabase) { + super(testDatabase); + } + + } + + } + @Test void testCustomParametersOverwriteDefaultParametersExpectException() { final String connectionPropertiesUrl = "ssl=false"; - final JsonNode config = getConfigWithConnectionProperties(PSQL_DB, dbName, connectionPropertiesUrl); + final JsonNode config = getConfigWithConnectionProperties(PSQL_CONTAINER, testdb.getDatabaseName(), connectionPropertiesUrl); final Map customParameters = JdbcUtils.parseJdbcParameters(config, JdbcUtils.CONNECTION_PROPERTIES_KEY, "&"); final Map defaultParameters = Map.of( "ssl", "true", diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java index c186f0084a72..c6bc26a7d14c 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java @@ -16,8 +16,8 @@ import 
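The BareBonesTestDatabase introduced above provisions each per-test database with a single psql invocation executed inside the Postgres container. A worked example of the argument list that inContainerBootstrapCmd() assembles, with placeholder names standing in for the per-run generated database, user and password; the container's own database and superuser are assumed to be the Testcontainers defaults (test/test).

// Sketch of the bootstrap command produced above, for placeholder values.
import java.util.List;

public class BootstrapCmdExample {

  public static void main(final String[] args) {
    final String databaseName = "db_1234";   // placeholder for getDatabaseName()
    final String userName = "user_1234";     // placeholder for getUserName()
    final String password = "secret";        // placeholder for getPassword()

    final List<String> argv = List.of(
        "psql", "-d", "test", "-U", "test", "-v", "ON_ERROR_STOP=1", "-a",
        "-c", String.format("CREATE DATABASE %s", databaseName),
        "-c", String.format("CREATE USER %s PASSWORD '%s'", userName, password),
        "-c", String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", databaseName, userName),
        "-c", String.format("ALTER USER %s WITH SUPERUSER", userName));

    // ON_ERROR_STOP=1 makes the whole bootstrap fail fast if any statement fails.
    System.out.println(String.join(" ", argv));
  }
}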
com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.collect.Streams; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.integrations.base.Source; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; @@ -36,7 +36,6 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -48,65 +47,24 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class CdcSourceTest { +public abstract class CdcSourceTest> { - private static final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class); + static private final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class); - protected static final String MODELS_SCHEMA = "models_schema"; - protected static final String MODELS_STREAM_NAME = "models"; - protected static final Set STREAM_NAMES = Sets - .newHashSet(MODELS_STREAM_NAME); - protected static final String COL_ID = "id"; - protected static final String COL_MAKE_ID = "make_id"; - protected static final String COL_MODEL = "model"; - protected static final int INITIAL_WAITING_SECONDS = 5; + static protected final String MODELS_STREAM_NAME = "models"; + static protected final Set STREAM_NAMES = Set.of(MODELS_STREAM_NAME); + static protected final String COL_ID = "id"; + static protected final String COL_MAKE_ID = "make_id"; + static protected final String COL_MODEL = "model"; - protected final List MODEL_RECORDS_RANDOM = ImmutableList.of( - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 11000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Fiesta-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 12000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Focus-random")), - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 13000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Ranger-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 14000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "GLA-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 15000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "A 220-random")), - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 16000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "E 350-random"))); - - protected static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(List.of( - CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME, - MODELS_SCHEMA, - Field.of(COL_ID, JsonSchemaType.INTEGER), - Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), - Field.of(COL_MODEL, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))))); - protected static final ConfiguredAirbyteCatalog CONFIGURED_CATALOG = CatalogHelpers - .toDefaultConfiguredCatalog(CATALOG); - - // set all streams to incremental. 
- static { - CONFIGURED_CATALOG.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); - } - - protected static final List MODEL_RECORDS = ImmutableList.of( + static protected final List MODEL_RECORDS = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 11, COL_MAKE_ID, 1, COL_MODEL, "Fiesta")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 12, COL_MAKE_ID, 1, COL_MODEL, "Focus")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 13, COL_MAKE_ID, 1, COL_MODEL, "Ranger")), @@ -114,87 +72,134 @@ public abstract class CdcSourceTest { Jsons.jsonNode(ImmutableMap.of(COL_ID, 15, COL_MAKE_ID, 2, COL_MODEL, "A 220")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 16, COL_MAKE_ID, 2, COL_MODEL, "E 350"))); - protected void setup() throws SQLException { - createAndPopulateTables(); - } + static protected final String RANDOM_TABLE_NAME = MODELS_STREAM_NAME + "_random"; - private void createAndPopulateTables() { - createAndPopulateActualTable(); - createAndPopulateRandomTable(); - } + static protected final List MODEL_RECORDS_RANDOM = MODEL_RECORDS.stream() + .map(r -> Jsons.jsonNode(ImmutableMap.of( + COL_ID + "_random", r.get(COL_ID).asInt() * 1000, + COL_MAKE_ID + "_random", r.get(COL_MAKE_ID), + COL_MODEL + "_random", r.get(COL_MODEL).asText() + "-random"))) + .toList(); - protected void executeQuery(final String query) { - try { - getDatabase().query( - ctx -> ctx - .execute(query)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } - } + protected T testdb; - public String columnClause(final Map columnsWithDataType, final Optional primaryKey) { - final StringBuilder columnClause = new StringBuilder(); - int i = 0; - for (final Map.Entry column : columnsWithDataType.entrySet()) { - columnClause.append(column.getKey()); - columnClause.append(" "); - columnClause.append(column.getValue()); - if (i < (columnsWithDataType.size() - 1)) { - columnClause.append(","); - columnClause.append(" "); - } - i++; - } - primaryKey.ifPresent(s -> columnClause.append(", PRIMARY KEY (").append(s).append(")")); + protected String createTableSqlFmt() { + return "CREATE TABLE %s.%s(%s);"; + } - return columnClause.toString(); + protected String createSchemaSqlFmt() { + return "CREATE SCHEMA %s;"; } - public void createTable(final String schemaName, final String tableName, final String columnClause) { - executeQuery(createTableQuery(schemaName, tableName, columnClause)); + protected String modelsSchema() { + return "models_schema"; } - public String createTableQuery(final String schemaName, final String tableName, final String columnClause) { - return String.format("CREATE TABLE %s.%s(%s);", schemaName, tableName, columnClause); + /** + * The schema of a random table which is used as a new table in snapshot test + */ + protected String randomSchema() { + return "models_schema_random"; } - public void createSchema(final String schemaName) { - executeQuery(createSchemaQuery(schemaName)); + protected AirbyteCatalog getCatalog() { + return new AirbyteCatalog().withStreams(List.of( + CatalogHelpers.createAirbyteStream( + MODELS_STREAM_NAME, + modelsSchema(), + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), + Field.of(COL_MODEL, JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))))); } - public String createSchemaQuery(final String schemaName) { - return "CREATE DATABASE " + schemaName + ";"; + protected ConfiguredAirbyteCatalog getConfiguredCatalog() 
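MODEL_RECORDS_RANDOM is no longer a hand-maintained copy of the fixture data; it is derived from MODEL_RECORDS by renaming the columns, multiplying the id by 1000 and suffixing the model. A small, hypothetical sanity check spelling out that mapping for the first row; the assertion itself is not part of the patch, it just restates the derivation above.

import static org.junit.jupiter.api.Assertions.assertEquals;

import com.fasterxml.jackson.databind.JsonNode;
import org.junit.jupiter.api.Test;

// Hypothetical check, written against the constants declared above.
@Test
void firstRandomRecordIsDerivedFromFirstModelRecord() {
  final JsonNode base = MODEL_RECORDS.get(0);          // {"id": 11, "make_id": 1, "model": "Fiesta"}
  final JsonNode derived = MODEL_RECORDS_RANDOM.get(0);
  assertEquals(base.get(COL_ID).asInt() * 1000, derived.get(COL_ID + "_random").asInt());              // 11000
  assertEquals(base.get(COL_MAKE_ID), derived.get(COL_MAKE_ID + "_random"));                           // 1
  assertEquals(base.get(COL_MODEL).asText() + "-random", derived.get(COL_MODEL + "_random").asText()); // "Fiesta-random"
}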
{ + final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog()); + configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); + return configuredCatalog; } - private void createAndPopulateActualTable() { - createSchema(MODELS_SCHEMA); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME, - columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); + protected abstract T createTestDatabase(); + + protected abstract S source(); + + protected abstract JsonNode config(); + + protected abstract CdcTargetPosition cdcLatestTargetPosition(); + + protected abstract CdcTargetPosition extractPosition(final JsonNode record); + + protected abstract void assertNullCdcMetaData(final JsonNode data); + + protected abstract void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull); + + protected abstract void removeCDCColumns(final ObjectNode data); + + protected abstract void addCdcMetadataColumns(final AirbyteStream stream); + + protected abstract void addCdcDefaultCursorField(final AirbyteStream stream); + + protected abstract void assertExpectedStateMessages(final List stateMessages); + + @BeforeEach + protected void setup() { + testdb = createTestDatabase(); + + // create and populate actual table + final var actualColumns = ImmutableMap.of( + COL_ID, "INTEGER", + COL_MAKE_ID, "INTEGER", + COL_MODEL, "VARCHAR(200)"); + testdb + .with(createSchemaSqlFmt(), modelsSchema()) + .with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME, columnClause(actualColumns, Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS) { writeModelRecord(recordJson); } - } - /** - * This database and table is not part of Airbyte sync. It is being created just to make sure the - * databases not being synced by Airbyte are not causing issues with our debezium logic - */ - private void createAndPopulateRandomTable() { - if (!randomTableSchema().equals(MODELS_SCHEMA)) { - createSchema(randomTableSchema()); + // Create and populate random table. + // This table is not part of Airbyte sync. It is being created just to make sure the schemas not + // being synced by Airbyte are not causing issues with our debezium logic. 
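The dialect-specific CDC behaviour is now injected through the abstract hooks declared above rather than through shared static state. A rough sketch of how a concrete suite might implement the metadata hooks; the _ab_cdc_* field names are illustrative placeholders, not something this patch defines, and the methods are meant to live in a subclass of CdcSourceTest.

// Illustrative hook implementations for a hypothetical dialect (JUnit assertions assumed statically imported).
@Override
protected void assertNullCdcMetaData(final JsonNode data) {
  // Records emitted by a non-CDC (full refresh) stream carry no CDC metadata at all.
  assertNull(data.get("_ab_cdc_updated_at"));   // placeholder column name
  assertNull(data.get("_ab_cdc_deleted_at"));   // placeholder column name
}

@Override
protected void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull) {
  assertNotNull(data.get("_ab_cdc_updated_at"));
  assertEquals(deletedAtNull, data.get("_ab_cdc_deleted_at").isNull());
}

@Override
protected void removeCDCColumns(final ObjectNode data) {
  // Strips the metadata again so record payloads can be compared against the raw fixtures.
  data.remove("_ab_cdc_updated_at");
  data.remove("_ab_cdc_deleted_at");
}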
+ final var randomColumns = ImmutableMap.of( + COL_ID + "_random", "INTEGER", + COL_MAKE_ID + "_random", "INTEGER", + COL_MODEL + "_random", "VARCHAR(200)"); + if (!randomSchema().equals(modelsSchema())) { + testdb.with(createSchemaSqlFmt(), randomSchema()); } - createTable(randomTableSchema(), MODELS_STREAM_NAME + "_random", - columnClause(ImmutableMap.of(COL_ID + "_random", "INTEGER", COL_MAKE_ID + "_random", "INTEGER", COL_MODEL + "_random", "VARCHAR(200)"), - Optional.of(COL_ID + "_random"))); + testdb.with(createTableSqlFmt(), randomSchema(), RANDOM_TABLE_NAME, columnClause(randomColumns, Optional.of(COL_ID + "_random"))); for (final JsonNode recordJson : MODEL_RECORDS_RANDOM) { - writeRecords(recordJson, randomTableSchema(), MODELS_STREAM_NAME + "_random", + writeRecords(recordJson, randomSchema(), RANDOM_TABLE_NAME, COL_ID + "_random", COL_MAKE_ID + "_random", COL_MODEL + "_random"); } } + @AfterEach + protected void tearDown() { + testdb.close(); + } + + protected String columnClause(final Map columnsWithDataType, final Optional primaryKey) { + final StringBuilder columnClause = new StringBuilder(); + int i = 0; + for (final Map.Entry column : columnsWithDataType.entrySet()) { + columnClause.append(column.getKey()); + columnClause.append(" "); + columnClause.append(column.getValue()); + if (i < (columnsWithDataType.size() - 1)) { + columnClause.append(","); + columnClause.append(" "); + } + i++; + } + primaryKey.ifPresent(s -> columnClause.append(", PRIMARY KEY (").append(s).append(")")); + + return columnClause.toString(); + } + protected void writeModelRecord(final JsonNode recordJson) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL); + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL); } protected void writeRecords( @@ -204,14 +209,13 @@ protected void writeRecords( final String idCol, final String makeIdCol, final String modelCol) { - executeQuery( - String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", dbName, streamName, - idCol, makeIdCol, modelCol, - recordJson.get(idCol).asInt(), recordJson.get(makeIdCol).asInt(), - recordJson.get(modelCol).asText())); + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", dbName, streamName, + idCol, makeIdCol, modelCol, + recordJson.get(idCol).asInt(), recordJson.get(makeIdCol).asInt(), + recordJson.get(modelCol).asText()); } - protected static Set removeDuplicates(final Set messages) { + static protected Set removeDuplicates(final Set messages) { final Set existingDataRecordsWithoutUpdated = new HashSet<>(); final Set output = new HashSet<>(); @@ -272,7 +276,7 @@ protected void assertExpectedRecords(final Set expectedRecords, final private void assertExpectedRecords(final Set expectedRecords, final Set actualRecords, final Set cdcStreams) { - assertExpectedRecords(expectedRecords, actualRecords, cdcStreams, STREAM_NAMES, MODELS_SCHEMA); + assertExpectedRecords(expectedRecords, actualRecords, cdcStreams, STREAM_NAMES, modelsSchema()); } protected void assertExpectedRecords(final Set expectedRecords, @@ -309,7 +313,7 @@ protected void assertExpectedRecords(final Set expectedRecords, @DisplayName("On the first sync, produce returns records that exist in the database.") void testExistingData() throws Exception { final CdcTargetPosition targetPosition = cdcLatestTargetPosition(); - final AutoCloseableIterator read = getSource().read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = 
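The columnClause() helper kept above is what turns the per-test column maps into DDL. A worked example, written as a hypothetical check inside the suite, showing the exact clause produced for the default models table and how it combines with createTableSqlFmt().

@Test
void columnClauseBuildsDdlInInsertionOrder() {
  final String clause = columnClause(
      ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"),
      Optional.of(COL_ID));
  // Guava's ImmutableMap preserves insertion order, so the clause is deterministic.
  assertEquals("id INTEGER, make_id INTEGER, model VARCHAR(200), PRIMARY KEY (id)", clause);
  // Plugged into createTableSqlFmt() this becomes:
  //   CREATE TABLE models_schema.models(id INTEGER, make_id INTEGER, model VARCHAR(200), PRIMARY KEY (id));
}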
source().read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); final Set recordMessages = extractRecordMessages(actualRecords); @@ -332,19 +336,17 @@ protected void compareTargetPositionFromTheRecordsWithTargetPostionGeneratedBefo @Test @DisplayName("When a record is deleted, produces a deletion record.") void testDelete() throws Exception { - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessages1 = extractStateMessages(actualRecords1); assertExpectedStateMessages(stateMessages1); - executeQuery(String - .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, - 11)); + testdb.with("DELETE FROM %s.%s WHERE %s = %s", modelsSchema(), MODELS_STREAM_NAME, COL_ID, 11); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List recordMessages2 = new ArrayList<>( extractRecordMessages(actualRecords2)); @@ -363,19 +365,18 @@ protected void assertExpectedStateMessagesFromIncrementalSync(final List read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessages1 = extractStateMessages(actualRecords1); assertExpectedStateMessages(stateMessages1); - executeQuery(String - .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, - COL_MODEL, updatedModel, COL_ID, 11)); + testdb.with("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", modelsSchema(), MODELS_STREAM_NAME, + COL_MODEL, updatedModel, COL_ID, 11); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List recordMessages2 = new ArrayList<>( extractRecordMessages(actualRecords2)); @@ -402,8 +403,8 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -422,8 +423,8 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { } final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator 
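testDelete and testUpdate above share the same resume pattern: run a first sync, keep only the last emitted STATE message, and feed it back as the state of the next read. Condensed into one helper for clarity; the helper itself is not part of the patch, it only reuses calls that already appear in these tests.

// Illustrative helper: perform one incremental "resume" sync from the end-state of a previous one.
private List<AirbyteMessage> resumeSync(final List<AirbyteMessage> previousSync) throws Exception {
  final List<AirbyteStateMessage> stateMessages = extractStateMessages(previousSync);
  // With per-stream state, only the latest STATE message is needed to resume.
  final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1)));
  final AutoCloseableIterator<AirbyteMessage> read = source().read(config(), getConfiguredCatalog(), state);
  return AutoCloseableIterators.toListAndClose(read);
}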
secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -457,7 +458,7 @@ protected void assertExpectedStateMessagesForRecordsProducedDuringAndAfterSync(f @Test @DisplayName("When both incremental CDC and full refresh are configured for different streams in a sync, the data is replicated as expected.") void testCdcAndFullRefreshInSameSync() throws Exception { - final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -467,18 +468,17 @@ void testCdcAndFullRefreshInSameSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", - columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); + final var columns = ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"); + testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2", columnClause(columns, Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, - COL_MAKE_ID, COL_MODEL); + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -491,8 +491,8 @@ void testCdcAndFullRefreshInSameSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -505,15 +505,15 @@ void testCdcAndFullRefreshInSameSync() throws Exception { recordMessages1, Collections.singleton(MODELS_STREAM_NAME), names, - MODELS_SCHEMA); + modelsSchema()); final JsonNode puntoRecord = Jsons .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); writeModelRecord(puntoRecord); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, state); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -525,17 +525,16 @@ void testCdcAndFullRefreshInSameSync() throws Exception { recordMessages2, Collections.singleton(MODELS_STREAM_NAME), names, - MODELS_SCHEMA); + modelsSchema()); } @Test @DisplayName("When no records exist, no records are returned.") void testNoData() 
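The mixed-mode test above now builds its catalog from getConfiguredCatalog() plus one extra stream. A condensed sketch of that construction, reduced to the calls that matter; the explicit FULL_REFRESH sync mode on the second stream is inferred from the test's intent rather than quoted from the patch.

// Start from the incremental models stream, then add a second stream synced as FULL_REFRESH.
final ConfiguredAirbyteCatalog mixedCatalog = Jsons.clone(getConfiguredCatalog());
final ConfiguredAirbyteStream fullRefreshStream = new ConfiguredAirbyteStream()
    .withStream(CatalogHelpers.createAirbyteStream(
        MODELS_STREAM_NAME + "_2",
        modelsSchema(),
        Field.of(COL_ID, JsonSchemaType.INTEGER),
        Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER),
        Field.of(COL_MODEL, JsonSchemaType.STRING))
        .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)))
    .withSyncMode(SyncMode.FULL_REFRESH);
final List<ConfiguredAirbyteStream> streams = new ArrayList<>(mixedCatalog.getStreams());
streams.add(fullRefreshStream);
mixedCatalog.withStreams(streams);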
throws Exception { - executeQuery(String.format("DELETE FROM %s.%s", MODELS_SCHEMA, MODELS_STREAM_NAME)); + testdb.with("DELETE FROM %s.%s", modelsSchema(), MODELS_STREAM_NAME); - final AutoCloseableIterator read = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = source().read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); final Set recordMessages = extractRecordMessages(actualRecords); @@ -551,14 +550,14 @@ protected void assertExpectedStateMessagesForNoData(final List read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessagesFromFirstSync = extractStateMessages(actualRecords1); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessagesFromFirstSync.get(stateMessagesFromFirstSync.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -570,14 +569,14 @@ void testNoDataOnSecondSync() throws Exception { @Test void testCheck() throws Exception { - final AirbyteConnectionStatus status = getSource().check(getConfig()); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED); } @Test void testDiscover() throws Exception { final AirbyteCatalog expectedCatalog = expectedCatalogForDiscover(); - final AirbyteCatalog actualCatalog = getSource().discover(getConfig()); + final AirbyteCatalog actualCatalog = source().discover(config()); assertEquals( expectedCatalog.getStreams().stream().sorted(Comparator.comparing(AirbyteStream::getName)) @@ -588,8 +587,8 @@ void testDiscover() throws Exception { @Test public void newTableSnapshotTest() throws Exception { - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final Set recordsFromFirstBatch = extractRecordMessages( @@ -605,7 +604,7 @@ public void newTableSnapshotTest() throws Exception { .map(AirbyteStreamState::getStreamDescriptor) .collect(Collectors.toSet()); assertEquals(1, streamsInStateAfterFirstSyncCompletion.size()); - assertTrue(streamsInStateAfterFirstSyncCompletion.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))); + assertTrue(streamsInStateAfterFirstSyncCompletion.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))); assertNotNull(stateMessageEmittedAfterFirstSyncCompletion.getData()); assertEquals((MODEL_RECORDS.size()), recordsFromFirstBatch.size()); @@ -616,8 +615,8 @@ public void newTableSnapshotTest() throws Exception { final ConfiguredAirbyteCatalog newTables = CatalogHelpers .toDefaultConfiguredCatalog(new AirbyteCatalog().withStreams(List.of( CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME + "_random", - randomTableSchema(), + RANDOM_TABLE_NAME, + randomSchema(), Field.of(COL_ID + 
"_random", JsonSchemaType.NUMBER), Field.of(COL_MAKE_ID + "_random", JsonSchemaType.NUMBER), Field.of(COL_MODEL + "_random", JsonSchemaType.STRING)) @@ -626,7 +625,7 @@ public void newTableSnapshotTest() throws Exception { newTables.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); final List combinedStreams = new ArrayList<>(); - combinedStreams.addAll(CONFIGURED_CATALOG.getStreams()); + combinedStreams.addAll(getConfiguredCatalog().getStreams()); combinedStreams.addAll(newTables.getStreams()); final ConfiguredAirbyteCatalog updatedCatalog = new ConfiguredAirbyteCatalog().withStreams(combinedStreams); @@ -644,8 +643,8 @@ public void newTableSnapshotTest() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), updatedCatalog, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), updatedCatalog, state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -654,10 +653,10 @@ public void newTableSnapshotTest() throws Exception { final Map> recordsStreamWise = extractRecordMessagesStreamWise(dataFromSecondBatch); assertTrue(recordsStreamWise.containsKey(MODELS_STREAM_NAME)); - assertTrue(recordsStreamWise.containsKey(MODELS_STREAM_NAME + "_random")); + assertTrue(recordsStreamWise.containsKey(RANDOM_TABLE_NAME)); final Set recordsForModelsStreamFromSecondBatch = recordsStreamWise.get(MODELS_STREAM_NAME); - final Set recordsForModelsRandomStreamFromSecondBatch = recordsStreamWise.get(MODELS_STREAM_NAME + "_random"); + final Set recordsForModelsRandomStreamFromSecondBatch = recordsStreamWise.get(RANDOM_TABLE_NAME); assertEquals((MODEL_RECORDS_RANDOM.size()), recordsForModelsRandomStreamFromSecondBatch.size()); assertEquals(20, recordsForModelsStreamFromSecondBatch.size()); @@ -665,8 +664,8 @@ public void newTableSnapshotTest() throws Exception { recordsForModelsRandomStreamFromSecondBatch.stream().map(AirbyteRecordMessage::getStream).collect( Collectors.toSet()), Sets - .newHashSet(MODELS_STREAM_NAME + "_random"), - randomTableSchema()); + .newHashSet(RANDOM_TABLE_NAME), + randomSchema()); assertExpectedRecords(recordsWritten, recordsForModelsStreamFromSecondBatch); /* @@ -686,14 +685,14 @@ public void newTableSnapshotTest() throws Exception { .jsonNode(ImmutableMap .of(COL_ID + "_random", 11000 + recordsCreated, COL_MAKE_ID + "_random", 1 + recordsCreated, COL_MODEL + "_random", "Fiesta-random" + recordsCreated)); - writeRecords(record2, randomTableSchema(), MODELS_STREAM_NAME + "_random", + writeRecords(record2, randomSchema(), RANDOM_TABLE_NAME, COL_ID + "_random", COL_MAKE_ID + "_random", COL_MODEL + "_random"); recordsWrittenInRandomTable.add(record2); } final JsonNode state2 = stateAfterSecondBatch.get(stateAfterSecondBatch.size() - 1).getData(); - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(getConfig(), updatedCatalog, state2); + final AutoCloseableIterator thirdBatchIterator = source() + .read(config(), updatedCatalog, state2); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -710,16 +709,17 @@ public void newTableSnapshotTest() throws Exception { .collect(Collectors.toSet()); assertTrue( streamsInSyncCompletionStateAfterThirdSync.contains( - new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))); - assertTrue(streamsInSyncCompletionStateAfterThirdSync.contains(new 
StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))); + new StreamDescriptor().withName(RANDOM_TABLE_NAME).withNamespace(randomSchema()))); + assertTrue( + streamsInSyncCompletionStateAfterThirdSync.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))); assertNotNull(stateMessageEmittedAfterThirdSyncCompletion.getData()); final Map> recordsStreamWiseFromThirdBatch = extractRecordMessagesStreamWise(dataFromThirdBatch); assertTrue(recordsStreamWiseFromThirdBatch.containsKey(MODELS_STREAM_NAME)); - assertTrue(recordsStreamWiseFromThirdBatch.containsKey(MODELS_STREAM_NAME + "_random")); + assertTrue(recordsStreamWiseFromThirdBatch.containsKey(RANDOM_TABLE_NAME)); final Set recordsForModelsStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(MODELS_STREAM_NAME); - final Set recordsForModelsRandomStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(MODELS_STREAM_NAME + "_random"); + final Set recordsForModelsRandomStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(RANDOM_TABLE_NAME); assertEquals(20, recordsForModelsStreamFromThirdBatch.size()); assertEquals(20, recordsForModelsRandomStreamFromThirdBatch.size()); @@ -728,8 +728,8 @@ public void newTableSnapshotTest() throws Exception { recordsForModelsRandomStreamFromThirdBatch.stream().map(AirbyteRecordMessage::getStream).collect( Collectors.toSet()), Sets - .newHashSet(MODELS_STREAM_NAME + "_random"), - randomTableSchema()); + .newHashSet(RANDOM_TABLE_NAME), + randomSchema()); } protected void assertStateMessagesForNewTableSnapshotTest(final List stateMessages, @@ -745,8 +745,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List streams = expectedCatalog.getStreams(); // stream with PK @@ -779,7 +779,7 @@ protected AirbyteCatalog expectedCatalogForDiscover() { final AirbyteStream streamWithoutPK = CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)); @@ -789,8 +789,8 @@ protected AirbyteCatalog expectedCatalogForDiscover() { addCdcMetadataColumns(streamWithoutPK); final AirbyteStream randomStream = CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME + "_random", - randomTableSchema(), + RANDOM_TABLE_NAME, + randomSchema(), Field.of(COL_ID + "_random", JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID + "_random", JsonSchemaType.INTEGER), Field.of(COL_MODEL + "_random", JsonSchemaType.STRING)) @@ -807,31 +807,4 @@ protected AirbyteCatalog expectedCatalogForDiscover() { return expectedCatalog; } - /** - * The schema of a random table which is used as a new table in snapshot test - */ - protected abstract String randomTableSchema(); - - protected abstract CdcTargetPosition cdcLatestTargetPosition(); - - protected abstract CdcTargetPosition extractPosition(final JsonNode record); - - protected abstract void assertNullCdcMetaData(final JsonNode data); - - protected abstract void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull); - - protected abstract void removeCDCColumns(final ObjectNode data); - - protected abstract void addCdcMetadataColumns(final AirbyteStream stream); - - protected abstract void addCdcDefaultCursorField(final AirbyteStream stream); - - protected abstract Source getSource(); - - protected abstract JsonNode getConfig(); - - protected abstract Database getDatabase(); - - protected abstract void assertExpectedStateMessages(final List 
stateMessages); - } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index bb613be8ac02..fea6f5709024 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -4,7 +4,6 @@ package io.airbyte.cdk.integrations.source.jdbc.test; -import static io.airbyte.cdk.db.jdbc.JdbcUtils.getDefaultSourceOperations; import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -17,20 +16,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcSourceOperations; +import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; -import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; import io.airbyte.cdk.integrations.base.Source; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils; import io.airbyte.cdk.integrations.source.relationaldb.models.DbState; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -60,134 +54,97 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Function; import java.util.stream.Collectors; -import javax.sql.DataSource; import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Tests that should be run on all Sources that extend the AbstractJdbcSource. */ -// How leverage these tests: -// 1. Extend this class in the test module of the Source. -// 2. From the class that extends this one, you MUST call super.setup() in a @BeforeEach method. -// Otherwise you'll see many NPE issues. Your before each should also handle providing a fresh -// database between each test. -// 3. From the class that extends this one, implement a @AfterEach that cleans out the database -// between each test. -// 4. Then implement the abstract methods documented below. 
@SuppressFBWarnings( value = {"MS_SHOULD_BE_FINAL"}, - justification = "The static variables are updated in sub classes for convenience, and cannot be final.") -public abstract class JdbcSourceAcceptanceTest { - - // schema name must be randomized for each test run, - // otherwise parallel runs can interfere with each other - public static String SCHEMA_NAME = Strings.addRandomSuffix("jdbc_integration_test1", "_", 5).toLowerCase(); - public static String SCHEMA_NAME2 = Strings.addRandomSuffix("jdbc_integration_test2", "_", 5).toLowerCase(); - public static Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); - - public static String TABLE_NAME = "id_and_name"; - public static String TABLE_NAME_WITH_SPACES = "id and name"; - public static String TABLE_NAME_WITHOUT_PK = "id_and_name_without_pk"; - public static String TABLE_NAME_COMPOSITE_PK = "full_name_composite_pk"; - public static String TABLE_NAME_WITHOUT_CURSOR_TYPE = "table_without_cursor_type"; - public static String TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE = "table_with_null_cursor_type"; + justification = "The static variables are updated in subclasses for convenience, and cannot be final.") +abstract public class JdbcSourceAcceptanceTest> { + + static protected String SCHEMA_NAME = "jdbc_integration_test1"; + static protected String SCHEMA_NAME2 = "jdbc_integration_test2"; + static protected Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); + + static protected String TABLE_NAME = "id_and_name"; + static protected String TABLE_NAME_WITH_SPACES = "id and name"; + static protected String TABLE_NAME_WITHOUT_PK = "id_and_name_without_pk"; + static protected String TABLE_NAME_COMPOSITE_PK = "full_name_composite_pk"; + static protected String TABLE_NAME_WITHOUT_CURSOR_TYPE = "table_without_cursor_type"; + static protected String TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE = "table_with_null_cursor_type"; // this table is used in testing incremental sync with concurrent insertions - public static String TABLE_NAME_AND_TIMESTAMP = "name_and_timestamp"; - - public static String COL_ID = "id"; - public static String COL_NAME = "name"; - public static String COL_UPDATED_AT = "updated_at"; - public static String COL_FIRST_NAME = "first_name"; - public static String COL_LAST_NAME = "last_name"; - public static String COL_LAST_NAME_WITH_SPACE = "last name"; - public static String COL_CURSOR = "cursor_field"; - public static String COL_TIMESTAMP = "timestamp"; - public static String COL_TIMESTAMP_TYPE = "TIMESTAMP"; - public static Number ID_VALUE_1 = 1; - public static Number ID_VALUE_2 = 2; - public static Number ID_VALUE_3 = 3; - public static Number ID_VALUE_4 = 4; - public static Number ID_VALUE_5 = 5; - - public static String DROP_SCHEMA_QUERY = "DROP SCHEMA IF EXISTS %s CASCADE"; - public static String COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String COLUMN_CLAUSE_WITHOUT_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String COLUMN_CLAUSE_WITH_COMPOSITE_PK = + static protected String TABLE_NAME_AND_TIMESTAMP = "name_and_timestamp"; + + static protected String COL_ID = "id"; + static protected String COL_NAME = "name"; + static protected String COL_UPDATED_AT = "updated_at"; + static protected String COL_FIRST_NAME = "first_name"; + static protected String COL_LAST_NAME = "last_name"; + static protected String COL_LAST_NAME_WITH_SPACE = "last name"; + static protected String COL_CURSOR = "cursor_field"; + static protected String 
COL_TIMESTAMP = "timestamp"; + static protected String COL_TIMESTAMP_TYPE = "TIMESTAMP"; + static protected Number ID_VALUE_1 = 1; + static protected Number ID_VALUE_2 = 2; + static protected Number ID_VALUE_3 = 3; + static protected Number ID_VALUE_4 = 4; + static protected Number ID_VALUE_5 = 5; + + static protected String DROP_SCHEMA_QUERY = "DROP SCHEMA IF EXISTS %s CASCADE"; + static protected String COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; + static protected String COLUMN_CLAUSE_WITHOUT_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; + static protected String COLUMN_CLAUSE_WITH_COMPOSITE_PK = "first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s bit NOT NULL);"; - public static String INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(0);"; - public static String CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s VARCHAR(20));"; - public static String INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES('Hello world :)');"; - public static String INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY = "INSERT INTO %s (name, timestamp) VALUES ('%s', '%s')"; + static protected String CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s bit NOT NULL);"; + static protected String INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(0);"; + static protected String CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s VARCHAR(20));"; + static protected String INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES('Hello world :)');"; + static protected String INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY = "INSERT INTO %s (name, timestamp) VALUES ('%s', '%s')"; - public JsonNode config; - public DataSource dataSource; - public JdbcDatabase database; - public JdbcSourceOperations sourceOperations = getSourceOperations(); - public Source source; - public static String streamName; + protected T testdb; - /** - * These tests write records without specifying a namespace (schema name). They will be written into - * whatever the default schema is for the database. When they are discovered they will be namespaced - * by the schema name (e.g. .). Thus the source needs to tell the - * tests what that default schema name is. If the database does not support schemas, then database - * name should used instead. - * - * @return name that will be used to namespace the record. - */ - public abstract boolean supportsSchemas(); + protected String streamName() { + return TABLE_NAME; + } /** * A valid configuration to connect to a test database. * * @return config */ - public abstract JsonNode getConfig(); - - /** - * Full qualified class name of the JDBC driver for the database. - * - * @return driver - */ - public abstract String getDriverClass(); + abstract protected JsonNode config(); /** * An instance of the source that should be tests. * * @return abstract jdbc source */ - public abstract AbstractJdbcSource getJdbcSource(); + abstract protected S source(); /** - * In some cases the Source that is being tested may be an AbstractJdbcSource, but because it is - * decorated, Java cannot recognize it as such. In these cases, as a workaround a user can choose to - * override getJdbcSource and have it return null. 
Then they can override this method with the - * decorated source AND override getToDatabaseConfigFunction with the appropriate - * toDatabaseConfigFunction that is hidden behind the decorator. + * Creates a TestDatabase instance to be used in {@link #setup()}. * - * @return source + * @return TestDatabase instance to use for test case. */ - public Source getSource() { - return getJdbcSource(); - } + abstract protected T createTestDatabase(); /** - * See getSource() for when to override this method. + * These tests write records without specifying a namespace (schema name). They will be written into + * whatever the default schema is for the database. When they are discovered they will be namespaced + * by the schema name (e.g. .). Thus the source needs to tell the + * tests what that default schema name is. If the database does not support schemas, then database + * name should used instead. * - * @return a function that maps a source's config to a jdbc config. + * @return name that will be used to namespace the record. */ - public Function getToDatabaseConfigFunction() { - return getJdbcSource()::toDatabaseConfig; - } - - protected JdbcSourceOperations getSourceOperations() { - return getDefaultSourceOperations(); - } + abstract protected boolean supportsSchemas(); protected String createTableQuery(final String tableName, final String columnClause, final String primaryKeyClause) { return String.format("CREATE TABLE %s(%s %s %s)", @@ -211,100 +168,46 @@ protected String primaryKeyClause(final List columns) { return clause.toString(); } - protected String getJdbcParameterDelimiter() { - return "&"; - } - + @BeforeEach public void setup() throws Exception { - source = getSource(); - config = getConfig(); - final JsonNode jdbcConfig = getToDatabaseConfigFunction().apply(config); - - streamName = TABLE_NAME; - - dataSource = getDataSource(jdbcConfig); - - database = new StreamingJdbcDatabase(dataSource, - getDefaultSourceOperations(), - AdaptiveStreamingQueryConfig::new); - + testdb = createTestDatabase(); if (supportsSchemas()) { createSchemas(); } - - if (getDriverClass().toLowerCase().contains("oracle")) { - database.execute(connection -> connection.createStatement() - .execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD'")); + if (testdb.getDatabaseDriver().equals(DatabaseDriver.ORACLE)) { + testdb.with("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD'"); } - - database.execute(connection -> { - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), - COLUMN_CLAUSE_WITHOUT_PK, "")); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, 
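Taken together, the abstract methods above replace the old getConfig()/getDriverClass()/getJdbcSource() trio: a suite hands the harness a TestDatabase and a Source, and the base class derives the rest. A minimal outline of a subclass under the new contract; MySourceAcceptanceTest, MySource, MyTestDatabase and SHARED_CONTAINER are placeholders, and the two type parameters are assumed to be the source and test-database types implied by source() and createTestDatabase().

// Hypothetical subclass outline under the new contract.
class MySourceAcceptanceTest extends JdbcSourceAcceptanceTest<MySource, MyTestDatabase> {

  @Override
  protected JsonNode config() {
    // Connection settings come from the TestDatabase created for this test.
    return testdb.testConfigBuilder().build();
  }

  @Override
  protected MySource source() {
    return new MySource();
  }

  @Override
  protected MyTestDatabase createTestDatabase() {
    // Called by the base class's @BeforeEach setup(); initialized() runs the bootstrap SQL.
    return new MyTestDatabase(SHARED_CONTAINER).initialized();
  }

  @Override
  protected boolean supportsSchemas() {
    // Records are namespaced by schema during discovery, as described in the javadoc above.
    return true;
  }
}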
name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), - COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(List.of("first_name", "last_name")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('first' ,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('second', 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('third', 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - - }); - } - - protected void maybeSetShorterConnectionTimeout() { + testdb + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, primaryKeyClause(Collections.singletonList("id")))) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), COLUMN_CLAUSE_WITHOUT_PK, "")) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, + primaryKeyClause(List.of("first_name", "last_name")))) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('first', 'picard', '2004-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('second', 'crusher', '2005-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('third', 'vash', '2006-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)); + } + + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { // Optionally implement this to speed up test cases which will result in a connection timeout. } - protected DataSource getDataSource(final JsonNode jdbcConfig) { - return DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? 
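The rewritten setup() above replaces the hand-rolled JDBC statements with a fluent chain on the TestDatabase. Judging from the calls in this patch, with() behaves like an execute-and-return-this helper that applies String.format-style substitution to its arguments, which is what lets the seeding read declaratively; a condensed illustration with a placeholder table name, under that assumed semantics.

// Assumed semantics, inferred from the usages above: each with(...) formats and executes
// one statement against the test database, then returns the same TestDatabase for chaining.
testdb
    .with("CREATE TABLE %s(id INTEGER, name VARCHAR(200))", "my_table")
    .with("INSERT INTO %s(id, name) VALUES (%s, '%s')", "my_table", 1, "picard")
    .with("INSERT INTO %s(id, name) VALUES (%s, '%s')", "my_table", 2, "crusher");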
jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, getJdbcParameterDelimiter())); - } - - public void tearDown() throws SQLException { - dropSchemas(); + @AfterEach + public void tearDown() { + testdb.close(); } @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final String resourceString = MoreResources.readResource("spec.json"); final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class); @@ -313,22 +216,23 @@ void testSpec() throws Exception { @Test void testCheckSuccess() throws Exception { - final AirbyteConnectionStatus actual = source.check(config); + final AirbyteConnectionStatus actual = source().check(config()); final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); assertEquals(expected, actual); } @Test void testCheckFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus actual = source.check(config); + final AirbyteConnectionStatus actual = source().check(config); assertEquals(Status.FAILED, actual.getStatus()); } @Test void testDiscover() throws Exception { - final AirbyteCatalog actual = filterOutOtherSchemas(source.discover(config)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); assertEquals(expected.getStreams().size(), actual.getStreams().size()); actual.getStreams().forEach(actualStream -> { @@ -343,13 +247,9 @@ void testDiscover() throws Exception { @Test protected void testDiscoverWithNonCursorFields() throws Exception { - database.execute(connection -> { - connection.createStatement() - .execute(String.format(CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE), COL_CURSOR)); - connection.createStatement().execute(String.format(INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY, - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE))); - }); - final AirbyteCatalog actual = filterOutOtherSchemas(source.discover(config)); + testdb.with(CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE), COL_CURSOR) + .with(INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteStream stream = actual.getStreams().stream().filter(s -> s.getName().equalsIgnoreCase(TABLE_NAME_WITHOUT_CURSOR_TYPE)).findFirst().orElse(null); assertNotNull(stream); @@ -360,14 +260,9 @@ protected void testDiscoverWithNonCursorFields() throws Exception { @Test protected void testDiscoverWithNullableCursorFields() throws Exception { - database.execute(connection -> { - connection.createStatement() - .execute(String.format(CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE), - COL_CURSOR)); - connection.createStatement().execute(String.format(INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, - getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE))); - }); - final AirbyteCatalog actual = 
filterOutOtherSchemas(source.discover(config)); + testdb.with(CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE), COL_CURSOR) + .with(INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteStream stream = actual.getStreams().stream().filter(s -> s.getName().equalsIgnoreCase(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE)).findFirst().orElse(null); assertNotNull(stream); @@ -394,28 +289,22 @@ protected AirbyteCatalog filterOutOtherSchemas(final AirbyteCatalog catalog) { @Test void testDiscoverWithMultipleSchemas() throws Exception { // clickhouse and mysql do not have a concept of schemas, so this test does not make sense for them. - String driverClass = getDriverClass().toLowerCase(); - if (driverClass.contains("mysql") || driverClass.contains("clickhouse") || driverClass.contains("teradata")) { - return; + switch (testdb.getDatabaseDriver()) { + case MYSQL, CLICKHOUSE, TERADATA: + return; } // add table and data to a separate schema. - database.execute(connection -> { - connection.createStatement().execute( - String.format("CREATE TABLE %s(id VARCHAR(200) NOT NULL, name VARCHAR(200) NOT NULL)", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('1','picard')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('3', 'vash')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - }); + testdb.with("CREATE TABLE %s(id VARCHAR(200) NOT NULL, name VARCHAR(200) NOT NULL)", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('1','picard')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('3', 'vash')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)); - final AirbyteCatalog actual = source.discover(config); + final AirbyteCatalog actual = source().discover(config()); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); final List catalogStreams = new ArrayList<>(); @@ -438,7 +327,7 @@ void testDiscoverWithMultipleSchemas() throws Exception { void testReadSuccess() throws Exception { final List actualMessages = MoreIterators.toList( - source.read(config, getConfiguredCatalogWithOneStream(getDefaultNamespace()), null)); + source().read(config(), getConfiguredCatalogWithOneStream(getDefaultNamespace()), null)); setEmittedAtToNull(actualMessages); final List expectedMessages = getTestMessages(); @@ -449,9 +338,9 @@ void testReadSuccess() throws Exception { @Test void testReadOneColumn() throws Exception { final ConfiguredAirbyteCatalog catalog = CatalogHelpers - .createConfiguredAirbyteCatalog(streamName, getDefaultNamespace(), Field.of(COL_ID, JsonSchemaType.NUMBER)); + .createConfiguredAirbyteCatalog(streamName(), 
getDefaultNamespace(), Field.of(COL_ID, JsonSchemaType.NUMBER)); final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -481,23 +370,12 @@ void testReadMultipleTables() throws Exception { final List expectedMessages = new ArrayList<>(getTestMessages()); for (int i = 2; i < 10; i++) { - final int iFinal = i; - final String streamName2 = streamName + i; - database.execute(connection -> { - connection.createStatement() - .execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME + iFinal), - "id INTEGER, name VARCHAR(200)", "")); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (1,'picard')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (2, 'crusher')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (3, 'vash')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - }); + final String streamName2 = streamName() + i; + final String tableName = getFullyQualifiedTableName(TABLE_NAME + i); + testdb.with(createTableQuery(tableName, "id INTEGER, name VARCHAR(200)", "")) + .with("INSERT INTO %s(id, name) VALUES (1,'picard')", tableName) + .with("INSERT INTO %s(id, name) VALUES (2, 'crusher')", tableName) + .with("INSERT INTO %s(id, name) VALUES (3, 'vash')", tableName); catalog.getStreams().add(CatalogHelpers.createConfiguredAirbyteStream( streamName2, getDefaultNamespace(), @@ -508,7 +386,7 @@ void testReadMultipleTables() throws Exception { } final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -541,7 +419,7 @@ void testTablesWithQuoting() throws Exception { getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0), streamForTableWithSpaces)); final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -577,7 +455,7 @@ void testReadFailure() { .withStreams(List.of(spiedAbStream)); doCallRealMethod().doThrow(new RuntimeException()).when(spiedAbStream).getStream(); - assertThrows(RuntimeException.class, () -> source.read(config, catalog, null)); + assertThrows(RuntimeException.class, () -> source().read(config(), catalog, null)); } @Test @@ -665,6 +543,7 @@ void testIncrementalCursorChanges() throws Exception { @Test void testReadOneTableIncrementallyTwice() throws Exception { + final var config = config(); final String namespace = getDefaultNamespace(); final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream(namespace); configuredCatalog.getStreams().forEach(airbyteStream -> { @@ -674,7 +553,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { }); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); + .toList(source().read(config, configuredCatalog, createEmptyState(streamName(), namespace))); final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() .filter(r -> r.getType() == Type.STATE).findFirst(); @@ -683,7 +562,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { executeStatementReadIncrementallyTwice(); final 
List actualMessagesSecondSync = MoreIterators - .toList(source.read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); + .toList(source().read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); assertEquals(2, (int) actualMessagesSecondSync.stream().filter(r -> r.getType() == Type.RECORD).count()); @@ -696,33 +575,28 @@ void testReadOneTableIncrementallyTwice() throws Exception { assertTrue(actualMessagesSecondSync.containsAll(expectedMessages)); } - protected void executeStatementReadIncrementallyTwice() throws SQLException { - database.execute(connection -> { - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (4,'riker', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (5, 'data', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - }); + protected void executeStatementReadIncrementallyTwice() { + testdb + .with("INSERT INTO %s(id, name, updated_at) VALUES (4, 'riker', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (5, 'data', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)); } protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_5, COL_NAME, "data", COL_UPDATED_AT, "2006-10-19"))))); final DbStreamState state = new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("5") @@ -734,20 +608,12 @@ protected List getExpectedAirbyteMessagesSecondSync(final String @Test void testReadMultipleTablesIncrementally() throws Exception { final String tableName2 = TABLE_NAME + 2; - final String streamName2 = streamName + 2; - database.execute(ctx -> { - ctx.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(tableName2), "id INTEGER, name VARCHAR(200)", "")); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (1,'picard')", - getFullyQualifiedTableName(tableName2))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (2, 'crusher')", - getFullyQualifiedTableName(tableName2))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (3, 'vash')", - getFullyQualifiedTableName(tableName2))); - }); + final String streamName2 = streamName() + 2; + final String fqTableName2 = getFullyQualifiedTableName(tableName2); + testdb.with(createTableQuery(fqTableName2, "id INTEGER, name VARCHAR(200)", "")) + .with("INSERT INTO %s(id, name) VALUES (1,'picard')", fqTableName2) + .with("INSERT INTO %s(id, name) VALUES (2, 'crusher')", fqTableName2) + .with("INSERT INTO %s(id, name) VALUES (3, 'vash')", fqTableName2); 
final String namespace = getDefaultNamespace(); final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream( @@ -764,7 +630,7 @@ void testReadMultipleTablesIncrementally() throws Exception { }); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); + .toList(source().read(config(), configuredCatalog, createEmptyState(streamName(), namespace))); // get last state message. final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() @@ -779,7 +645,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Represents the state after the first stream has been updated final List expectedStateStreams1 = List.of( new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("3") @@ -792,7 +658,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Represents the state after both streams have been updated final List expectedStateStreams2 = List.of( new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("3") @@ -843,17 +709,14 @@ protected void incrementalCursorCheck( // See https://github.com/airbytehq/airbyte/issues/14732 for rationale and details. @Test public void testIncrementalWithConcurrentInsertion() throws Exception { - final String driverName = getDriverClass().toLowerCase(); final String namespace = getDefaultNamespace(); final String fullyQualifiedTableName = getFullyQualifiedTableName(TABLE_NAME_AND_TIMESTAMP); final String columnDefinition = String.format("name VARCHAR(200) NOT NULL, %s %s NOT NULL", COL_TIMESTAMP, COL_TIMESTAMP_TYPE); // 1st sync - database.execute(ctx -> { - ctx.createStatement().execute(createTableQuery(fullyQualifiedTableName, columnDefinition, "")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "a", "2021-01-01 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "b", "2021-01-01 00:00:00")); - }); + testdb.with(createTableQuery(fullyQualifiedTableName, columnDefinition, "")) + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "a", "2021-01-01 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "b", "2021-01-01 00:00:00"); final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( new AirbyteCatalog().withStreams(List.of( @@ -870,7 +733,7 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { }); final List firstSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createEmptyState(TABLE_NAME_AND_TIMESTAMP, namespace))); + source().read(config(), configuredCatalog, createEmptyState(TABLE_NAME_AND_TIMESTAMP, namespace))); // cursor after 1st sync: 2021-01-01 00:00:00, count 2 final Optional firstSyncStateOptional = firstSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -886,19 +749,17 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { .map(r -> r.getRecord().getData().get(COL_NAME).asText()) .toList(); // teradata doesn't make insertion order guarantee when equal ordering value - if (driverName.contains("teradata")) { + if (testdb.getDatabaseDriver().equals(DatabaseDriver.TERADATA)) { 
assertThat(List.of("a", "b"), Matchers.containsInAnyOrder(firstSyncNames.toArray())); } else { assertEquals(List.of("a", "b"), firstSyncNames); } // 2nd sync - database.execute(ctx -> { - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "c", "2021-01-02 00:00:00")); - }); + testdb.with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "c", "2021-01-02 00:00:00"); final List secondSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, firstSyncState))); + source().read(config(), configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, firstSyncState))); // cursor after 2nd sync: 2021-01-02 00:00:00, count 1 final Optional secondSyncStateOptional = secondSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -916,14 +777,12 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { assertEquals(List.of("c"), secondSyncNames); // 3rd sync has records with duplicated cursors - database.execute(ctx -> { - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "d", "2021-01-02 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "e", "2021-01-02 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "f", "2021-01-03 00:00:00")); - }); + testdb.with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "d", "2021-01-02 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "e", "2021-01-02 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "f", "2021-01-03 00:00:00"); final List thirdSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, secondSyncState))); + source().read(config(), configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, secondSyncState))); // Cursor after 3rd sync is: 2021-01-03 00:00:00, count 1. 
final Optional thirdSyncStateOptional = thirdSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -942,12 +801,11 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { .toList(); // teradata doesn't make insertion order guarantee when equal ordering value - if (driverName.contains("teradata")) { + if (testdb.getDatabaseDriver().equals(DatabaseDriver.TERADATA)) { assertThat(List.of("c", "d", "e", "f"), Matchers.containsInAnyOrder(thirdSyncExpectedNames.toArray())); } else { assertEquals(List.of("c", "d", "e", "f"), thirdSyncExpectedNames); } - } protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) { @@ -989,7 +847,7 @@ protected void incrementalCursorCheck( final DbStreamState dbStreamState = buildStreamState(airbyteStream, initialCursorField, initialCursorValue); final List actualMessages = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); + .toList(source().read(config(), configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); setEmittedAtToNull(actualMessages); @@ -1019,7 +877,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalogWithOneStream(final Strin final ConfiguredAirbyteCatalog catalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog(defaultNamespace)); // Filter to only keep the main stream name as configured stream catalog.withStreams( - catalog.getStreams().stream().filter(s -> s.getStream().getName().equals(streamName)) + catalog.getStreams().stream().filter(s -> s.getStream().getName().equals(streamName())) .collect(Collectors.toList())); return catalog; } @@ -1056,20 +914,20 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { protected List getTestMessages() { return List.of( new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_1, COL_NAME, "picard", COL_UPDATED_AT, "2004-10-19")))), new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_2, COL_NAME, "crusher", COL_UPDATED_AT, "2005-10-19")))), new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_3, COL_NAME, "vash", @@ -1108,7 +966,7 @@ protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { final String tableNameWithSpaces = TABLE_NAME_WITH_SPACES + "2"; final String streamName2 = tableNameWithSpaces; - database.execute(connection -> { + try (final var connection = testdb.getDataSource().getConnection()) { final String identifierQuoteString = connection.getMetaData().getIdentifierQuoteString(); connection.createStatement() .execute( @@ -1132,7 +990,7 @@ protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { getFullyQualifiedTableName( enquoteIdentifier(tableNameWithSpaces, identifierQuoteString)), enquoteIdentifier(COL_LAST_NAME_WITH_SPACE, 
identifierQuoteString))); - }); + } return CatalogHelpers.createConfiguredAirbyteStream( streamName2, @@ -1145,32 +1003,27 @@ public String getFullyQualifiedTableName(final String tableName) { return RelationalDbQueryUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); } - public void createSchemas() throws SQLException { + protected void createSchemas() { if (supportsSchemas()) { for (final String schemaName : TEST_SCHEMAS) { - final String createSchemaQuery = String.format("CREATE SCHEMA %s;", schemaName); - database.execute(connection -> connection.createStatement().execute(createSchemaQuery)); + testdb.with("CREATE SCHEMA %s;", schemaName); } } } - public void dropSchemas() throws SQLException { + protected void dropSchemas() { if (supportsSchemas()) { for (final String schemaName : TEST_SCHEMAS) { - final String dropSchemaQuery = String - .format(DROP_SCHEMA_QUERY, schemaName); - database.execute(connection -> connection.createStatement().execute(dropSchemaQuery)); + testdb.with(DROP_SCHEMA_QUERY, schemaName); } } } private JsonNode convertIdBasedOnDatabase(final int idValue) { - final var driverClass = getDriverClass().toLowerCase(); - if (driverClass.contains("oracle") || driverClass.contains("snowflake")) { - return Jsons.jsonNode(BigDecimal.valueOf(idValue)); - } else { - return Jsons.jsonNode(idValue); - } + return switch (testdb.getDatabaseDriver()) { + case ORACLE, SNOWFLAKE -> Jsons.jsonNode(BigDecimal.valueOf(idValue)); + default -> Jsons.jsonNode(idValue); + }; } private String getDefaultSchemaName() { @@ -1178,13 +1031,11 @@ private String getDefaultSchemaName() { } protected String getDefaultNamespace() { - // mysql does not support schemas. it namespaces using database names instead. - if (getDriverClass().toLowerCase().contains("mysql") || getDriverClass().toLowerCase().contains("clickhouse") || - getDriverClass().toLowerCase().contains("teradata")) { - return config.get(JdbcUtils.DATABASE_KEY).asText(); - } else { - return SCHEMA_NAME; - } + return switch (testdb.getDatabaseDriver()) { + // mysql does not support schemas, it namespaces using database names instead. + case MYSQL, CLICKHOUSE, TERADATA -> testdb.getDatabaseName(); + default -> SCHEMA_NAME; + }; } protected static void setEmittedAtToNull(final Iterable messages) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java new file mode 100644 index 000000000000..4735716dc05e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.cdk.testutils; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.JdbcDatabaseContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.utility.DockerImageName; + +/** + * ContainerFactory is the companion interface to {@link TestDatabase} for providing it with + * suitable testcontainer instances. 
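+ * <p>
+ * A minimal illustrative sketch of an implementation; the {@code MySqlContainerFactory} class and
+ * its {@code withUtf8} hook are hypothetical names used only for this example:
+ *
+ * <pre>{@code
+ * public class MySqlContainerFactory implements ContainerFactory<MySQLContainer<?>> {
+ *
+ *   public MySQLContainer<?> createNewContainer(DockerImageName imageName) {
+ *     // Wraps the default constructor for the testcontainer type.
+ *     return new MySQLContainer<>(imageName);
+ *   }
+ *
+ *   public Class<?> getContainerClass() {
+ *     return MySQLContainer.class;
+ *   }
+ *
+ *   // Extra configuration hooks are public methods taking the container type as their only
+ *   // argument; shared(imageName, "withUtf8") looks them up by name and invokes them once.
+ *   public void withUtf8(MySQLContainer<?> container) {
+ *     container.withCommand("--character-set-server=utf8mb4");
+ *   }
+ * }
+ *
+ * // One container is created and started per (image, factory, methods) key, then reused
+ * // for the lifetime of the JVM.
+ * MySQLContainer<?> shared = new MySqlContainerFactory().shared("mysql:8.0", "withUtf8");
+ * }</pre>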
+ */ +public interface ContainerFactory> { + + /** + * Creates a new, unshared testcontainer instance. This usually wraps the default constructor for + * the testcontainer type. + */ + C createNewContainer(DockerImageName imageName); + + /** + * Returns the class object of the testcontainer. + */ + Class getContainerClass(); + + /** + * Returns a shared instance of the testcontainer. + */ + default C shared(String imageName, String... methods) { + final String mapKey = Stream.concat( + Stream.of(imageName, this.getClass().getCanonicalName()), + Stream.of(methods)) + .collect(Collectors.joining("+")); + return Singleton.getOrCreate(mapKey, this); + } + + /** + * This class is exclusively used by {@link #shared(String, String...)}. It wraps a specific shared + * testcontainer instance, which is created exactly once. + */ + class Singleton> { + + static private final Logger LOGGER = LoggerFactory.getLogger(Singleton.class); + static private final ConcurrentHashMap> LAZY = new ConcurrentHashMap<>(); + + @SuppressWarnings("unchecked") + static private > C getOrCreate(String mapKey, ContainerFactory factory) { + final Singleton singleton = LAZY.computeIfAbsent(mapKey, Singleton::new); + return ((Singleton) singleton).getOrCreate(factory); + } + + final private String imageName; + final private List methodNames; + + private C sharedContainer; + private RuntimeException containerCreationError; + + private Singleton(String imageNamePlusMethods) { + final String[] parts = imageNamePlusMethods.split("\\+"); + this.imageName = parts[0]; + this.methodNames = Arrays.stream(parts).skip(2).toList(); + } + + private synchronized C getOrCreate(ContainerFactory factory) { + if (sharedContainer == null && containerCreationError == null) { + try { + create(imageName, factory, methodNames); + } catch (RuntimeException e) { + sharedContainer = null; + containerCreationError = e; + } + } + if (containerCreationError != null) { + throw new RuntimeException( + "Error during container creation for imageName=" + imageName + + ", factory=" + factory.getClass().getName() + + ", methods=" + methodNames, + containerCreationError); + } + return sharedContainer; + } + + private void create(String imageName, ContainerFactory factory, List methodNames) { + LOGGER.info("Creating new shared container based on {} with {}.", imageName, methodNames); + try { + final var parsed = DockerImageName.parse(imageName); + final var methods = new ArrayList(); + for (String methodName : methodNames) { + methods.add(factory.getClass().getMethod(methodName, factory.getContainerClass())); + } + sharedContainer = factory.createNewContainer(parsed); + sharedContainer.withLogConsumer(new Slf4jLogConsumer(LOGGER)); + for (Method method : methods) { + LOGGER.info("Calling {} in {} on new shared container based on {}.", + method.getName(), factory.getClass().getName(), imageName); + method.invoke(factory, sharedContainer); + } + sharedContainer.start(); + } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + } + +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java deleted file mode 100644 index b52752f38b64..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.testutils; - -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.PostgresUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.string.Strings; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.Network; -import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.DockerImageName; -import org.testcontainers.utility.MountableFile; - -/** - * {@link PostgresTestDatabase} is a convenience object which allows for efficient use of - * {@link PostgreSQLContainer} instances in tests. Each test container is shared throughout the - * whole JVM. Isolation is performed by creating a new database and a new user for each - * {@link PostgresTestDatabase} instance. These are dropped when the instance is closed. - */ -public class PostgresTestDatabase implements AutoCloseable { - - static private final Logger LOGGER = LoggerFactory.getLogger(PostgresTestDatabase.class); - - /** - * Create a new {@link PostgresTestDatabase} instance. - * - * @param imageName base image to use for the underlying {@link PostgreSQLContainer}. - * @param methods {@link ContainerFactory} methods that need to be called. - * @return a new {@link PostgresTestDatabase} instance which may reuse a shared - * {@link PostgreSQLContainer}. - */ - static public PostgresTestDatabase make(String imageName, String... 
methods) { - final String imageNamePlusMethods = Stream.concat( - Stream.of(imageName), - Stream.of(methods)) - .collect(Collectors.joining("+")); - final ContainerFactory factory = ContainerFactory.LAZY.computeIfAbsent(imageNamePlusMethods, ContainerFactory::new); - return new PostgresTestDatabase(factory.getOrCreateSharedContainer()); - } - - private PostgresTestDatabase(PostgreSQLContainer sharedContainer) { - this.container = sharedContainer; - this.suffix = Strings.addRandomSuffix("", "_", 10); - this.dbName = "db" + suffix; - this.userName = "test_user" + suffix; - this.password = "test_password" + suffix; - execSQL( - String.format("CREATE DATABASE %s", dbName), - String.format("CREATE USER %s PASSWORD '%s'", userName, password), - String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", dbName, userName), - String.format("ALTER USER %s WITH SUPERUSER", userName)); - - this.jdbcUrl = String.format( - DatabaseDriver.POSTGRESQL.getUrlFormatString(), - sharedContainer.getHost(), - sharedContainer.getFirstMappedPort(), - dbName); - this.dslContext = DSLContextFactory.create( - userName, - password, - DatabaseDriver.POSTGRESQL.getDriverClassName(), - jdbcUrl, - SQLDialect.POSTGRES); - this.database = new Database(dslContext); - } - - public final PostgreSQLContainer container; - public final String suffix, dbName, userName, password, jdbcUrl; - public final DSLContext dslContext; - public final Database database; - - /** - * Convenience method for building identifiers which are unique to this instance. - */ - public String withSuffix(String str) { - return str + suffix; - } - - /** - * Convenience method for initializing a config builder for use in integration tests. - */ - public ImmutableMap.Builder makeConfigBuilder() { - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, userName) - .put(JdbcUtils.PASSWORD_KEY, password); - } - - /** - * @return the {@link PostgresUtils.Certificate} for this instance; requires - * {@link ContainerFactory#withCert} call. - */ - public PostgresUtils.Certificate getCertificate() { - final String caCert, clientKey, clientCert; - try { - caCert = container.execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); - clientKey = container.execInContainer("su", "-c", "cat client.key").getStdout().trim(); - clientCert = container.execInContainer("su", "-c", "cat client.crt").getStdout().trim(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - return new PostgresUtils.Certificate(caCert, clientCert, clientKey); - } - - private void execSQL(String... stmts) { - final List cmd = Stream.concat( - Stream.of("psql", "-a", "-d", container.getDatabaseName(), "-U", container.getUsername()), - Stream.of(stmts).flatMap(stmt -> Stream.of("-c", stmt))) - .toList(); - try { - LOGGER.debug("executing {}", Strings.join(cmd, " ")); - final var exec = container.execInContainer(cmd.toArray(new String[0])); - LOGGER.debug("exit code: {}\nstdout:\n{}\nstderr:\n{}", exec.getExitCode(), exec.getStdout(), exec.getStderr()); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - /** - * Drop the database owned by this instance. 
- */ - public void dropDatabase() { - execSQL(String.format("DROP DATABASE %s", dbName)); - } - - /** - * Close resources held by this instance. This deliberately avoids dropping the database, which is - * really expensive in Postgres. This is because a DROP DATABASE in Postgres triggers a CHECKPOINT. - * Call {@link #dropDatabase} to explicitly drop the database. - */ - @Override - public void close() { - dslContext.close(); - execSQL(String.format("DROP USER %s", userName)); - } - - static private class ContainerFactory { - - static private final Logger LOGGER = LoggerFactory.getLogger(ContainerFactory.class); - static private final ConcurrentHashMap LAZY = new ConcurrentHashMap<>(); - - final private String imageName; - final private List methods; - private PostgreSQLContainer sharedContainer; - private RuntimeException containerCreationError; - - private ContainerFactory(String imageNamePlusMethods) { - final String[] parts = imageNamePlusMethods.split("\\+"); - this.imageName = parts[0]; - this.methods = Arrays.stream(parts).skip(1).map(methodName -> { - try { - return ContainerFactory.class.getMethod(methodName); - } catch (NoSuchMethodException e) { - throw new RuntimeException(e); - } - }).toList(); - } - - private synchronized PostgreSQLContainer getOrCreateSharedContainer() { - if (sharedContainer == null) { - if (containerCreationError != null) { - throw new RuntimeException( - "Error during container creation for imageName=" + imageName + ", methods=" + methods.stream().map(Method::getName).toList(), - containerCreationError); - } - LOGGER.info("Creating new shared container based on {} with {}.", imageName, methods.stream().map(Method::getName).toList()); - try { - final var parsed = DockerImageName.parse(imageName).asCompatibleSubstituteFor("postgres"); - sharedContainer = new PostgreSQLContainer<>(parsed); - for (Method method : methods) { - LOGGER.info("Calling {} on new shared container based on {}.", method.getName(), - imageName); - method.invoke(this); - } - sharedContainer.start(); - } catch (IllegalAccessException | InvocationTargetException e) { - containerCreationError = new RuntimeException(e); - this.sharedContainer = null; - throw containerCreationError; - } catch (RuntimeException e) { - this.sharedContainer = null; - containerCreationError = e; - throw e; - } - } - return sharedContainer; - } - - /** - * Apply the postgresql.conf file that we've packaged as a resource. - */ - public void withConf() { - sharedContainer - .withCopyFileToContainer( - MountableFile.forClasspathResource("postgresql.conf"), - "/etc/postgresql/postgresql.conf") - .withCommand("postgres -c config_file=/etc/postgresql/postgresql.conf"); - } - - /** - * Create a new network and bind it to the container. - */ - public void withNetwork() { - sharedContainer.withNetwork(Network.newNetwork()); - } - - /** - * Configure postgres with wal_level=logical. - */ - public void withWalLevelLogical() { - sharedContainer.withCommand("postgres -c wal_level=logical"); - } - - /** - * Generate SSL certificates and tell postgres to enable SSL and use them. 
- */ - public void withCert() { - sharedContainer.start(); - String[] commands = { - "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\"", - "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\"", - "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\"", - "openssl ecparam -name prime256v1 -genkey -noout -out ca.key", - "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\"", - "openssl ecparam -name prime256v1 -genkey -noout -out server.key", - "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\"", - "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256", - "cp server.key /etc/ssl/private/", - "cp server.crt /etc/ssl/private/", - "cp ca.crt /etc/ssl/private/", - "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*", - "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt", - "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> /var/lib/postgresql/data/postgresql.conf", - "mkdir root/.postgresql", - "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf", - "openssl ecparam -name prime256v1 -genkey -noout -out client.key", - "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\"", - "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256", - "cp client.crt ~/.postgresql/postgresql.crt", - "cp client.key ~/.postgresql/postgresql.key", - "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key", - "cp ca.crt root/.postgresql/ca.crt", - "chown postgres:postgres ~/.postgresql/ca.crt", - "psql -U test -c \"SELECT pg_reload_conf();\"", - }; - for (String cmd : commands) { - try { - sharedContainer.execInContainer("su", "-c", cmd); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - - /** - * Tell postgres to enable SSL. - */ - public void withSSL() { - sharedContainer.withCommand("postgres " + - "-c ssl=on " + - "-c ssl_cert_file=/var/lib/postgresql/server.crt " + - "-c ssl_key_file=/var/lib/postgresql/server.key"); - } - - /** - * Configure postgres with client_encoding=sql_ascii. - */ - public void withASCII() { - sharedContainer.withCommand("postgres -c client_encoding=sql_ascii"); - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java new file mode 100644 index 000000000000..6a5d80104718 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.cdk.testutils; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.ContextQueryFunction; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.string.Strings; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.sql.SQLException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import javax.sql.DataSource; +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.JdbcDatabaseContainer; + +/** + * TestDatabase provides a convenient pattern for interacting with databases when testing SQL + * database sources. The basic idea is to share the same database testcontainer instance for all + * tests and to use SQL constructs such as DATABASE and USER to isolate each test case's state. + * + * @param the type of the backing testcontainer. + * @param itself + * @param the type of the object returned by {@link #configBuilder()} + */ +abstract public class TestDatabase, T extends TestDatabase, B extends TestDatabase.ConfigBuilder> + implements AutoCloseable { + + static private final Logger LOGGER = LoggerFactory.getLogger(TestDatabase.class); + + final private C container; + final private String suffix; + final private ArrayList cleanupSQL = new ArrayList<>(); + final private Map connectionProperties = new HashMap<>(); + + private DataSource dataSource; + private DSLContext dslContext; + + protected TestDatabase(C container) { + this.container = container; + this.suffix = Strings.addRandomSuffix("", "_", 10); + } + + @SuppressWarnings("unchecked") + protected T self() { + return (T) this; + } + + /** + * Adds a key-value pair to the JDBC URL's query parameters. + */ + public T withConnectionProperty(String key, String value) { + if (isInitialized()) { + throw new RuntimeException("TestDatabase instance is already initialized"); + } + connectionProperties.put(key, value); + return self(); + } + + /** + * Enqueues a SQL statement to be executed when this object is closed. + */ + public T onClose(String fmtSql, Object... fmtArgs) { + cleanupSQL.add(String.format(fmtSql, fmtArgs)); + return self(); + } + + /** + * Executes a SQL statement after calling String.format on the arguments. + */ + public T with(String fmtSql, Object... fmtArgs) { + execSQL(Stream.of(String.format(fmtSql, fmtArgs))); + return self(); + } + + /** + * Executes SQL statements as root to provide the necessary isolation for the lifetime of this + * object. This typically entails at least a CREATE DATABASE and a CREATE USER. Also Initializes the + * {@link DataSource} and {@link DSLContext} owned by this object. 
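+ * <p>
+ * A minimal usage sketch, assuming a hypothetical {@code MyTestDatabase} subclass and an
+ * already-running {@code sharedContainer}:
+ *
+ * <pre>{@code
+ * MyTestDatabase testdb = new MyTestDatabase(sharedContainer)
+ *     // Runs the subclass's in-container bootstrap commands and opens the DataSource and DSLContext.
+ *     .initialized()
+ *     // Each with(...) applies String.format to its arguments and executes the resulting SQL.
+ *     .with("CREATE TABLE %s(id INTEGER)", "my_table")
+ *     // onClose(...) statements are queued and executed when close() is called.
+ *     .onClose("DROP TABLE %s", "my_table");
+ * }</pre>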
+ */ + final public T initialized() { + inContainerBootstrapCmd().forEach(this::execInContainer); + this.dataSource = DataSourceFactory.create( + getUserName(), + getPassword(), + getDatabaseDriver().getDriverClassName(), + getJdbcUrl(), + connectionProperties); + this.dslContext = DSLContextFactory.create(dataSource, getSqlDialect()); + return self(); + } + + final public boolean isInitialized() { + return dslContext != null; + } + + abstract protected Stream> inContainerBootstrapCmd(); + + abstract protected Stream inContainerUndoBootstrapCmd(); + + abstract public DatabaseDriver getDatabaseDriver(); + + abstract public SQLDialect getSqlDialect(); + + final public C getContainer() { + return container; + } + + public String withNamespace(String name) { + return name + suffix; + } + + public String getDatabaseName() { + return withNamespace("db"); + } + + public String getUserName() { + return withNamespace("user"); + } + + public String getPassword() { + return "password"; + } + + public DataSource getDataSource() { + if (!isInitialized()) { + throw new RuntimeException("TestDatabase instance is not yet initialized"); + } + return dataSource; + } + + final public DSLContext getDslContext() { + if (!isInitialized()) { + throw new RuntimeException("TestDatabase instance is not yet initialized"); + } + return dslContext; + } + + public String getJdbcUrl() { + return String.format( + getDatabaseDriver().getUrlFormatString(), + getContainer().getHost(), + getContainer().getFirstMappedPort(), + getDatabaseName()); + } + + public Database getDatabase() { + return new Database(getDslContext()); + } + + protected void execSQL(Stream sql) { + try { + getDatabase().query(ctx -> { + sql.forEach(ctx::execute); + return null; + }); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + protected void execInContainer(Stream cmds) { + final List cmd = cmds.toList(); + if (cmd.isEmpty()) { + return; + } + try { + LOGGER.debug("executing {}", Strings.join(cmd, " ")); + final var exec = getContainer().execInContainer(cmd.toArray(new String[0])); + if (exec.getExitCode() == 0) { + LOGGER.debug("execution success\nstdout:\n{}\nstderr:\n{}", exec.getStdout(), exec.getStderr()); + } else { + LOGGER.error("execution failure, code {}\nstdout:\n{}\nstderr:\n{}", exec.getExitCode(), exec.getStdout(), exec.getStderr()); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + public X query(final ContextQueryFunction transform) throws SQLException { + return getDatabase().query(transform); + } + + public X transaction(final ContextQueryFunction transform) throws SQLException { + return getDatabase().transaction(transform); + } + + /** + * Returns a builder for the connector config object. 
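+ * <p>
+ * Illustrative sketch of assembling a source config in a test; the extra {@code "schema"} key is
+ * a hypothetical example of an arbitrary property:
+ *
+ * <pre>{@code
+ * JsonNode config = testdb.testConfigBuilder()   // host/port + credentials + database name
+ *     .withoutSsl()                              // adds "ssl": false
+ *     .with("schema", "public")                  // any additional key/value pair
+ *     .build();                                  // serializes the builder into a JsonNode
+ * }</pre>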
+ */ + public B configBuilder() { + return new ConfigBuilder(self()).self(); + } + + public B testConfigBuilder() { + return configBuilder() + .withHostAndPort() + .withCredentials() + .withDatabase(); + } + + public B integrationTestConfigBuilder() { + return configBuilder() + .withResolvedHostAndPort() + .withCredentials() + .withDatabase(); + } + + @Override + public void close() { + execSQL(this.cleanupSQL.stream()); + dslContext.close(); + execInContainer(inContainerUndoBootstrapCmd()); + } + + static public class ConfigBuilder, B extends ConfigBuilder> { + + static public final Duration DEFAULT_CDC_REPLICATION_INITIAL_WAIT = Duration.ofSeconds(5); + + protected final ImmutableMap.Builder builder = ImmutableMap.builder(); + protected final T testDatabase; + + protected ConfigBuilder(T testDatabase) { + this.testDatabase = testDatabase; + } + + public JsonNode build() { + return Jsons.jsonNode(builder.build()); + } + + @SuppressWarnings("unchecked") + final protected B self() { + return (B) this; + } + + public B with(Object key, Object value) { + builder.put(key, value); + return self(); + } + + public B withDatabase() { + return this + .with(JdbcUtils.DATABASE_KEY, testDatabase.getDatabaseName()); + } + + public B withCredentials() { + return this + .with(JdbcUtils.USERNAME_KEY, testDatabase.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, testDatabase.getPassword()); + } + + public B withResolvedHostAndPort() { + return this + .with(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(testDatabase.getContainer())) + .with(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(testDatabase.getContainer())); + } + + public B withHostAndPort() { + return this + .with(JdbcUtils.HOST_KEY, testDatabase.getContainer().getHost()) + .with(JdbcUtils.PORT_KEY, testDatabase.getContainer().getFirstMappedPort()); + } + + public B withoutSsl() { + return with(JdbcUtils.SSL_KEY, false); + } + + public B withSsl(Map sslMode) { + return with(JdbcUtils.SSL_KEY, true).with(JdbcUtils.SSL_MODE_KEY, sslMode); + } + + } + +} diff --git a/airbyte-cdk/python/.bumpversion.cfg b/airbyte-cdk/python/.bumpversion.cfg index 08bf1ea465db..c1c6f6b90cc0 100644 --- a/airbyte-cdk/python/.bumpversion.cfg +++ b/airbyte-cdk/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.53.7 +current_version = 0.53.9 commit = False [bumpversion:file:setup.py] diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 8778ed30577b..591ed0c349e0 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 0.53.9 +Fix of generate the error message using _try_get_error based on list of errors + +## 0.53.8 +Vector DB CDK: Remove CDC records, File CDK: Update unstructured parser + ## 0.53.7 low-code: fix debug logging when using --debug flag diff --git a/airbyte-cdk/python/Dockerfile b/airbyte-cdk/python/Dockerfile index fdd41a9820ee..6942df8c0eea 100644 --- a/airbyte-cdk/python/Dockerfile +++ b/airbyte-cdk/python/Dockerfile @@ -10,7 +10,7 @@ RUN apk --no-cache upgrade \ && apk --no-cache add tzdata build-base # install airbyte-cdk -RUN pip install --prefix=/install airbyte-cdk==0.53.7 +RUN pip install --prefix=/install airbyte-cdk==0.53.9 # build a clean environment FROM base @@ -32,5 +32,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] # needs to be the same as CDK -LABEL io.airbyte.version=0.53.7 +LABEL io.airbyte.version=0.53.9 LABEL 
io.airbyte.name=airbyte/source-declarative-manifest diff --git a/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py b/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py index 7d7d174baee9..3ed3e3511dd1 100644 --- a/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py +++ b/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py @@ -19,6 +19,8 @@ METADATA_STREAM_FIELD = "_ab_stream" METADATA_RECORD_ID_FIELD = "_ab_record_id" +CDC_DELETED_FIELD = "_ab_cdc_deleted_at" + @dataclass class Chunk: @@ -103,6 +105,8 @@ def process(self, record: AirbyteRecordMessage) -> Tuple[List[Chunk], Optional[s :param records: List of AirbyteRecordMessages :return: Tuple of (List of document chunks, record id to delete if a stream is in dedup mode to avoid stale documents in the vector store) """ + if CDC_DELETED_FIELD in record.data and record.data[CDC_DELETED_FIELD]: + return [], self._extract_primary_key(record) doc = self._generate_document(record) if doc is None: text_fields = ", ".join(self.text_fields) if self.text_fields else "all fields" @@ -139,22 +143,27 @@ def _extract_relevant_fields(self, record: AirbyteRecordMessage, fields: Optiona def _extract_metadata(self, record: AirbyteRecordMessage) -> Dict[str, Any]: metadata = self._extract_relevant_fields(record, self.metadata_fields) + metadata[METADATA_STREAM_FIELD] = create_stream_identifier(record) + primary_key = self._extract_primary_key(record) + if primary_key: + metadata[METADATA_RECORD_ID_FIELD] = primary_key + return metadata + + def _extract_primary_key(self, record: AirbyteRecordMessage) -> Optional[str]: stream_identifier = create_stream_identifier(record) current_stream: ConfiguredAirbyteStream = self.streams[stream_identifier] - metadata[METADATA_STREAM_FIELD] = stream_identifier # if the sync mode is deduping, use the primary key to upsert existing records instead of appending new ones - if current_stream.primary_key and current_stream.destination_sync_mode == DestinationSyncMode.append_dedup: - metadata[METADATA_RECORD_ID_FIELD] = f"{stream_identifier}_{self._extract_primary_key(record, current_stream)}" - return metadata + if not current_stream.primary_key or current_stream.destination_sync_mode != DestinationSyncMode.append_dedup: + return None - def _extract_primary_key(self, record: AirbyteRecordMessage, stream: ConfiguredAirbyteStream) -> str: primary_key = [] - for key in stream.primary_key: + for key in current_stream.primary_key: try: primary_key.append(str(dpath.util.get(record.data, key))) except KeyError: primary_key.append("__not_found__") - return "_".join(primary_key) + stringified_primary_key = "_".join(primary_key) + return f"{stream_identifier}_{stringified_primary_key}" def _split_document(self, doc: Document) -> List[Document]: chunks: List[Document] = self.splitter.split_documents([doc]) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 445325f77b2d..a3a8ec657090 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -538,7 +538,8 @@ def _try_get_error(value: Any) -> Any: if isinstance(value, str): return value elif isinstance(value, list): - return ", ".join(_try_get_error(v) for v in value) + error_list = [_try_get_error(v) for v in value] + 
return ", ".join(v for v in error_list if v is not None) elif isinstance(value, dict): new_value = ( value.get("message") @@ -547,6 +548,8 @@ def _try_get_error(value: Any) -> Any: or value.get("errors") or value.get("failures") or value.get("failure") + or value.get("details") + or value.get("detail") ) return _try_get_error(new_value) return None diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index 54d0ab9d7c73..b91be567f9b3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -115,9 +115,9 @@ def _read_file(self, file_handle: IOBase, remote_file: RemoteFile, format: Unstr if filetype == FileType.PDF: # for PDF, read the file into a BytesIO object because some code paths in pdf parsing are doing an instance check on the file object and don't work with file-like objects file_handle.seek(0) - file = BytesIO(file_handle.read()) - file_handle.seek(0) - elements = unstructured_partition_pdf(file=file) + with BytesIO(file_handle.read()) as file: + file_handle.seek(0) + elements = unstructured_partition_pdf(file=file) elif filetype == FileType.DOCX: elements = unstructured_partition_docx(file=file) elif filetype == FileType.PPTX: diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 9949399cfcca..bfb4ec5d274c 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -23,8 +23,8 @@ tiktoken_dependency = "tiktoken==0.4.0" unstructured_dependencies = [ - "unstructured==0.10.19", - "unstructured[docx,pptx]==0.10.19", + "unstructured==0.10.27", # can't be bumped higher due to transitive dependencies we can't provide + "unstructured[docx,pptx]==0.10.27", "pdf2image==1.16.3", "pdfminer.six==20221105", "unstructured.pytesseract>=0.3.12", @@ -36,7 +36,7 @@ name="airbyte-cdk", # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be # updated if our semver format changes such as using release candidate versions. 
- version="0.53.7", + version="0.53.9", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py index 59f5f8c011bb..2660ee791512 100644 --- a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py +++ b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py @@ -23,13 +23,22 @@ def initialize_processor(config=ProcessingConfigModel(chunk_size=48, chunk_overl catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream1", json_schema={}, namespace="namespace1", supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream1", + json_schema={}, + namespace="namespace1", + supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, primary_key=[["id"]], ), ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream2", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream2", + json_schema={}, + supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, ), @@ -53,8 +62,14 @@ def initialize_processor(config=ProcessingConfigModel(chunk_size=48, chunk_overl ), (["id"], {"_ab_stream": "namespace1_stream1", "id": 1}), (["id", "non_existing"], {"_ab_stream": "namespace1_stream1", "id": 1}), - (["id", "complex.test"], {"_ab_stream": "namespace1_stream1", "id": 1, "complex.test": "abc"}), - (["id", "arr.*.test"], {"_ab_stream": "namespace1_stream1", "id": 1, "arr.*.test": ["abc", "def"]}), + ( + ["id", "complex.test"], + {"_ab_stream": "namespace1_stream1", "id": 1, "complex.test": "abc"}, + ), + ( + ["id", "arr.*.test"], + {"_ab_stream": "namespace1_stream1", "id": 1, "arr.*.test": ["abc", "def"]}, + ), ], ) def test_process_single_chunk_with_metadata(metadata_fields, expected_metadata): @@ -82,7 +97,7 @@ def test_process_single_chunk_with_metadata(metadata_fields, expected_metadata): assert id_to_delete is None -def test_process_single_chunk_limit4ed_metadata(): +def test_process_single_chunk_limited_metadata(): processor = initialize_processor() record = AirbyteRecordMessage( @@ -112,7 +127,11 @@ def test_process_single_chunk_without_namespace(): catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream1", + json_schema={}, + supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, ), @@ -155,7 +174,12 @@ def test_complex_text_fields(): emitted_at=1234, ) - processor.text_fields = ["nested.texts.*.text", "text", "other_nested.non_text", "non.*.existing"] + processor.text_fields = [ + "nested.texts.*.text", + "text", + "other_nested.non_text", + "non.*.existing", + ] processor.metadata_fields = ["non_text", "non_text_2", "id"] chunks, _ = processor.process(record) @@ -169,7 +193,12 @@ def test_complex_text_fields(): other_nested.non_text: \na: xyz b: abc""" ) - assert chunks[0].metadata == {"id": 1, "non_text": "a", "non_text_2": 1, "_ab_stream": "namespace1_stream1"} + assert 
chunks[0].metadata == { + "id": 1, + "non_text": "a", + "non_text_2": 1, + "_ab_stream": "namespace1_stream1", + } def test_no_text_fields(): @@ -228,7 +257,11 @@ def test_process_multiple_chunks_with_relevant_fields(): 10, 0, None, - ["text: By default, splits are done", "on multi newlines,", "then single newlines, then spaces"], + [ + "text: By default, splits are done", + "on multi newlines,", + "then single newlines, then spaces", + ], ), ( "Overlap splitting", @@ -346,7 +379,11 @@ def test_process_multiple_chunks_with_relevant_fields(): def test_text_splitters(label, text, chunk_size, chunk_overlap, splitter_config, expected_chunks): processor = initialize_processor( ProcessingConfigModel( - chunk_size=chunk_size, chunk_overlap=chunk_overlap, text_fields=["text"], metadata_fields=None, text_splitter=splitter_config + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + text_fields=["text"], + metadata_fields=None, + text_splitter=splitter_config, ) ) @@ -378,16 +415,42 @@ def test_text_splitters(label, text, chunk_size, chunk_overlap, splitter_config, @pytest.mark.parametrize( "label, split_config, has_error_message", [ - ("Invalid separator", SeparatorSplitterConfigModel(mode="separator", separators=['"xxx']), True), - ("Missing quotes", SeparatorSplitterConfigModel(mode="separator", separators=["xxx"]), True), - ("Non-string separator", SeparatorSplitterConfigModel(mode="separator", separators=["123"]), True), - ("Object separator", SeparatorSplitterConfigModel(mode="separator", separators=["{}"]), True), - ("Proper separator", SeparatorSplitterConfigModel(mode="separator", separators=['"xxx"', '"\\n\\n"']), False), + ( + "Invalid separator", + SeparatorSplitterConfigModel(mode="separator", separators=['"xxx']), + True, + ), + ( + "Missing quotes", + SeparatorSplitterConfigModel(mode="separator", separators=["xxx"]), + True, + ), + ( + "Non-string separator", + SeparatorSplitterConfigModel(mode="separator", separators=["123"]), + True, + ), + ( + "Object separator", + SeparatorSplitterConfigModel(mode="separator", separators=["{}"]), + True, + ), + ( + "Proper separator", + SeparatorSplitterConfigModel(mode="separator", separators=['"xxx"', '"\\n\\n"']), + False, + ), ], ) def test_text_splitter_check(label, split_config, has_error_message): error = DocumentProcessor.check_config( - ProcessingConfigModel(chunk_size=48, chunk_overlap=0, text_fields=None, metadata_fields=None, text_splitter=split_config) + ProcessingConfigModel( + chunk_size=48, + chunk_overlap=0, + text_fields=None, + metadata_fields=None, + text_splitter=split_config, + ) ) if has_error_message: assert error is not None @@ -400,12 +463,22 @@ def test_text_splitter_check(label, split_config, has_error_message): [ (None, {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), ([], {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), - ([FieldNameMappingConfigModel(from_field="abc", to_field="AAA")], {"abc": "def", "xyz": 123}, {"AAA": "def", "xyz": 123}), - ([FieldNameMappingConfigModel(from_field="non_existing", to_field="AAA")], {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), + ( + [FieldNameMappingConfigModel(from_field="abc", to_field="AAA")], + {"abc": "def", "xyz": 123}, + {"AAA": "def", "xyz": 123}, + ), + ( + [FieldNameMappingConfigModel(from_field="non_existing", to_field="AAA")], + {"abc": "def", "xyz": 123}, + {"abc": "def", "xyz": 123}, + ), ], ) def test_rename_metadata_fields( - mappings: Optional[List[FieldNameMappingConfigModel]], fields: Mapping[str, Any], expected_chunk_metadata: 
Mapping[str, Any] + mappings: Optional[List[FieldNameMappingConfigModel]], + fields: Mapping[str, Any], + expected_chunk_metadata: Mapping[str, Any], ): processor = initialize_processor() @@ -422,21 +495,43 @@ def test_rename_metadata_fields( chunks, id_to_delete = processor.process(record) assert len(chunks) == 1 - assert chunks[0].metadata == {**expected_chunk_metadata, "_ab_stream": "namespace1_stream1", "text": "abc"} + assert chunks[0].metadata == { + **expected_chunk_metadata, + "_ab_stream": "namespace1_stream1", + "text": "abc", + } @pytest.mark.parametrize( "primary_key_value, stringified_primary_key, primary_key", [ ({"id": 99}, "namespace1_stream1_99", [["id"]]), - ({"id": 99, "name": "John Doe"}, "namespace1_stream1_99_John Doe", [["id"], ["name"]]), - ({"id": 99, "name": "John Doe", "age": 25}, "namespace1_stream1_99_John Doe_25", [["id"], ["name"], ["age"]]), - ({"nested": {"id": "abc"}, "name": "John Doe"}, "namespace1_stream1_abc_John Doe", [["nested", "id"], ["name"]]), - ({"nested": {"id": "abc"}}, "namespace1_stream1_abc___not_found__", [["nested", "id"], ["name"]]), + ( + {"id": 99, "name": "John Doe"}, + "namespace1_stream1_99_John Doe", + [["id"], ["name"]], + ), + ( + {"id": 99, "name": "John Doe", "age": 25}, + "namespace1_stream1_99_John Doe_25", + [["id"], ["name"], ["age"]], + ), + ( + {"nested": {"id": "abc"}, "name": "John Doe"}, + "namespace1_stream1_abc_John Doe", + [["nested", "id"], ["name"]], + ), + ( + {"nested": {"id": "abc"}}, + "namespace1_stream1_abc___not_found__", + [["nested", "id"], ["name"]], + ), ], ) def test_process_multiple_chunks_with_dedupe_mode( - primary_key_value: Mapping[str, Any], stringified_primary_key: str, primary_key: List[List[str]] + primary_key_value: Mapping[str, Any], + stringified_primary_key: str, + primary_key: List[List[str]], ): processor = initialize_processor() @@ -462,3 +557,90 @@ def test_process_multiple_chunks_with_dedupe_mode( for chunk in chunks: assert chunk.metadata["_ab_record_id"] == stringified_primary_key assert id_to_delete == stringified_primary_key + + +@pytest.mark.parametrize( + "record, sync_mode, has_chunks, raises, expected_id_to_delete", + [ + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + True, + False, + "namespace1_stream1_1", + id="update", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append, + True, + False, + None, + id="append", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1", "_ab_cdc_deleted_at": 1234}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + False, + "namespace1_stream1_1", + id="cdc_delete", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"id": "1", "_ab_cdc_deleted_at": 1234}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + False, + "namespace1_stream1_1", + id="cdc_delete_without_text", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + True, + "namespace1_stream1_1", + id="update_without_text", + ), + ], +) +def test_process_cdc_records(record, sync_mode, has_chunks, raises, 
expected_id_to_delete): + processor = initialize_processor() + + processor.text_fields = ["text"] + + processor.streams["namespace1_stream1"].destination_sync_mode = sync_mode + + if raises: + with pytest.raises(AirbyteTracedException): + processor.process(record) + else: + chunks, id_to_delete = processor.process(record) + if has_chunks: + assert len(chunks) > 0 + assert id_to_delete == expected_id_to_delete diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index ecec2379693f..0800ff62d8f6 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -686,14 +686,19 @@ def test_raise_on_http_errors(mocker, error): ({"error": {"message": "something broke"}}, "something broke"), ({"error": "err-001", "message": "something broke"}, "something broke"), ({"failure": {"message": "something broke"}}, "something broke"), + ({"detail": {"message": "something broke"}}, "something broke"), ({"error": {"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}}, "one, two, three"), ({"errors": ["one", "two", "three"]}, "one, two, three"), + ({"errors": [None, {}, "third error", 9002.09]}, "third error"), ({"messages": ["one", "two", "three"]}, "one, two, three"), ({"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), ({"error": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), ({"errors": [{"error": "one"}, {"error": "two"}, {"error": "three"}]}, "one, two, three"), ({"failures": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), + ({"details": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), + ({"details": ["one", 10087, True]}, "one"), (["one", "two", "three"], "one, two, three"), + ({"detail": False}, None), ([{"error": "one"}, {"error": "two"}, {"error": "three"}], "one, two, three"), ({"error": True}, None), ({"something_else": "hi"}, None), diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. + """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. 
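Note on the `_try_get_error` change exercised by the new `test_http_requester.py` cases above: the helper now also probes `details` and `detail` when walking a response payload. The following is a minimal illustrative sketch of that recursive lookup, reconstructed from the hunk and the expected test outputs rather than copied verbatim from the CDK; the relative ordering of the middle keys is an assumption.

from typing import Any

def _try_get_error(value: Any) -> Any:
    # Strings are returned as-is; they are already messages.
    if isinstance(value, str):
        return value
    # Lists: extract a message from each element and join the non-empty ones,
    # e.g. {"errors": [None, {}, "third error", 9002.09]} -> "third error".
    if isinstance(value, list):
        errors = [_try_get_error(v) for v in value]
        return ", ".join(v for v in errors if v is not None)
    # Dicts: probe the known keys and recurse into whichever one is present.
    # "details"/"detail" are the keys added by this change; the order of the
    # other keys is assumed here.
    if isinstance(value, dict):
        new_value = (
            value.get("message")
            or value.get("messages")
            or value.get("error")
            or value.get("errors")
            or value.get("failures")
            or value.get("failure")
            or value.get("details")
            or value.get("detail")
        )
        return _try_get_error(new_value)
    # Numbers, booleans, None, etc. carry no usable message.
    return None

# Mirrors a few of the new parametrized expectations:
assert _try_get_error({"detail": {"message": "something broke"}}) == "something broke"
assert _try_get_error({"details": ["one", 10087, True]}) == "one"
assert _try_get_error({"detail": False}) is None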
diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index d98c22f8be33..1eb99d5a25cd 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.7.0" +version = "2.7.2" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] diff --git a/airbyte-integrations/connectors/destination-bigquery/gradle.properties b/airbyte-integrations/connectors/destination-bigquery/gradle.properties new file mode 100644 index 000000000000..4dbe8b8729df --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 diff --git a/airbyte-integrations/connectors/destination-langchain/Dockerfile b/airbyte-integrations/connectors/destination-langchain/Dockerfile index b4b48cdd04c2..30452c2628ac 100644 --- a/airbyte-integrations/connectors/destination-langchain/Dockerfile +++ b/airbyte-integrations/connectors/destination-langchain/Dockerfile @@ -42,5 +42,5 @@ COPY destination_langchain ./destination_langchain ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-langchain diff --git a/airbyte-integrations/connectors/destination-langchain/metadata.yaml b/airbyte-integrations/connectors/destination-langchain/metadata.yaml index a76d4126868f..f8db27c1afe0 100644 --- a/airbyte-integrations/connectors/destination-langchain/metadata.yaml +++ b/airbyte-integrations/connectors/destination-langchain/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: database connectorType: destination definitionId: cf98d52c-ba5a-4dfd-8ada-c1baebfa6e73 - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 dockerRepository: airbyte/destination-langchain githubIssueLabel: destination-langchain icon: langchain.svg diff --git a/airbyte-integrations/connectors/destination-langchain/setup.py b/airbyte-integrations/connectors/destination-langchain/setup.py index 80f25bd65f1e..5446952fc464 100644 --- a/airbyte-integrations/connectors/destination-langchain/setup.py +++ b/airbyte-integrations/connectors/destination-langchain/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk", + "airbyte-cdk==0.51.10", "langchain", "openai", "requests", diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml index 1ea5e6f7eb52..47cd75672448 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml @@ -55,8 +55,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_FLAT_FILE_RETURNS_DATA_BY_RETURN_DATE bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA - bypass_reason: "no records" - name: GET_VENDOR_SALES_REPORT bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT @@ -67,8 +65,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA - 
bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT bypass_reason: "no records" - name: GET_AFN_INVENTORY_DATA @@ -83,8 +79,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA - bypass_reason: "no records" - name: GET_MERCHANT_LISTINGS_DATA_BACK_COMPAT bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT @@ -97,8 +91,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_FBA_SNS_PERFORMANCE_DATA bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA - bypass_reason: "no records" - name: GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA bypass_reason: "no records" - name: GET_FBA_INVENTORY_PLANNING_DATA @@ -113,8 +105,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_STRANDED_INVENTORY_UI_DATA bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA - bypass_reason: "no records" - name: GET_XML_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL bypass_reason: "no records" - name: ListFinancialEvents diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json deleted file mode 100644 index 376c90214355..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA", - "json_schema": { - "title": "FBA Daily Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json deleted file mode 100644 index eb241d9e7e3e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA", - "json_schema": { - "title": "FBA Inventory Adjustments Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "adjusted-date": { "type": ["null", "string"] }, - "transaction-item-id": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": 
["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "reason": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] }, - "reconciled": { "type": ["null", "string"] }, - "unreconciled": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json deleted file mode 100644 index 92575cfb052e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA", - "json_schema": { - "title": "FBA Received Inventory Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "received-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fba-shipment-id": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json deleted file mode 100644 index b38e9b2849ef..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA", - "json_schema": { - "title": "FBA Inventory Event Detail Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "transaction-type": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json deleted file mode 100644 index c695f887c910..000000000000 --- 
a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA", - "json_schema": { - "title": "FBA Monthly Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "month": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "average-quantity": { "type": ["null", "string"] }, - "end-quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json index bebd0fa00a49..a58a834e17d3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json @@ -41,24 +41,9 @@ "GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, - "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, "GET_FBA_FULFILLMENT_CUSTOMER_SHIPMENT_PROMOTION_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, - "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, "GET_FBA_MYI_UNSUPPRESSED_INVENTORY_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json deleted file mode 100644 index 9c8e32370a3e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "documentationUrl": "https://docs.airbyte.com/integrations/sources/amazon-seller-partner", - "changelogUrl": "https://docs.airbyte.com/integrations/sources/amazon-seller-partner", - "connectionSpecification": { - "title": "Amazon Seller Partner Spec", - "type": "object", - "required": [ - "aws_environment", - "region", - "lwa_app_id", - "lwa_client_secret", - "refresh_token", - "replication_start_date" - ], - "additionalProperties": true, - "properties": { - "auth_type": { - "title": "Auth Type", - "const": "oauth2.0", - "order": 0, - "type": "string" - }, - "aws_environment": { - "title": "AWS Environment", - "description": "Select the AWS Environment.", - "enum": ["PRODUCTION", "SANDBOX"], - "default": "PRODUCTION", - "type": "string", - "order": 1 - }, - "region": { - "title": "AWS Region", - "description": "Select the AWS Region.", - "enum": [ - "AE", - "AU", - "BE", - "BR", - "CA", - "DE", - 
"EG", - "ES", - "FR", - "GB", - "IN", - "IT", - "JP", - "MX", - "NL", - "PL", - "SA", - "SE", - "SG", - "TR", - "UK", - "US" - ], - "default": "US", - "type": "string", - "order": 2 - }, - "aws_access_key": { - "title": "AWS Access Key", - "description": "Specifies the AWS access key used as part of the credentials to authenticate the user.", - "airbyte_secret": true, - "order": 3, - "type": "string" - }, - "aws_secret_key": { - "title": "AWS Secret Access Key", - "description": "Specifies the AWS secret key used as part of the credentials to authenticate the user.", - "airbyte_secret": true, - "order": 4, - "type": "string" - }, - "role_arn": { - "title": "Role ARN", - "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. (Needs permission to 'Assume Role' STS).", - "airbyte_secret": true, - "order": 5, - "type": "string" - }, - "lwa_app_id": { - "title": "LWA Client Id", - "description": "Your Login with Amazon Client ID.", - "order": 6, - "airbyte_secret": true, - "type": "string" - }, - "lwa_client_secret": { - "title": "LWA Client Secret", - "description": "Your Login with Amazon Client Secret.", - "airbyte_secret": true, - "order": 7, - "type": "string" - }, - "refresh_token": { - "title": "Refresh Token", - "description": "The Refresh Token obtained via OAuth flow authorization.", - "airbyte_secret": true, - "order": 8, - "type": "string" - }, - "replication_start_date": { - "title": "Start Date", - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - "examples": ["2017-01-25T00:00:00Z"], - "order": 9, - "type": "string" - }, - "replication_end_date": { - "title": "End Date", - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data after this date will not be replicated.", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$|^$", - "examples": ["2017-01-25T00:00:00Z"], - "order": 10, - "type": "string" - }, - "period_in_days": { - "title": "Period In Days", - "type": "integer", - "description": "Will be used for stream slicing for initial full_refresh sync when no updated state is present for reports that support sliced incremental sync.", - "default": 90, - "order": 11 - }, - "report_options": { - "title": "Report Options", - "description": "Additional information passed to reports. This varies by report type. Must be a valid json string.", - "examples": [ - "{\"GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT\": {\"reportPeriod\": \"WEEK\"}}", - "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" - ], - "order": 12, - "type": "string" - }, - "max_wait_seconds": { - "title": "Max wait time for reports (in seconds)", - "description": "Sometimes report can take up to 30 minutes to generate. This will set the limit for how long to wait for a successful report.", - "default": 500, - "examples": ["500", "1980"], - "order": 13, - "type": "integer" - }, - "advanced_stream_options": { - "title": "Advanced Stream Options", - "description": "Additional information to configure report options. This varies by report type, not every report implement this kind of feature. 
Must be a valid json string.", - "examples": [ - "{\"GET_SALES_AND_TRAFFIC_REPORT\": {\"availability_sla_days\": 3}}", - "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" - ], - "order": 14, - "type": "string" - } - } - }, - "advanced_auth": { - "auth_flow_type": "oauth2.0", - "predicate_key": ["auth_type"], - "predicate_value": "oauth2.0", - "oauth_config_specification": { - "complete_oauth_output_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "refresh_token": { - "type": "string", - "path_in_connector_config": ["refresh_token"] - } - } - }, - "complete_oauth_server_input_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "lwa_app_id": { - "type": "string" - }, - "lwa_client_secret": { - "type": "string" - } - } - }, - "complete_oauth_server_output_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "lwa_app_id": { - "type": "string", - "path_in_connector_config": ["lwa_app_id"] - }, - "lwa_client_secret": { - "type": "string", - "path_in_connector_config": ["lwa_client_secret"] - } - } - } - } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/main.py b/airbyte-integrations/connectors/source-amazon-seller-partner/main.py index a09a9063026c..f5089129f6a6 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/main.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/main.py @@ -7,7 +7,9 @@ from airbyte_cdk.entrypoint import launch from source_amazon_seller_partner import SourceAmazonSellerPartner +from source_amazon_seller_partner.config_migrations import MigrateAccountType if __name__ == "__main__": source = SourceAmazonSellerPartner() + MigrateAccountType.migrate(sys.argv[1:], source) launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml index 83c764ab41f1..80b56d5d014b 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 - dockerImageTag: 1.6.2 + dockerImageTag: 2.0.2 dockerRepository: airbyte/source-amazon-seller-partner documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner githubIssueLabel: source-amazon-seller-partner @@ -20,6 +20,11 @@ data: oss: enabled: true releaseStage: alpha + releases: + breakingChanges: + 2.0.0: + message: "Deprecated FBA reports will be removed permanently from Cloud and Brand Analytics Reports will be removed temporarily. 
Updates on Brand Analytics Reports can be tracked here: [#32353](https://github.com/airbytehq/airbyte/issues/32353)" + upgradeDeadline: "2023-11-29" supportLevel: community tags: - language:python diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py b/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py index af80eec8c453..9b4396a6c472 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk", "pendulum~=2.1", "pycryptodome~=3.10", "xmltodict~=0.12"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "xmltodict~=0.12"] TEST_REQUIREMENTS = [ "requests-mock~=1.9.3", diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py new file mode 100644 index 000000000000..5d2daf748f6c --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py @@ -0,0 +1,79 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import logging +from typing import Any, List, Mapping + +from airbyte_cdk.config_observation import create_connector_config_control_message +from airbyte_cdk.entrypoint import AirbyteEntrypoint +from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository + +from .source import SourceAmazonSellerPartner + +logger = logging.getLogger("airbyte_logger") + + +class MigrateAccountType: + """ + This class stands for migrating the config at runtime, + while providing the backward compatibility when falling back to the previous source version. + + Specifically, starting from `2.0.1`, the `account_type` property becomes required. + For those connector configs that do not contain this key, the default value of `Seller` will be used. + Reverse operation is not needed as this field is ignored in previous versions of the connector. + """ + + message_repository: MessageRepository = InMemoryMessageRepository() + migration_key: str = "account_type" + + @classmethod + def _should_migrate(cls, config: Mapping[str, Any]) -> bool: + """ + This method determines whether config requires migration. + Returns: + > True, if the transformation is necessary + > False, otherwise. 
+ """ + return cls.migration_key not in config + + @classmethod + def _populate_with_default_value(cls, config: Mapping[str, Any], source: SourceAmazonSellerPartner = None) -> Mapping[str, Any]: + config[cls.migration_key] = "Seller" + return config + + @classmethod + def _modify_and_save(cls, config_path: str, source: SourceAmazonSellerPartner, config: Mapping[str, Any]) -> Mapping[str, Any]: + # modify the config + migrated_config = cls._populate_with_default_value(config, source) + # save the config + source.write_config(migrated_config, config_path) + # return modified config + return migrated_config + + @classmethod + def _emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None: + # add the Airbyte Control Message to message repo + cls.message_repository.emit_message(create_connector_config_control_message(migrated_config)) + # emit the Airbyte Control Message from message queue to stdout + for message in cls.message_repository.consume_queue(): + print(message.json(exclude_unset=True)) + + @classmethod + def migrate(cls, args: List[str], source: SourceAmazonSellerPartner) -> None: + """ + This method checks the input args, should the config be migrated, + transform if neccessary and emit the CONTROL message. + """ + # get config path + config_path = AirbyteEntrypoint(source).extract_config(args) + # proceed only if `--config` arg is provided + if config_path: + # read the existing config + config = source.read_config(config_path) + # migration check + if cls._should_migrate(config): + cls._emit_control_message( + cls._modify_and_save(config_path, source, config), + ) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json deleted file mode 100644 index 401cbf484380..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "title": "FBA Daily Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json deleted file mode 100644 index 916f932cc057..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "title": "FBA Inventory Adjustments Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "adjusted-date": { "type": ["null", "string"] }, - "transaction-item-id": { "type": ["null", 
"string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "reason": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] }, - "reconciled": { "type": ["null", "string"] }, - "unreconciled": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json deleted file mode 100644 index 3d23369d51e1..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "title": "FBA Received Inventory Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "received-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fba-shipment-id": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json deleted file mode 100644 index 1ddf4fceca59..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "title": "FBA Inventory Event Detail Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "transaction-type": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json deleted file mode 100644 index 796985c5210e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "title": "FBA Monthly Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "month": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "average-quantity": { "type": 
["null", "string"] }, - "end-quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index 2ca326c5f47f..dfe04d11d35f 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +from os import getenv from typing import Any, List, Mapping, Tuple from airbyte_cdk.logger import AirbyteLogger @@ -20,12 +20,7 @@ FbaAfnInventoryReports, FbaCustomerReturnsReports, FbaEstimatedFbaFeesTxtReport, - FbaFulfillmentCurrentInventoryReport, FbaFulfillmentCustomerShipmentPromotionReport, - FbaFulfillmentInventoryAdjustReport, - FbaFulfillmentInventoryReceiptsReport, - FbaFulfillmentInventorySummaryReport, - FbaFulfillmentMonthlyInventoryReport, FbaInventoryPlaningReport, FbaMyiUnsuppressedInventoryReport, FbaOrdersReports, @@ -86,7 +81,6 @@ def _get_stream_kwargs(self, config: Mapping[str, Any]) -> Mapping[str, Any]: "marketplace_id": marketplace_id, "period_in_days": config.get("period_in_days", 90), "report_options": config.get("report_options"), - "max_wait_seconds": config.get("max_wait_seconds", 500), "replication_end_date": config.get("replication_end_date"), "advanced_stream_options": config.get("advanced_stream_options"), } @@ -127,7 +121,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: :param config: A Mapping of the user input configuration as defined in the connector spec. 
""" stream_kwargs = self._get_stream_kwargs(config) - return [ + streams = [ FbaCustomerReturnsReports(**stream_kwargs), FbaAfnInventoryReports(**stream_kwargs), FbaAfnInventoryByCountryReports(**stream_kwargs), @@ -144,28 +138,16 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: FulfilledShipmentsReports(**stream_kwargs), MerchantListingsReports(**stream_kwargs), VendorDirectFulfillmentShipping(**stream_kwargs), - VendorInventoryReports(**stream_kwargs), - VendorSalesReports(**stream_kwargs), Orders(**stream_kwargs), OrderItems(**stream_kwargs), OrderReportDataShipping(**stream_kwargs), - SellerAnalyticsSalesAndTrafficReports(**stream_kwargs), SellerFeedbackReports(**stream_kwargs), - BrandAnalyticsMarketBasketReports(**stream_kwargs), - BrandAnalyticsSearchTermsReports(**stream_kwargs), - BrandAnalyticsRepeatPurchaseReports(**stream_kwargs), - BrandAnalyticsAlternatePurchaseReports(**stream_kwargs), - BrandAnalyticsItemComparisonReports(**stream_kwargs), GetXmlBrowseTreeData(**stream_kwargs), ListFinancialEventGroups(**stream_kwargs), ListFinancialEvents(**stream_kwargs), LedgerDetailedViewReports(**stream_kwargs), FbaEstimatedFbaFeesTxtReport(**stream_kwargs), - FbaFulfillmentCurrentInventoryReport(**stream_kwargs), FbaFulfillmentCustomerShipmentPromotionReport(**stream_kwargs), - FbaFulfillmentInventoryAdjustReport(**stream_kwargs), - FbaFulfillmentInventoryReceiptsReport(**stream_kwargs), - FbaFulfillmentInventorySummaryReport(**stream_kwargs), FbaMyiUnsuppressedInventoryReport(**stream_kwargs), MerchantCancelledListingsReport(**stream_kwargs), MerchantListingsReport(**stream_kwargs), @@ -173,7 +155,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: MerchantListingsInactiveData(**stream_kwargs), StrandedInventoryUiReport(**stream_kwargs), XmlAllOrdersDataByOrderDataGeneral(**stream_kwargs), - FbaFulfillmentMonthlyInventoryReport(**stream_kwargs), MerchantListingsFypReport(**stream_kwargs), FbaSnsForecastReport(**stream_kwargs), FbaSnsPerformanceReport(**stream_kwargs), @@ -183,3 +164,18 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: LedgerSummaryViewReport(**stream_kwargs), FbaReimbursementsReports(**stream_kwargs), ] + # TODO: Remove after Brand Analytics will be enabled in CLOUD: + # https://github.com/airbytehq/airbyte/issues/32353 + if getenv("DEPLOYMENT_MODE", "").upper() != "CLOUD": + brand_analytics_reports = [ + BrandAnalyticsMarketBasketReports(**stream_kwargs), + BrandAnalyticsSearchTermsReports(**stream_kwargs), + BrandAnalyticsRepeatPurchaseReports(**stream_kwargs), + BrandAnalyticsAlternatePurchaseReports(**stream_kwargs), + BrandAnalyticsItemComparisonReports(**stream_kwargs), + SellerAnalyticsSalesAndTrafficReports(**stream_kwargs), + VendorSalesReports(**stream_kwargs), + VendorInventoryReports(**stream_kwargs), + ] + streams += brand_analytics_reports + return streams diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json index afcd6279342f..f0f37d084ff1 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json @@ -7,6 +7,7 @@ "required": [ "aws_environment", "region", + "account_type", "lwa_app_id", "lwa_client_secret", "refresh_token", @@ -59,10 +60,18 @@ "type": "string", "order": 2 }, + 
"account_type": { + "title": "AWS Seller Partner Account Type", + "description": "Type of the Account you're going to authorize the Airbyte application by", + "enum": ["Seller", "Vendor"], + "default": "Seller", + "type": "string", + "order": 3 + }, "lwa_app_id": { "title": "LWA Client Id", "description": "Your Login with Amazon Client ID.", - "order": 3, + "order": 4, "airbyte_secret": true, "type": "string" }, @@ -70,14 +79,14 @@ "title": "LWA Client Secret", "description": "Your Login with Amazon Client Secret.", "airbyte_secret": true, - "order": 4, + "order": 5, "type": "string" }, "refresh_token": { "title": "Refresh Token", "description": "The Refresh Token obtained via OAuth flow authorization.", "airbyte_secret": true, - "order": 5, + "order": 6, "type": "string" }, "replication_start_date": { @@ -85,7 +94,7 @@ "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2017-01-25T00:00:00Z"], - "order": 6, + "order": 7, "type": "string", "format": "date-time" }, @@ -94,7 +103,7 @@ "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data after this date will not be replicated.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$|^$", "examples": ["2017-01-25T00:00:00Z"], - "order": 7, + "order": 8, "type": "string", "format": "date-time" }, @@ -104,7 +113,7 @@ "description": "Will be used for stream slicing for initial full_refresh sync when no updated state is present for reports that support sliced incremental sync.", "default": 90, "minimum": 1, - "order": 8 + "order": 9 }, "report_options": { "title": "Report Options", @@ -113,17 +122,8 @@ "{\"GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT\": {\"reportPeriod\": \"WEEK\"}}", "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" ], - "order": 9, - "type": "string" - }, - "max_wait_seconds": { - "title": "Max wait time for reports (in seconds)", - "description": "Sometimes report can take up to 30 minutes to generate. 
This will set the limit for how long to wait for a successful report.", - "default": 500, - "examples": ["500", "1980"], "order": 10, - "minimum": 1, - "type": "integer" + "type": "string" }, "advanced_stream_options": { "title": "Advanced Stream Options", @@ -132,7 +132,7 @@ "{\"GET_SALES_AND_TRAFFIC_REPORT\": {\"availability_sla_days\": 3}}", "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" ], - "order": 11, + "order": 12, "type": "string" } } @@ -142,6 +142,19 @@ "predicate_key": ["auth_type"], "predicate_value": "oauth2.0", "oauth_config_specification": { + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "properties": { + "region": { + "type": "string", + "path_in_connector_config": ["region"] + }, + "account_type": { + "type": "string", + "path_in_connector_config": ["account_type"] + } + } + }, "complete_oauth_output_specification": { "type": "object", "additionalProperties": false, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index 38304231378d..ac7ff0485a74 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -38,7 +38,6 @@ def __init__( period_in_days: Optional[int], report_options: Optional[str], advanced_stream_options: Optional[str], - max_wait_seconds: Optional[int], replication_end_date: Optional[str], *args, **kwargs, @@ -132,6 +131,7 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class ReportsAmazonSPStream(HttpStream, ABC): + max_wait_seconds = 3600 """ API docs: https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reports_2020-09-04.md API model: https://github.com/amzn/selling-partner-api-models/blob/main/models/reports-api-model/reports_2020-09-04.json @@ -163,7 +163,6 @@ def __init__( marketplace_id: str, period_in_days: Optional[int], report_options: Optional[str], - max_wait_seconds: Optional[int], replication_end_date: Optional[str], advanced_stream_options: Optional[str], *args, @@ -176,7 +175,6 @@ def __init__( self.marketplace_id = marketplace_id self.period_in_days = max(period_in_days, self.replication_start_date_limit_in_days) # ensure old configs work as well self._report_options = report_options or "{}" - self.max_wait_seconds = max_wait_seconds self._advanced_stream_options = dict() self._http_method = "GET" if advanced_stream_options is not None: @@ -250,11 +248,11 @@ def _retrieve_report(self, report_id: str) -> Mapping[str, Any]: return report_payload @default_backoff_handler(factor=5, max_tries=5) - def download_and_decompress_report_document(self, url, payload): + def download_and_decompress_report_document(self, payload: dict) -> str: """ Unpacks a report document """ - report = requests.get(url) + report = requests.get(payload.get("url")) report.raise_for_status() if "compressionAlgorithm" in payload: return gzip.decompress(report.content).decode("iso-8859-1") @@ -265,7 +263,7 @@ def parse_response( ) -> Iterable[Mapping]: payload = response.json() - document = self.download_and_decompress_report_document(payload.get("url"), payload) + document = self.download_and_decompress_report_document(payload) document_records = self.parse_document(document) yield from document_records @@ -474,26 +472,10 @@ class 
FbaEstimatedFbaFeesTxtReport(ReportsAmazonSPStream): name = "GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA" -class FbaFulfillmentCurrentInventoryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA" - - class FbaFulfillmentCustomerShipmentPromotionReport(ReportsAmazonSPStream): name = "GET_FBA_FULFILLMENT_CUSTOMER_SHIPMENT_PROMOTION_DATA" -class FbaFulfillmentInventoryAdjustReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA" - - -class FbaFulfillmentInventoryReceiptsReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA" - - -class FbaFulfillmentInventorySummaryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA" - - class FbaMyiUnsuppressedInventoryReport(ReportsAmazonSPStream): name = "GET_FBA_MYI_UNSUPPRESSED_INVENTORY_DATA" @@ -532,10 +514,6 @@ class MerchantCancelledListingsReport(ReportsAmazonSPStream): name = "GET_MERCHANT_CANCELLED_LISTINGS_DATA" -class FbaFulfillmentMonthlyInventoryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA" - - class MerchantListingsFypReport(ReportsAmazonSPStream): name = "GET_MERCHANTS_LISTINGS_FYP_REPORT" @@ -922,10 +900,7 @@ def parse_response( payload = response.json() - document = self.decompress_report_document( - payload.get("url"), - payload, - ) + document = self.download_and_decompress_report_document(payload) document_records = self.parse_document(document) # Not all (partial) responses include the request date, so adding it manually here diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py index 7621df97a0f2..5e84a2cf47f3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py @@ -104,7 +104,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream @@ -123,7 +122,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py new file mode 100644 index 000000000000..52af77133e47 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import json +from typing import Any, Mapping + +from airbyte_cdk.models import OrchestratorType, Type +from airbyte_cdk.sources import Source +from source_amazon_seller_partner.config_migrations import MigrateAccountType +from source_amazon_seller_partner.source import SourceAmazonSellerPartner + +CMD = "check" +TEST_NOT_MIGRATED_CONFIG_PATH = "unit_tests/test_migrations/not_migrated_config.json" +TEST_MIGRATED_CONFIG_PATH = "unit_tests/test_migrations/migrated_config.json" +SOURCE: Source = SourceAmazonSellerPartner() + + +def load_config(config_path: str = TEST_NOT_MIGRATED_CONFIG_PATH) -> Mapping[str, Any]: + with open(config_path, "r") as config: + return json.load(config) + + +def test_migrate_config(capsys): + config = load_config(TEST_NOT_MIGRATED_CONFIG_PATH) + assert "account_type" not in config + migration_instance = MigrateAccountType() + migration_instance.migrate([CMD, "--config", TEST_NOT_MIGRATED_CONFIG_PATH], SOURCE) + control_msg = json.loads(capsys.readouterr().out) + assert control_msg["type"] == Type.CONTROL.value + assert control_msg["control"]["type"] == OrchestratorType.CONNECTOR_CONFIG.value + migrated_config = control_msg["control"]["connectorConfig"]["config"] + assert migrated_config["account_type"] == "Seller" + + +def test_should_not_migrate(): + config = load_config(TEST_MIGRATED_CONFIG_PATH) + assert config["account_type"] + migration_instance = MigrateAccountType() + assert not migration_instance._should_migrate(config) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json new file mode 100644 index 000000000000..3b65000693d3 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json @@ -0,0 +1,9 @@ +{ + "refresh_token": "refresh_token", + "lwa_app_id": "amzn1.application-oa2-client.lwa_app_id", + "lwa_client_secret": "amzn1.oa2-cs.v1.lwa_client_secret", + "replication_start_date": "2022-09-01T00:00:00Z", + "aws_environment": "PRODUCTION", + "account_type": "Vendor", + "region": "US" +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json new file mode 100644 index 000000000000..e7f89850ba5b --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json @@ -0,0 +1,8 @@ +{ + "refresh_token": "refresh_token", + "lwa_app_id": "amzn1.application-oa2-client.lwa_app_id", + "lwa_client_secret": "amzn1.oa2-cs.v1.lwa_client_secret", + "replication_start_date": "2022-09-01T00:00:00Z", + "aws_environment": "PRODUCTION", + "region": "US" +} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py index 0c4a80a1bcf4..7b7d5c016a9a 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py @@ -39,7 +39,6 @@ def _internal(): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git 
a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py index c255aa92eee6..106b5b543785 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py @@ -18,7 +18,6 @@ def test_stream_uses_advanced_options(): period_in_days=0, report_options=None, advanced_stream_options='{"GET_SALES_AND_TRAFFIC_REPORT":{"availability_sla_days": 3}}', - max_wait_seconds=500, ) assert stream.availability_sla_days == 3 diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py index a1f64cf10b64..77a21ef4bca8 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py @@ -86,7 +86,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py index 27d7ca2d3b36..9b2aab17a298 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py @@ -16,7 +16,6 @@ def reports_stream(marketplace_id): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=0, ) return stream diff --git a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl index 62627cf0aeae..73359a964320 100644 --- a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl @@ -15,8 +15,8 @@ {"stream":"campaign_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"TimePeriod":"2023-11-05","CurrencyCode":"USD","AdDistribution":"Search","DeviceType":"Computer","Network":"Syndicated search partners","DeliveredMatchType":"Exact","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","CampaignType":"Search & 
content","CampaignStatus":"Active","CampaignLabels":null,"Impressions":9,"Clicks":1,"Ctr":11.11,"Spend":0.03,"CostPerConversion":null,"QualityScore":5.0,"AdRelevance":3.0,"LandingPageExperience":1.0,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Assists":0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"CustomParameters":null,"ViewThroughConversions":0,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"AverageCpc":0.03,"AveragePosition":0.0,"AverageCpm":3.33,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":0,"LowQualitySophisticatedClicks":0,"LowQualityConversions":0,"LowQualityConversionRate":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null,"BudgetName":null,"BudgetStatus":null,"BudgetAssociationStatus":"Current","HistoricalQualityScore":5.0,"HistoricalExpectedCtr":2.0,"HistoricalAdRelevance":3.0,"HistoricalLandingPageExperience":1.0},"emitted_at":1699954081143} {"stream":"campaign_impression_performance_report_daily","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-07","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":3.37,"ImpressionLostToBudgetPercent":85.19,"ImpressionLostToRankAggPercent":11.45,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":null,"ClickSharePercent":null,"AbsoluteTopImpressionSharePercent":6.02,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":14.63,"TopImpressionShareLostToBudgetPercent":77.24,"AbsoluteTopImpressionShareLostToRankPercent":15.66,"AbsoluteTopImpressionShareLostToBudgetPercent":78.31,"TopImpressionSharePercent":8.13,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954182626} {"stream":"campaign_impression_performance_report_weekly","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-05","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":10.87,"ImpressionLostToBudgetPercent":17.05,"ImpressionLostToRankAggPercent":72.08,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":29.07,"ClickSharePercent":2.89,"AbsoluteTopImpressionSharePercent":8.88,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":76.51,"TopImpressionShareLostToBudgetPercent":9.99,"AbsoluteTopImpressionShareLostToRankPercent":81.99,"AbsoluteTopImpressionShareLostToBudgetPercent":9.13,"TopImpressionSharePercent":13.5,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954211223} -{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.18,"FirstPageBid":0.51,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700078149400} -{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.18,"FirstPageBid":0.51,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700078299436} +{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.0,"FirstPageBid":0.43,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700237754157} +{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.0,"FirstPageBid":0.43,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700237801690} {"stream":"geographic_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-07","Country":"Australia","CurrencyCode":"USD","DeliveredMatchType":"Broad","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Syndicated search partners","DeviceOS":"Windows","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","MetroArea":null,"State":"New South Wales","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"2000","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":"2000","LocationId":"122395","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699956863587} {"stream":"geographic_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-05","Country":"Argentina","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"Spanish","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","MetroArea":null,"State":"Buenos Aires Province","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"Buenos Aires 
Province","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":null,"LocationId":"141965","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699953673210} {"stream":"age_gender_audience_report_daily","data":{"AccountId":180519267,"AgeGroup":"Unknown","Gender":"Unknown","TimePeriod":"2023-11-07","AllConversions":0,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","CampaignId":531016227,"AdGroupName":"keywords","AdGroupId":1356799861840328,"AdDistribution":"Search","Impressions":3,"Clicks":1,"Conversions":0.0,"Spend":0.79,"Revenue":0.0,"ExtendedCost":0.0,"Assists":0,"Language":"German","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","BaseCampaignId":"531016227","AllRevenue":0.0,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":33.33,"TopImpressionRatePercent":100.0,"ConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0},"emitted_at":1699954406862} diff --git a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml index 63f2a336f97f..102607de6664 100644 --- a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml @@ -16,7 +16,7 @@ data: connectorSubtype: api connectorType: source definitionId: 47f25999-dd5e-4636-8c39-e7cea2453331 - dockerImageTag: 2.0.0 + dockerImageTag: 2.0.1 dockerRepository: airbyte/source-bing-ads documentationUrl: https://docs.airbyte.com/integrations/sources/bing-ads githubIssueLabel: source-bing-ads diff --git a/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py b/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py index 2be5dd890834..5a0d1c5818a3 100644 --- a/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py +++ b/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py @@ -154,7 +154,7 @@ def request_params( def get_start_date(self, stream_state: Mapping[str, Any] = None, account_id: str = None): if stream_state and account_id: if stream_state.get(account_id, {}).get(self.cursor_field): - return pendulum.parse(self.get_report_record_timestamp(stream_state[account_id][self.cursor_field])) + return pendulum.parse(stream_state[account_id][self.cursor_field]) return self.client.reports_start_date diff --git a/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py b/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py index 44c5329bd83a..a10ba7f94022 
100644 --- a/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py +++ b/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py @@ -200,7 +200,7 @@ def test_get_report_record_timestamp_hourly(stream_report_hourly_cls): def test_report_get_start_date_wo_stream_state(): expected_start_date = "2020-01-01" - test_report = TestReport() + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = "2020-01-01" stream_state = {} account_id = "123" @@ -209,20 +209,18 @@ def test_report_get_start_date_wo_stream_state(): def test_report_get_start_date_with_stream_state(): expected_start_date = pendulum.parse("2023-04-17T21:29:57") - test_report = TestReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = "2020-01-01" - stream_state = {"123": {"cursor_field": "2023-04-17T21:29:57+00:00"}} + stream_state = {"123": {"TimePeriod": "2023-04-17T21:29:57+00:00"}} account_id = "123" assert expected_start_date == test_report.get_start_date(stream_state, account_id) def test_report_get_start_date_performance_report_with_stream_state(): expected_start_date = pendulum.parse("2023-04-07T21:29:57") - test_report = TestPerformanceReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.config = {"lookback_window": 10} - stream_state = {"123": {"cursor_field": "2023-04-17T21:29:57+00:00"}} + stream_state = {"123": {"TimePeriod": "2023-04-17T21:29:57+00:00"}} account_id = "123" assert expected_start_date == test_report.get_start_date(stream_state, account_id) @@ -230,8 +228,7 @@ def test_report_get_start_date_performance_report_with_stream_state(): def test_report_get_start_date_performance_report_wo_stream_state(): days_to_subtract = 10 reports_start_date = pendulum.parse("2021-04-07T00:00:00") - test_report = TestPerformanceReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = reports_start_date test_report.config = {"lookback_window": days_to_subtract} stream_state = {} diff --git a/airbyte-integrations/connectors/source-cart/Dockerfile b/airbyte-integrations/connectors/source-cart/Dockerfile index 4323b70cdd86..d526813a96ab 100644 --- a/airbyte-integrations/connectors/source-cart/Dockerfile +++ b/airbyte-integrations/connectors/source-cart/Dockerfile @@ -21,5 +21,5 @@ COPY source_cart ./source_cart ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.1 +LABEL io.airbyte.version=0.3.0 LABEL io.airbyte.name=airbyte/source-cart diff --git a/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml b/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml index 3ab3140291d3..52803b05481a 100644 --- a/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml @@ -1,38 +1,51 @@ # See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) # for more information about how to configure these tests connector_image: airbyte/source-cart:dev -tests: +test_strictness_level: low +acceptance_tests: spec: 
- - spec_path: "source_cart/spec.json" - backward_compatibility_tests_config: - disable_for_version: "0.1.6" + tests: + - spec_path: "source_cart/spec.json" + backward_compatibility_tests_config: + disable_for_version: "0.1.6" connection: - - config_path: "secrets/config.json" - status: "succeed" - - config_path: "integration_tests/invalid_config.json" - status: "failed" - timeout_seconds: 180 + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + timeout_seconds: 180 discovery: - - config_path: "secrets/config.json" - backward_compatibility_tests_config: - disable_for_version: "0.1.6" + tests: + - config_path: "secrets/config.json" + backward_compatibility_tests_config: + disable_for_version: "0.1.6" basic_read: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - timeout_seconds: 1800 + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 1800 + empty_streams: + - name: "order_payments" + bypass_reason: "no data" + - name: "products" + bypass_reason: "no data" incremental: - - config_path: "secrets/config_central_api_router.json" - configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" - future_state_path: "integration_tests/abnormal_state.json" - timeout_seconds: 1800 - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - future_state_path: "integration_tests/abnormal_state.json" - timeout_seconds: 1800 + tests: + # - config_path: "secrets/config_central_api_router.json" + # configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" + # future_state_path: "integration_tests/abnormal_state.json" + # timeout_seconds: 1800 + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + future_state: + future_state_path: "integration_tests/abnormal_state.json" + timeout_seconds: 1800 full_refresh: - - config_path: "secrets/config_central_api_router.json" - configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" - timeout_seconds: 1800 - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - timeout_seconds: 1800 + tests: + - config_path: "secrets/config_central_api_router.json" + configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" + timeout_seconds: 1800 + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 1800 diff --git a/airbyte-integrations/connectors/source-cart/metadata.yaml b/airbyte-integrations/connectors/source-cart/metadata.yaml index 8f75a5db4ceb..191e92810614 100644 --- a/airbyte-integrations/connectors/source-cart/metadata.yaml +++ b/airbyte-integrations/connectors/source-cart/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: api connectorType: source definitionId: bb1a6d31-6879-4819-a2bd-3eed299ea8e2 - dockerImageTag: 0.2.1 + dockerImageTag: 0.3.0 dockerRepository: airbyte/source-cart githubIssueLabel: source-cart icon: cart.svg @@ -10,7 +10,7 @@ data: name: Cart.com registries: cloud: - enabled: false + enabled: true oss: enabled: true releaseStage: alpha diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json 
b/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json index 98fe097f1779..e7d377656a16 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -10,6 +12,9 @@ "address_line_1": { "type": ["string", "null"] }, + "address_type": { + "type": ["string", "null"] + }, "address_line_2": { "type": ["string", "null"] }, diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json index 8520252b4485..23c4e341dce3 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -7,6 +9,12 @@ "customer_number": { "type": ["string", "null"] }, + "credit_limit": { + "type": ["string", "null"] + }, + "payment_net_term": { + "type": ["string", "null"] + }, "last_name": { "type": ["string", "null"] }, @@ -38,7 +46,13 @@ "type": ["integer", "null"] }, "is_no_tax_customer": { - "type": "boolean" + "type": ["boolean", "null"] + }, + "is_inactive": { + "type": ["boolean", "null"] + }, + "lock_default_address": { + "type": ["boolean", "null"] }, "comments": { "type": ["string", "null"] diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json index e803c78c5ac1..b7223e79fd55 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -81,6 +83,69 @@ }, "warehouse_id": { "type": ["integer", "null"] + }, + "configuration": { + "type": ["string", "null"] + }, + "description": { + "type": ["string", "null"] + }, + "discount_amount": { + "type": ["number", "null"] + }, + "discount_percentage": { + "type": ["number", "null"] + }, + "fitment": { + "type": ["string", "null"] + }, + "is_non_shipping_item": { + "type": ["boolean", "null"] + }, + "item_number_full": { + "type": ["string", "null"] + }, + "order_shipping_address_id": { + "type": ["string", "null"] + }, + "personalizations": { + "type": ["array", "null"] + }, + "selected_shipping_method": { + "type": ["string", "null"] + }, + "selected_shipping_method_id": { + "type": ["string", "null"] + }, + "selected_shipping_provider_service": { + "type": ["string", "null"] + }, + "shipping_total": { + "type": ["string", "null"] + }, + "status": { + "type": ["string", "null"] + }, + "tax": { + "type": ["number", "null"] + }, + "tax_code": { + "type": ["string", "null"] + }, + "variant_inventory_id": { + "type": ["string", "null"] + }, + "shipping_classification_code": { + "type": ["string", "null"] + }, + "variants": { + "type": ["array", "null"] + }, + "vendor_store_id": { + "type": ["integer", "null"] + }, + "weight_unit": { + "type": ["string", "null"] 
} } } diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json index f4dee9743008..ab2f2c844d71 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json index eb7182c2f368..b77422eb2f54 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" }, "name": { "type": ["null", "string"] }, @@ -13,6 +15,7 @@ "created_at": { "type": ["null", "string"] }, "is_fully_refunded": { "type": ["null", "boolean"] }, "is_partially_refunded": { "type": ["null", "boolean"] }, - "is_quote_status": { "type": ["null", "boolean"] } + "is_quote_status": { "type": ["null", "boolean"] }, + "is_partially_shipped": { "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json index e5e7091efda4..f1ebdb8b5b9d 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -7,6 +9,23 @@ "customer_id": { "type": ["integer", "null"] }, + "delivery_tax": { + "type": ["string", "null"] + }, + "entered_by_type": { + "type": ["string", "null"] + }, + "shipping_selections": { + "type": ["array", "null"], + "items": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} + } + }, + "sales_agent_user_id": { + "type": ["string", "null"] + }, "customer_type_id": { "type": ["integer", "null"] }, diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json index ed1473eb08a6..5d0ac08fa31a 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" diff --git a/airbyte-integrations/connectors/source-chargebee/metadata.yaml b/airbyte-integrations/connectors/source-chargebee/metadata.yaml index 52df4ecc5974..854035925e39 100644 --- a/airbyte-integrations/connectors/source-chargebee/metadata.yaml +++ b/airbyte-integrations/connectors/source-chargebee/metadata.yaml @@ -1,6 +1,6 @@ data: ab_internal: - ql: 400 + ql: 200 sl: 200 allowedHosts: hosts: diff --git 
a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl index 7ccbbee8037c..9b65df5c424e 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl @@ -21,10 +21,10 @@ {"stream":"projects_v2","data":{"closed":false,"created_at":"2023-09-25T18:34:52Z","closed_at":null,"updated_at":"2023-09-25T18:35:45Z","creator":{"avatarUrl":"https://avatars.githubusercontent.com/u/92915184?u=e53c87d81ec6fb0596bc0f75e12e84e8f0df8d83&v=4","login":"airbyteio","resourcePath":"/airbyteio","url":"https://github.com/airbyteio"},"node_id":"PVT_kwDOA4_XW84AV7NS","id":5747538,"number":58,"public":false,"readme":"# Title\nintegration test project","short_description":"integration test project description","template":false,"title":"integration test project","url":"https://github.com/orgs/airbytehq/projects/58","viewerCanClose":true,"viewerCanReopen":true,"viewerCanUpdate":true,"owner_id":"MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3","repository":"airbytehq/integration-test"},"emitted_at":1695666959656} {"stream":"pull_request_comment_reactions","data":{"node_id":"MDMyOlB1bGxSZXF1ZXN0UmV2aWV3Q29tbWVudFJlYWN0aW9uMTI3MDUxNDM4","id":127051438,"content":"HEART","created_at":"2021-09-06T11:37:25Z","user":{"node_id":"MDQ6VXNlcjM0MTAzMTI1","id":34103125,"login":"yevhenii-ldv","avatar_url":"https://avatars.githubusercontent.com/u/34103125?u=3e49bb73177a9f70896e3d49b34656ab659c70a5&v=4","html_url":"https://github.com/yevhenii-ldv","site_admin":false,"type":"User"},"repository":"airbytehq/integration-test","comment_id":699253726},"emitted_at":1677668755106} {"stream":"pull_request_commits","data":{"sha":"00a74695eb754865a552196ee158a87f0b9dcff7","node_id":"MDY6Q29tbWl0NDAwMDUyMjEzOjAwYTc0Njk1ZWI3NTQ4NjVhNTUyMTk2ZWUxNThhODdmMGI5ZGNmZjc=","commit":{"author":{"name":"Arthur Galuza","email":"a.galuza@exaft.com","date":"2021-08-27T15:41:11Z"},"committer":{"name":"Arthur Galuza","email":"a.galuza@exaft.com","date":"2021-08-27T15:41:11Z"},"message":"commit number 
0","tree":{"sha":"3f2a52f90f9acc30359b00065e5b989267fef1f5","url":"https://api.github.com/repos/airbytehq/integration-test/git/trees/3f2a52f90f9acc30359b00065e5b989267fef1f5"},"url":"https://api.github.com/repos/airbytehq/integration-test/git/commits/00a74695eb754865a552196ee158a87f0b9dcff7","comment_count":0,"verification":{"verified":false,"reason":"unsigned","signature":null,"payload":null}},"url":"https://api.github.com/repos/airbytehq/integration-test/commits/00a74695eb754865a552196ee158a87f0b9dcff7","html_url":"https://github.com/airbytehq/integration-test/commit/00a74695eb754865a552196ee158a87f0b9dcff7","comments_url":"https://api.github.com/repos/airbytehq/integration-test/commits/00a74695eb754865a552196ee158a87f0b9dcff7/comments","author":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"committer":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"parents":[{"sha":"978753aeb56f7b49872279d1b491411a6235aa90","url":"https://api.github.com/repos/airbytehq/integration-test/commits/978753aeb56f7b49872279d1b491411a6235aa90","html_url":"https://github.com/airbytehq/integration-test/commit/978753aeb56f7b49872279d1b491411a6235aa90"}],"repository":"airbytehq/integration-test","pull_number":5},"emitted_at":1677668756160} -{"stream":"pull_request_stats","data":{"node_id":"MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2","id":721435506,"number":5,"updated_at":"2021-08-27T15:53:14Z","changed_files":5,"deletions":0,"additions":5,"merged":false,"mergeable":"MERGEABLE","can_be_rebased":true,"maintainer_can_modify":false,"merge_state_status":"BLOCKED","comments":0,"commits":5,"review_comments":0,"merged_by":null,"repository":"airbytehq/integration-test"},"emitted_at":1677668759962} -{"stream": "pull_requests", "data": {"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5", "id": 721435506, "node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "html_url": 
"https://github.com/airbytehq/integration-test/pull/5", "diff_url": "https://github.com/airbytehq/integration-test/pull/5.diff", "patch_url": "https://github.com/airbytehq/integration-test/pull/5.patch", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5", "number": 5, "state": "open", "locked": false, "title": "New PR from feature/branch_4", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "body": null, "created_at": "2021-08-27T15:43:40Z", "updated_at": "2021-08-27T15:53:14Z", "closed_at": null, "merged_at": null, "merge_commit_sha": "191309e3da8b36705156348ae73f4dca836533f9", "assignee": null, "assignees": [], "requested_reviewers": [], "requested_teams": [], "labels": [{"id": 3295756566, "node_id": "MDU6TGFiZWwzMjk1NzU2NTY2", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/bug", "name": "bug", "color": "d73a4a", "default": true, "description": "Something isn't working"}, {"id": 3300346197, "node_id": "MDU6TGFiZWwzMzAwMzQ2MTk3", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/critical", "name": "critical", "color": "ededed", "default": false, "description": null}], "milestone": null, "draft": false, "commits_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits", "review_comments_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments", "review_comment_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "head": {"label": "airbytehq:feature/branch_4", "ref": "feature/branch_4", "sha": "31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", 
"received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo_id": 400052213}, "base": {"label": "airbytehq:master", "ref": "master", "sha": "978753aeb56f7b49872279d1b491411a6235aa90", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": 
"https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments", "created_at": "2021-08-26T05:32:43Z", "updated_at": "2022-07-08T01:27:13Z", "pushed_at": "2023-05-03T16:40:56Z", "git_url": "git://github.com/airbytehq/integration-test.git", "ssh_url": "git@github.com:airbytehq/integration-test.git", "clone_url": "https://github.com/airbytehq/integration-test.git", "svn_url": "https://github.com/airbytehq/integration-test", "homepage": null, "size": 11, "stargazers_count": 4, "watchers_count": 4, "language": null, "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": false, "has_discussions": false, "forks_count": 2, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 10, "license": null, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [], "visibility": "public", "forks": 2, "open_issues": 10, "watchers": 4, "default_branch": "master"}, "repo_id": null}, "_links": {"self": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5"}, "html": {"href": "https://github.com/airbytehq/integration-test/pull/5"}, "issue": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5"}, 
"comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments"}, "review_comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments"}, "review_comment": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}"}, "commits": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits"}, "statuses": {"href": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083"}}, "author_association": "CONTRIBUTOR", "auto_merge": null, "active_lock_reason": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1685698519242} +{"stream": "pull_request_stats", "data": {"node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "id": 721435506, "number": 5, "updated_at": "2023-11-16T14:38:58Z", "changed_files": 5, "deletions": 0, "additions": 5, "merged": false, "mergeable": "MERGEABLE", "can_be_rebased": false, "maintainer_can_modify": false, "merge_state_status": "BLOCKED", "comments": 0, "commits": 5, "review_comments": 0, "merged_by": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700557306144} +{"stream": "pull_requests", "data": {"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5", "id": 721435506, "node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "html_url": "https://github.com/airbytehq/integration-test/pull/5", "diff_url": "https://github.com/airbytehq/integration-test/pull/5.diff", "patch_url": "https://github.com/airbytehq/integration-test/pull/5.patch", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5", "number": 5, "state": "closed", "locked": false, "title": "New PR from feature/branch_4", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "body": null, "created_at": "2021-08-27T15:43:40Z", "updated_at": "2023-11-16T14:38:58Z", "closed_at": "2023-11-16T14:38:58Z", "merged_at": null, "merge_commit_sha": "191309e3da8b36705156348ae73f4dca836533f9", "assignee": null, "assignees": [], "requested_reviewers": [], "requested_teams": [], "labels": [{"id": 3295756566, "node_id": "MDU6TGFiZWwzMjk1NzU2NTY2", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/bug", "name": "bug", "color": "d73a4a", "default": true, "description": "Something isn't working"}, {"id": 3300346197, "node_id": "MDU6TGFiZWwzMzAwMzQ2MTk3", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/critical", "name": "critical", "color": "ededed", "default": false, "description": null}], "milestone": null, "draft": false, "commits_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits", 
"review_comments_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments", "review_comment_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "head": {"label": "airbytehq:feature/branch_4", "ref": "feature/branch_4", "sha": "31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo_id": 400052213}, "base": {"label": "airbytehq:master", "ref": "master", "sha": "978753aeb56f7b49872279d1b491411a6235aa90", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": 
"https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", 
"releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments", "created_at": "2021-08-26T05:32:43Z", "updated_at": "2023-11-16T14:48:53Z", "pushed_at": "2023-05-03T16:40:56Z", "git_url": "git://github.com/airbytehq/integration-test.git", "ssh_url": "git@github.com:airbytehq/integration-test.git", "clone_url": "https://github.com/airbytehq/integration-test.git", "svn_url": "https://github.com/airbytehq/integration-test", "homepage": null, "size": 11, "stargazers_count": 4, "watchers_count": 4, "language": null, "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": false, "has_discussions": false, "forks_count": 2, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 6, "license": null, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [], "visibility": "public", "forks": 2, "open_issues": 6, "watchers": 4, "default_branch": "master"}, "repo_id": null}, "_links": {"self": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5"}, "html": {"href": "https://github.com/airbytehq/integration-test/pull/5"}, "issue": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5"}, "comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments"}, "review_comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments"}, "review_comment": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}"}, "commits": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits"}, "statuses": {"href": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083"}}, "author_association": "CONTRIBUTOR", "auto_merge": null, "active_lock_reason": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700585060024} {"stream":"releases","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586","assets_url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586/assets","upload_url":"https://uploads.github.com/repos/airbytehq/integration-test/releases/48581586/assets{?name,label}","html_url":"https://github.com/airbytehq/integration-test/releases/tag/dev-0.9","id":48581586,"author":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"node_id":"MDc6UmVsZWFzZTQ4NTgxNTg2","tag_name":"dev-0.9","target_commitish":"master","name":"9 global 
release","draft":false,"prerelease":false,"created_at":"2021-08-27T07:03:09Z","published_at":"2021-08-27T15:43:53Z","assets":[],"tarball_url":"https://api.github.com/repos/airbytehq/integration-test/tarball/dev-0.9","zipball_url":"https://api.github.com/repos/airbytehq/integration-test/zipball/dev-0.9","body":"","repository":"airbytehq/integration-test"},"emitted_at":1677668760424} -{"stream":"repositories","data":{"id":283046497,"node_id":"MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=","name":"airbyte","full_name":"airbytehq/airbyte","private":false,"owner":{"login":"airbytehq","id":59758427,"node_id":"MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3","avatar_url":"https://avatars.githubusercontent.com/u/59758427?v=4","gravatar_id":"","url":"https://api.github.com/users/airbytehq","html_url":"https://github.com/airbytehq","followers_url":"https://api.github.com/users/airbytehq/followers","following_url":"https://api.github.com/users/airbytehq/following{/other_user}","gists_url":"https://api.github.com/users/airbytehq/gists{/gist_id}","starred_url":"https://api.github.com/users/airbytehq/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/airbytehq/subscriptions","organizations_url":"https://api.github.com/users/airbytehq/orgs","repos_url":"https://api.github.com/users/airbytehq/repos","events_url":"https://api.github.com/users/airbytehq/events{/privacy}","received_events_url":"https://api.github.com/users/airbytehq/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/airbytehq/airbyte","description":"Data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes.","fork":false,"url":"https://api.github.com/repos/airbytehq/airbyte","forks_url":"https://api.github.com/repos/airbytehq/airbyte/forks","keys_url":"https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}","collaborators_url":"https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/airbytehq/airbyte/teams","hooks_url":"https://api.github.com/repos/airbytehq/airbyte/hooks","issue_events_url":"https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}","events_url":"https://api.github.com/repos/airbytehq/airbyte/events","assignees_url":"https://api.github.com/repos/airbytehq/airbyte/assignees{/user}","branches_url":"https://api.github.com/repos/airbytehq/airbyte/branches{/branch}","tags_url":"https://api.github.com/repos/airbytehq/airbyte/tags","blobs_url":"https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}","trees_url":"https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}","statuses_url":"https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}","languages_url":"https://api.github.com/repos/airbytehq/airbyte/languages","stargazers_url":"https://api.github.com/repos/airbytehq/airbyte/stargazers","contributors_url":"https://api.github.com/repos/airbytehq/airbyte/contributors","subscribers_url":"https://api.github.com/repos/airbytehq/airbyte/subscribers","subscription_url":"https://api.github.com/repos/airbytehq/airbyte/subscription","commits_url":"https://api.github.com/repos/airbytehq/airbyte/commits{/sha}","git_commits_url":"https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}","comments_url":"https://api.github.com/repos/airbytehq/airbyte/comments{/number}","issue_comment_url":"https://api.github.com
/repos/airbytehq/airbyte/issues/comments{/number}","contents_url":"https://api.github.com/repos/airbytehq/airbyte/contents/{+path}","compare_url":"https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}","merges_url":"https://api.github.com/repos/airbytehq/airbyte/merges","archive_url":"https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/airbytehq/airbyte/downloads","issues_url":"https://api.github.com/repos/airbytehq/airbyte/issues{/number}","pulls_url":"https://api.github.com/repos/airbytehq/airbyte/pulls{/number}","milestones_url":"https://api.github.com/repos/airbytehq/airbyte/milestones{/number}","notifications_url":"https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/airbytehq/airbyte/labels{/name}","releases_url":"https://api.github.com/repos/airbytehq/airbyte/releases{/id}","deployments_url":"https://api.github.com/repos/airbytehq/airbyte/deployments","created_at":"2020-07-27T23:55:54Z","updated_at":"2023-09-22T09:10:42Z","pushed_at":"2023-09-22T09:17:52Z","git_url":"git://github.com/airbytehq/airbyte.git","ssh_url":"git@github.com:airbytehq/airbyte.git","clone_url":"https://github.com/airbytehq/airbyte.git","svn_url":"https://github.com/airbytehq/airbyte","homepage":"https://airbyte.com","size":396556,"stargazers_count":11806,"watchers_count":11806,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":false,"has_pages":false,"has_discussions":true,"forks_count":3071,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":5029,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":["airbyte","bigquery","change-data-capture","data","data-analysis","data-collection","data-engineering","data-ingestion","data-integration","elt","etl","java","pipeline","python","redshift","snowflake"],"visibility":"public","forks":3071,"open_issues":5029,"watchers":11806,"default_branch":"master","permissions":{"admin":true,"maintain":true,"push":true,"triage":true,"pull":true},"security_and_analysis":{"secret_scanning":{"status":"disabled"},"secret_scanning_push_protection":{"status":"disabled"},"dependabot_security_updates":{"status":"enabled"}},"organization":"airbytehq"},"emitted_at":1695374353086} +{"stream": "repositories", "data": {"id": 283046497, "node_id": "MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=", "name": "airbyte", "full_name": "airbytehq/airbyte", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": 
"https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/airbyte", "description": "Data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes.", "fork": false, "url": "https://api.github.com/repos/airbytehq/airbyte", "forks_url": "https://api.github.com/repos/airbytehq/airbyte/forks", "keys_url": "https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/airbyte/teams", "hooks_url": "https://api.github.com/repos/airbytehq/airbyte/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/airbyte/events", "assignees_url": "https://api.github.com/repos/airbytehq/airbyte/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/airbyte/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/airbyte/tags", "blobs_url": "https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/airbyte/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/airbyte/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/airbyte/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/airbyte/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/airbyte/subscription", "commits_url": "https://api.github.com/repos/airbytehq/airbyte/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/airbyte/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/airbyte/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/airbyte/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/airbyte/merges", "archive_url": "https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/airbyte/downloads", "issues_url": "https://api.github.com/repos/airbytehq/airbyte/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/airbyte/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/airbyte/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/airbyte/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/airbyte/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/airbyte/deployments", "created_at": "2020-07-27T23:55:54Z", "updated_at": "2023-11-21T14:55:05Z", "pushed_at": "2023-11-21T16:55:37Z", "git_url": "git://github.com/airbytehq/airbyte.git", "ssh_url": "git@github.com:airbytehq/airbyte.git", "clone_url": "https://github.com/airbytehq/airbyte.git", 
"svn_url": "https://github.com/airbytehq/airbyte", "homepage": "https://airbyte.com", "size": 455477, "stargazers_count": 12328, "watchers_count": 12328, "language": "Python", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": false, "has_pages": false, "has_discussions": true, "forks_count": 3226, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 5053, "license": {"key": "other", "name": "Other", "spdx_id": "NOASSERTION", "url": null, "node_id": "MDc6TGljZW5zZTA="}, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": ["airbyte", "bigquery", "change-data-capture", "data", "data-analysis", "data-collection", "data-engineering", "data-ingestion", "data-integration", "elt", "etl", "java", "pipeline", "python", "redshift", "snowflake"], "visibility": "public", "forks": 3226, "open_issues": 5053, "watchers": 12328, "default_branch": "master", "permissions": {"admin": true, "maintain": true, "push": true, "triage": true, "pull": true}, "security_and_analysis": {"secret_scanning": {"status": "disabled"}, "secret_scanning_push_protection": {"status": "disabled"}, "dependabot_security_updates": {"status": "enabled"}, "secret_scanning_validity_checks": {"status": "disabled"}}, "organization": "airbytehq"}, "emitted_at": 1700585836592} {"stream":"review_comments","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726","pull_request_review_id":742633128,"id":699253726,"node_id":"MDI0OlB1bGxSZXF1ZXN0UmV2aWV3Q29tbWVudDY5OTI1MzcyNg==","diff_hunk":"@@ -0,0 +1 @@\n+text_for_file_","path":"github_sources/file_1.txt","commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","original_commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","user":{"login":"yevhenii-ldv","id":34103125,"node_id":"MDQ6VXNlcjM0MTAzMTI1","avatar_url":"https://avatars.githubusercontent.com/u/34103125?v=4","gravatar_id":"","url":"https://api.github.com/users/yevhenii-ldv","html_url":"https://github.com/yevhenii-ldv","followers_url":"https://api.github.com/users/yevhenii-ldv/followers","following_url":"https://api.github.com/users/yevhenii-ldv/following{/other_user}","gists_url":"https://api.github.com/users/yevhenii-ldv/gists{/gist_id}","starred_url":"https://api.github.com/users/yevhenii-ldv/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/yevhenii-ldv/subscriptions","organizations_url":"https://api.github.com/users/yevhenii-ldv/orgs","repos_url":"https://api.github.com/users/yevhenii-ldv/repos","events_url":"https://api.github.com/users/yevhenii-ldv/events{/privacy}","received_events_url":"https://api.github.com/users/yevhenii-ldv/received_events","type":"User","site_admin":false},"body":"Good 
point","created_at":"2021-08-31T12:01:15Z","updated_at":"2021-08-31T12:01:15Z","html_url":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726","pull_request_url":"https://api.github.com/repos/airbytehq/integration-test/pulls/4","author_association":"MEMBER","_links":{"self":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726"},"html":{"href":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726"},"pull_request":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/4"}},"reactions":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726/reactions","total_count":1,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":1,"rocket":0,"eyes":0},"start_line":null,"original_start_line":null,"start_side":null,"line":1,"original_line":1,"side":"RIGHT","original_position":1,"position":1,"subject_type":"line","repository":"airbytehq/integration-test"},"emitted_at":1695375624151} {"stream":"reviews","data":{"node_id":"MDE3OlB1bGxSZXF1ZXN0UmV2aWV3NzQwNjU5Nzk4","id":740659798,"body":"Review commit for branch feature/branch_4","state":"COMMENTED","html_url":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798","author_association":"CONTRIBUTOR","submitted_at":"2021-08-27T15:43:42Z","created_at":"2021-08-27T15:43:42Z","updated_at":"2021-08-27T15:43:42Z","user":{"node_id":"MDQ6VXNlcjc0MzkwMQ==","id":743901,"login":"gaart","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","html_url":"https://github.com/gaart","site_admin":false,"type":"User"},"repository":"airbytehq/integration-test","pull_request_url":"https://github.com/airbytehq/integration-test/pull/5","commit_id":"31a3e3f19fefce60fba6bfc69dd2b3fb5195a083","_links":{"html":{"href":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798"},"pull_request":{"href":"https://github.com/airbytehq/integration-test/pull/5"}}},"emitted_at":1677668764954} {"stream":"stargazers","data":{"starred_at":"2021-08-27T16:23:34Z","user":{"login":"VasylLazebnyk","id":68591643,"node_id":"MDQ6VXNlcjY4NTkxNjQz","avatar_url":"https://avatars.githubusercontent.com/u/68591643?v=4","gravatar_id":"","url":"https://api.github.com/users/VasylLazebnyk","html_url":"https://github.com/VasylLazebnyk","followers_url":"https://api.github.com/users/VasylLazebnyk/followers","following_url":"https://api.github.com/users/VasylLazebnyk/following{/other_user}","gists_url":"https://api.github.com/users/VasylLazebnyk/gists{/gist_id}","starred_url":"https://api.github.com/users/VasylLazebnyk/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/VasylLazebnyk/subscriptions","organizations_url":"https://api.github.com/users/VasylLazebnyk/orgs","repos_url":"https://api.github.com/users/VasylLazebnyk/repos","events_url":"https://api.github.com/users/VasylLazebnyk/events{/privacy}","received_events_url":"https://api.github.com/users/VasylLazebnyk/received_events","type":"User","site_admin":false},"repository":"airbytehq/integration-test","user_id":68591643},"emitted_at":1677668765231} @@ -32,8 +32,8 @@ {"stream":"teams", "data": {"name": "Zazmic", "id": 4432406, "node_id": "MDQ6VGVhbTQ0MzI0MDY=", "slug": "zazmic", "description": "", "privacy": "closed", "notification_setting": "notifications_enabled", "url": "https://api.github.com/organizations/59758427/team/4432406", "html_url": "https://github.com/orgs/airbytehq/teams/zazmic", "members_url": 
"https://api.github.com/organizations/59758427/team/4432406/members{/member}", "repositories_url": "https://api.github.com/organizations/59758427/team/4432406/repos", "permission": "pull", "parent": null, "organization": "airbytehq"}, "emitted_at": 1681307598422} {"stream":"users","data":{"login":"AirbyteEricksson","id":101604444,"node_id":"U_kgDOBg5cXA","avatar_url":"https://avatars.githubusercontent.com/u/101604444?v=4","gravatar_id":"","url":"https://api.github.com/users/AirbyteEricksson","html_url":"https://github.com/AirbyteEricksson","followers_url":"https://api.github.com/users/AirbyteEricksson/followers","following_url":"https://api.github.com/users/AirbyteEricksson/following{/other_user}","gists_url":"https://api.github.com/users/AirbyteEricksson/gists{/gist_id}","starred_url":"https://api.github.com/users/AirbyteEricksson/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/AirbyteEricksson/subscriptions","organizations_url":"https://api.github.com/users/AirbyteEricksson/orgs","repos_url":"https://api.github.com/users/AirbyteEricksson/repos","events_url":"https://api.github.com/users/AirbyteEricksson/events{/privacy}","received_events_url":"https://api.github.com/users/AirbyteEricksson/received_events","type":"User","site_admin":false,"organization":"airbytehq"},"emitted_at":1677668766142} {"stream":"workflows","data":{"id":22952989,"node_id":"W_kwDOF9hP9c4BXjwd","name":"Pull Request Labeler","path":".github/workflows/labeler.yml","state":"active","created_at":"2022-03-30T21:30:37.000+02:00","updated_at":"2022-03-30T21:30:37.000+02:00","url":"https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989","html_url":"https://github.com/airbytehq/integration-test/blob/master/.github/workflows/labeler.yml","badge_url":"https://github.com/airbytehq/integration-test/workflows/Pull%20Request%20Labeler/badge.svg","repository":"airbytehq/integration-test"},"emitted_at":1677668766580} -{"stream": "workflow_runs", "data": {"id": 3184250176, "name": "Pull Request Labeler", "node_id": "WFR_kwLOF9hP9c69y81A", "head_branch": "feature/branch_5", "head_sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "path": ".github/workflows/labeler.yml", "display_title": "New PR from feature/branch_5", "run_number": 3, "event": "pull_request_target", "status": "completed", "conclusion": "success", "workflow_id": 22952989, "check_suite_id": 8611635614, "check_suite_node_id": "CS_kwDOF9hP9c8AAAACAUshng", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/3184250176", "pull_requests": [{"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/14", "id": 984835098, "number": 14, "head": {"ref": "feature/branch_5", "sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "repo": {"id": 400052213, "url": "https://api.github.com/repos/airbytehq/integration-test", "name": "integration-test"}}, "base": {"ref": "master", "sha": "a12c9379604f7b32e54e5459122aa48473f806ee", "repo": {"id": 400052213, "url": "https://api.github.com/repos/airbytehq/integration-test", "name": "integration-test"}}}], "created_at": "2022-10-04T17:41:18Z", "updated_at": "2023-11-08T19:58:29Z", "actor": {"login": "grubberr", "id": 195743, "node_id": "MDQ6VXNlcjE5NTc0Mw==", "avatar_url": "https://avatars.githubusercontent.com/u/195743?v=4", "gravatar_id": "", "url": "https://api.github.com/users/grubberr", "html_url": "https://github.com/grubberr", "followers_url": 
"https://api.github.com/users/grubberr/followers", "following_url": "https://api.github.com/users/grubberr/following{/other_user}", "gists_url": "https://api.github.com/users/grubberr/gists{/gist_id}", "starred_url": "https://api.github.com/users/grubberr/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/grubberr/subscriptions", "organizations_url": "https://api.github.com/users/grubberr/orgs", "repos_url": "https://api.github.com/users/grubberr/repos", "events_url": "https://api.github.com/users/grubberr/events{/privacy}", "received_events_url": "https://api.github.com/users/grubberr/received_events", "type": "User", "site_admin": false}, "run_attempt": 1, "referenced_workflows": [], "run_started_at": "2022-10-04T17:41:18Z", "triggering_actor": {"login": "grubberr", "id": 195743, "node_id": "MDQ6VXNlcjE5NTc0Mw==", "avatar_url": "https://avatars.githubusercontent.com/u/195743?v=4", "gravatar_id": "", "url": "https://api.github.com/users/grubberr", "html_url": "https://github.com/grubberr", "followers_url": "https://api.github.com/users/grubberr/followers", "following_url": "https://api.github.com/users/grubberr/following{/other_user}", "gists_url": "https://api.github.com/users/grubberr/gists{/gist_id}", "starred_url": "https://api.github.com/users/grubberr/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/grubberr/subscriptions", "organizations_url": "https://api.github.com/users/grubberr/orgs", "repos_url": "https://api.github.com/users/grubberr/repos", "events_url": "https://api.github.com/users/grubberr/events{/privacy}", "received_events_url": "https://api.github.com/users/grubberr/received_events", "type": "User", "site_admin": false}, "jobs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/jobs", "logs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/logs", "check_suite_url": "https://api.github.com/repos/airbytehq/integration-test/check-suites/8611635614", "artifacts_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/artifacts", "cancel_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/cancel", "rerun_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/rerun", "previous_attempt_url": null, "workflow_url": "https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989", "head_commit": {"id": "f71e5f6894578148d52b487dff07e55804fd9cfd", "tree_id": "bb78ec62be8c5c640010e7c897f40932ce59e725", "message": "file_5.txt updated\n\nSigned-off-by: Sergey Chvalyuk ", "timestamp": "2022-10-04T17:41:08Z", "author": {"name": "Sergey Chvalyuk", "email": "grubberr@gmail.com"}, "committer": {"name": "Sergey Chvalyuk", "email": "grubberr@gmail.com"}}, "repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": 
"https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": 
"https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}, "head_repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": 
"https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}}, "emitted_at": 1699644824401} -{"stream": "workflow_jobs", "data": {"id": 8705992587, "run_id": 3184250176, "workflow_name": "Pull Request Labeler", "head_branch": "feature/branch_5", "run_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176", "run_attempt": 1, "node_id": "CR_kwDOF9hP9c8AAAACBurniw", "head_sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/jobs/8705992587", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/3184250176/job/8705992587", "status": "completed", "conclusion": "success", "created_at": "2022-10-04T17:41:20Z", "started_at": "2022-10-04T17:41:27Z", "completed_at": "2022-10-04T17:41:30Z", "name": "triage", "steps": [], "check_run_url": "https://api.github.com/repos/airbytehq/integration-test/check-runs/8705992587", "labels": ["ubuntu-latest"], "runner_id": 1, "runner_name": "Hosted Agent", "runner_group_id": 2, "runner_group_name": "GitHub Actions", "repository": "airbytehq/integration-test"}, "emitted_at": 1699646006344} +{"stream": "workflow_runs", "data": {"id": 4871166142, "name": "Pull Request Labeler", "node_id": "WFR_kwLOF9hP9c8AAAABIlgYvg", "head_branch": "arsenlosenko/test-pending-comments-in-pr", "head_sha": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "path": ".github/workflows/labeler.yml", "display_title": "Update .gitignore", "run_number": 4, "event": "pull_request_target", "status": "completed", "conclusion": "success", "workflow_id": 22952989, "check_suite_id": 12643387080, "check_suite_node_id": "CS_kwDOF9hP9c8AAAAC8ZrGyA", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/4871166142", "pull_requests": [], "created_at": "2023-05-03T11:05:23Z", "updated_at": "2023-05-03T11:05:36Z", "actor": {"login": "arsenlosenko", "id": 20901439, "node_id": "MDQ6VXNlcjIwOTAxNDM5", "avatar_url": 
"https://avatars.githubusercontent.com/u/20901439?v=4", "gravatar_id": "", "url": "https://api.github.com/users/arsenlosenko", "html_url": "https://github.com/arsenlosenko", "followers_url": "https://api.github.com/users/arsenlosenko/followers", "following_url": "https://api.github.com/users/arsenlosenko/following{/other_user}", "gists_url": "https://api.github.com/users/arsenlosenko/gists{/gist_id}", "starred_url": "https://api.github.com/users/arsenlosenko/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/arsenlosenko/subscriptions", "organizations_url": "https://api.github.com/users/arsenlosenko/orgs", "repos_url": "https://api.github.com/users/arsenlosenko/repos", "events_url": "https://api.github.com/users/arsenlosenko/events{/privacy}", "received_events_url": "https://api.github.com/users/arsenlosenko/received_events", "type": "User", "site_admin": false}, "run_attempt": 1, "referenced_workflows": [], "run_started_at": "2023-05-03T11:05:23Z", "triggering_actor": {"login": "arsenlosenko", "id": 20901439, "node_id": "MDQ6VXNlcjIwOTAxNDM5", "avatar_url": "https://avatars.githubusercontent.com/u/20901439?v=4", "gravatar_id": "", "url": "https://api.github.com/users/arsenlosenko", "html_url": "https://github.com/arsenlosenko", "followers_url": "https://api.github.com/users/arsenlosenko/followers", "following_url": "https://api.github.com/users/arsenlosenko/following{/other_user}", "gists_url": "https://api.github.com/users/arsenlosenko/gists{/gist_id}", "starred_url": "https://api.github.com/users/arsenlosenko/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/arsenlosenko/subscriptions", "organizations_url": "https://api.github.com/users/arsenlosenko/orgs", "repos_url": "https://api.github.com/users/arsenlosenko/repos", "events_url": "https://api.github.com/users/arsenlosenko/events{/privacy}", "received_events_url": "https://api.github.com/users/arsenlosenko/received_events", "type": "User", "site_admin": false}, "jobs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/jobs", "logs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/logs", "check_suite_url": "https://api.github.com/repos/airbytehq/integration-test/check-suites/12643387080", "artifacts_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/artifacts", "cancel_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/cancel", "rerun_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/rerun", "previous_attempt_url": null, "workflow_url": "https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989", "head_commit": {"id": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "tree_id": "3cc1c41924b3cb67150684024877f6e02d283afb", "message": "Update .gitignore", "timestamp": "2023-05-03T11:04:11Z", "author": {"name": "Arsen Losenko", "email": "20901439+arsenlosenko@users.noreply.github.com"}, "committer": {"name": "Arsen Losenko", "email": "20901439+arsenlosenko@users.noreply.github.com"}}, "repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": 
"https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": 
"https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}, "head_repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": 
"https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}}, "emitted_at": 1700586521273} +{"stream": "workflow_jobs", "data": {"id": 13199605689, "run_id": 4871166142, "workflow_name": "Pull Request Labeler", "head_branch": "arsenlosenko/test-pending-comments-in-pr", "run_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142", "run_attempt": 1, "node_id": "CR_kwDOF9hP9c8AAAADEsH_uQ", "head_sha": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/jobs/13199605689", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/4871166142/job/13199605689", "status": "completed", "conclusion": "success", "created_at": "2023-05-03T11:05:25Z", "started_at": "2023-05-03T11:05:30Z", "completed_at": "2023-05-03T11:05:34Z", "name": "triage", "steps": [{"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1, "started_at": "2023-05-03T14:05:30.000+03:00", "completed_at": "2023-05-03T14:05:31.000+03:00"}, {"name": "Run actions/labeler@v3", "status": "completed", "conclusion": "success", "number": 2, "started_at": "2023-05-03T14:05:32.000+03:00", "completed_at": "2023-05-03T14:05:32.000+03:00"}, {"name": "Complete job", "status": "completed", "conclusion": "success", "number": 3, "started_at": "2023-05-03T14:05:32.000+03:00", "completed_at": "2023-05-03T14:05:32.000+03:00"}], "check_run_url": "https://api.github.com/repos/airbytehq/integration-test/check-runs/13199605689", "labels": ["ubuntu-latest"], "runner_id": 4, "runner_name": "GitHub Actions 4", "runner_group_id": 2, "runner_group_name": "GitHub Actions", "repository": "airbytehq/integration-test"}, "emitted_at": 1700587195423} {"stream": "team_members", "data": 
{"login": "johnlafleur", "id": 68561602, "node_id": "MDQ6VXNlcjY4NTYxNjAy", "avatar_url": "https://avatars.githubusercontent.com/u/68561602?v=4", "gravatar_id": "", "url": "https://api.github.com/users/johnlafleur", "html_url": "https://github.com/johnlafleur", "followers_url": "https://api.github.com/users/johnlafleur/followers", "following_url": "https://api.github.com/users/johnlafleur/following{/other_user}", "gists_url": "https://api.github.com/users/johnlafleur/gists{/gist_id}", "starred_url": "https://api.github.com/users/johnlafleur/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/johnlafleur/subscriptions", "organizations_url": "https://api.github.com/users/johnlafleur/orgs", "repos_url": "https://api.github.com/users/johnlafleur/repos", "events_url": "https://api.github.com/users/johnlafleur/events{/privacy}", "received_events_url": "https://api.github.com/users/johnlafleur/received_events", "type": "User", "site_admin": false, "organization": "airbytehq", "team_slug": "airbyte-eng"}, "emitted_at": 1698750584444} {"stream": "team_memberships", "data": {"state": "active", "role": "member", "url": "https://api.github.com/organizations/59758427/team/4559297/memberships/johnlafleur", "organization": "airbytehq", "team_slug": "airbyte-core", "username": "johnlafleur"}, "emitted_at": 1698757985640} {"stream": "issue_timeline_events", "data": {"repository": "airbytehq/integration-test", "issue_number": 6, "labeled": {"id": 5219398390, "node_id": "MDEyOkxhYmVsZWRFdmVudDUyMTkzOTgzOTA=", "url": "https://api.github.com/repos/airbytehq/integration-test/issues/events/5219398390", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "event": "labeled", "commit_id": null, "commit_url": null, "created_at": "2021-08-27T15:43:58Z", "label": {"name": "critical", "color": "ededed"}, "performed_via_github_app": null}, "milestoned": {"id": 5219398392, "node_id": "MDE1Ok1pbGVzdG9uZWRFdmVudDUyMTkzOTgzOTI=", "url": "https://api.github.com/repos/airbytehq/integration-test/issues/events/5219398392", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": 
"https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "event": "milestoned", "commit_id": null, "commit_url": null, "created_at": "2021-08-27T15:43:58Z", "milestone": {"title": "main"}, "performed_via_github_app": null}, "commented": {"url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments/907296167", "html_url": "https://github.com/airbytehq/integration-test/issues/6#issuecomment-907296167", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/6", "id": 907296167, "node_id": "IC_kwDOF9hP9c42FD2n", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "created_at": "2021-08-27T15:43:59Z", "updated_at": "2021-08-27T15:43:59Z", "author_association": "CONTRIBUTOR", "body": "comment for issues https://api.github.com/repos/airbytehq/integration-test/issues/6/comments", "reactions": {"url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments/907296167/reactions", "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0}, "performed_via_github_app": null, "event": "commented", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}}}, "emitted_at": 1695815681406} diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index aeb654a7ec8f..0ab538e21a6c 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: 
ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.5.3 + dockerImageTag: 1.5.4 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github githubIssueLabel: source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 200babf62f8e..fac84f7ed531 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -9,7 +9,8 @@ import pendulum import requests -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode +from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException @@ -1606,8 +1607,13 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter yield from super().read_records(stream_slice=stream_slice, **kwargs) except HTTPError as e: if e.response.status_code == requests.codes.ACCEPTED: - self.logger.info(f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.") - yield + yield AirbyteMessage( + type=MessageType.LOG, + log=AirbyteLogMessage( + level=Level.INFO, + message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.", + ), + ) else: raise e diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index ce9675c5d784..87d9c3478cd3 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -10,11 +10,11 @@ import pytest import requests import responses -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException, UserDefinedBackoffException from requests import HTTPError from responses import matchers -from source_github import constants +from source_github import SourceGithub, constants from source_github.streams import ( Branches, Collaborators, @@ -1369,21 +1369,50 @@ def test_stream_contributor_activity_parse_empty_response(caplog): @responses.activate def test_stream_contributor_activity_accepted_response(caplog): - repository_args = { - "page_size_for_large_streams": 20, - "repositories": ["airbytehq/airbyte"], - } - stream = ContributorActivity(**repository_args) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte"}, + status=200, + ) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte", "default_branch": "default_branch"}, + status=200, + ) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte/branches?per_page=100", + json={}, + status=200, + ) resp = responses.add( responses.GET, - "https://api.github.com/repos/airbytehq/airbyte/stats/contributors", + "https://api.github.com/repos/airbytehq/test_airbyte/stats/contributors?per_page=100", body="", status=202, ) + + source = 
SourceGithub() + configured_catalog = { + "streams": [ + { + "stream": {"name": "contributor_activity", "json_schema": {}, "supported_sync_modes": ["full_refresh"],"source_defined_primary_key": [["id"]]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] + } + catalog = ConfiguredAirbyteCatalog.parse_obj(configured_catalog) + config = {"access_token": "test_token", "repository": "airbytehq/test_airbyte"} + logger_mock = MagicMock() + with patch("time.sleep", return_value=0): - list(read_full_refresh(stream)) + records = list(source.read(config=config, logger=logger_mock, catalog=catalog, state={})) + + assert records[2].log.message == "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/test_airbyte`." assert resp.call_count == 6 - assert "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/airbyte`." in caplog.messages @responses.activate diff --git a/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json b/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json index 0b148260c015..709efd036a5b 100644 --- a/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json @@ -304,6 +304,8 @@ "https://drive.google.com/drive/folders/1Xaz0vXXXX2enKnNYU5qSt9NS70gvMyYn" ], "order": 0, + "pattern": "^https://drive.google.com/.+", + "pattern_descriptor": "https://drive.google.com/drive/folders/MY-FOLDER-ID", "type": "string" }, "credentials": { diff --git a/airbyte-integrations/connectors/source-google-drive/metadata.yaml b/airbyte-integrations/connectors/source-google-drive/metadata.yaml index f7eb6998beff..e561f16c6a0d 100644 --- a/airbyte-integrations/connectors/source-google-drive/metadata.yaml +++ b/airbyte-integrations/connectors/source-google-drive/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: file connectorType: source definitionId: 9f8dda77-1048-4368-815b-269bf54ee9b8 - dockerImageTag: 0.0.2 + dockerImageTag: 0.0.3 dockerRepository: airbyte/source-google-drive githubIssueLabel: source-google-drive icon: google-drive.svg diff --git a/airbyte-integrations/connectors/source-google-drive/setup.py b/airbyte-integrations/connectors/source-google-drive/setup.py index af0e32a9949e..ed7492559cd9 100644 --- a/airbyte-integrations/connectors/source-google-drive/setup.py +++ b/airbyte-integrations/connectors/source-google-drive/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk[file-based]>=0.53.5", + "airbyte-cdk[file-based]>=0.53.8", "google-api-python-client==2.104.0", "google-auth-httplib2==0.1.1", "google-auth-oauthlib==1.1.0", diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py index 7c2a60b27b82..00a360e0640b 100644 --- a/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py @@ -55,6 +55,8 @@ class Config: description="URL for the folder you want to sync. 
Using individual streams and glob patterns, it's possible to only sync a subset of all files located in the folder.", examples=["https://drive.google.com/drive/folders/1Xaz0vXXXX2enKnNYU5qSt9NS70gvMyYn"], order=0, + pattern="^https://drive.google.com/.+", + pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID", ) credentials: Union[OAuthCredentials, ServiceAccountCredentials] = Field( diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py index 2b6fa5f9cccc..dd786360f7a3 100644 --- a/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py @@ -16,6 +16,7 @@ from google.oauth2 import credentials, service_account from googleapiclient.discovery import build from googleapiclient.http import MediaIoBaseDownload +from source_google_drive.utils import get_folder_id from .spec import SourceGoogleDriveSpec @@ -86,7 +87,7 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo Get all files matching the specified glob patterns. """ service = self.google_drive_service - root_folder_id = self._get_folder_id(self.config.folder_url) + root_folder_id = get_folder_id(self.config.folder_url) # ignore prefix argument as it's legacy only and this is a new connector prefixes = self.get_prefixes_from_globs(globs) @@ -141,21 +142,6 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo if request is None: break - def _get_folder_id(self, url): - # Regular expression pattern to check the URL structure and extract the ID - pattern = r"^https://drive\.google\.com/drive/folders/([a-zA-Z0-9_-]+)$" - - # Find the pattern in the URL - match = re.search(pattern, url) - - if match: - # The matched group is the ID - drive_id = match.group(1) - return drive_id - else: - # If no match is found - raise ValueError(f"Could not extract folder ID from {url}") - def _is_exportable_document(self, mime_type: str): """ Returns true if the given file is a Google App document that can be exported. diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py new file mode 100644 index 000000000000..c0994802358b --- /dev/null +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +from urllib.parse import urlparse + + +def get_folder_id(url_string: str) -> str: + """ + Extract the folder ID from a Google Drive folder URL. + + Takes the last path segment of the URL, which is the folder ID (ignoring trailing slashes and query parameters). 
+ """ + try: + parsed_url = urlparse(url_string) + if parsed_url.scheme != "https" or parsed_url.netloc != "drive.google.com": + raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/") + path_segments = list(filter(None, parsed_url.path.split("/"))) + if path_segments[-2] != "folders" or len(path_segments) < 3: + raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/") + return path_segments[-1] + except Exception: + raise ValueError("Folder URL is invalid") diff --git a/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py b/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py new file mode 100644 index 000000000000..8dcb7e52e223 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + + +import pytest +from source_google_drive.utils import get_folder_id + + +@pytest.mark.parametrize( + "input, output, raises", + [ + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p/", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p?usp=link_sharing", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p/", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p?usp=link_sharing", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p#abc", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://docs.google.com/document/d/fsgfjdsh", None, True), + ("https://drive.google.com/drive/my-drive", None, True), + ("http://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p/", None, True), + ("https://drive.google.com/", None, True), + ] +) +def test_get_folder_id(input, output, raises): + if raises: + with pytest.raises(ValueError): + get_folder_id(input) + else: + assert get_folder_id(input) == output \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl index 0a60185b5dbf..d90034355c7a 100644 --- a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl @@ -1,7 +1,7 @@ -{"stream": "users", "data": {"id": "17841408147298757", "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=uQq3P1OLNOYAX_JjmNN&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBI4aspXBrxU-bYTD-qnPWh7ex05YFFAOl_24u7JxLYrw&oe=6558D73E", "username": "airbytehq", "followers_count": 1253, "name": "Jean Lafleur", "ig_id": 8070063576, "media_count": 258, "follows_count": 14, "website": "https://www.airbyte.io/", "page_id": "144706962067225"}, "emitted_at": 1700004246764} -{"stream": "media", "data": {"id": "17884386203808767", "media_product_type": "REELS", "shortcode": "CtZs0Y3v2lx", "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "media_url": 
"https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfAJ_4aiqzpxj20QL_aetXfmjmA8nRmz27vnAzpiARGK5w&oe=6555EADA&_nc_sid=1d576d", "timestamp": "2023-06-12T19:20:02+0000", "media_type": "VIDEO", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "comments_count": 2, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 9, "ig_id": "3123724930722523505", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfAzQkg0OB_775OS9F7QSmHxKMrjBSNFi8Rx24OISWSTTQ&oe=655888CE", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200656} -{"stream": "media", "data": {"id": "17864256500936159", "media_product_type": "REELS", "shortcode": "CscAR5EsRgA", "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB3i72i3aoV6KoK_SkI7W93z4rQLdbYHatg-KzPo0ADCg&oe=655556A6&_nc_sid=1d576d", "timestamp": "2023-05-19T20:08:33+0000", "media_type": "VIDEO", "caption": "When and why you should be using Rust for Data Engineering! 
\n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 7, "ig_id": "3106359072491902976", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=2ACJfSHiIRkAX8S0ZFU&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDJSGXmZXkQnQZmkrVUi4nadhEddZxH5LUNtELipGu4Dw&oe=655947D9", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "media", "data": {"id": "17964324206288599", "media_product_type": "REELS", "shortcode": "CsUe2iqpQif", "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB1HP_4v5ndbtfq_6eWT0cxo0vqzO9F6mu5ZS-q4IRDzg&oe=65558FF2&_nc_sid=1d576d", "timestamp": "2023-05-16T22:01:45+0000", "media_type": "VIDEO", "caption": "We've all been there right? \ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 13, "ig_id": "3104241732634871967", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=3He_36rMQuYAX9Pz0NM&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfCAgX5HrHW8grC2x_VzJyCf2lUTViJCmwNy0uStHB-YFg&oe=6559347C", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-14T08:00:00+0000", "value": {"London, England": 8, "Sydney, New South Wales": 19, "Algiers, Algiers Province": 4, "Casablanca, Grand Casablanca": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 7, "Kolkata, West Bengal": 4, "Phoenix, Arizona": 3, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 18, "Skopje, Municipality of Centar (Skopje)": 4, "Ahmedabad, Gujarat": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Delhi, Delhi": 6, "Gurugram, Haryana": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 5, "Istanbul, Istanbul Province": 9, "Abuja, Federal Capital Territory": 5, "Chennai, Tamil Nadu": 6, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, 
"Greater Noida, Uttar Pradesh": 3, "Tehran, Tehran Province": 4, "New York, New York": 13, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700004246978} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-14T08:00:00+0000", "value": {"DE": 31, "HK": 4, "FI": 5, "TW": 5, "RU": 9, "TZ": 8, "FR": 11, "SA": 8, "BR": 64, "SE": 6, "MA": 6, "SG": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, "ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 12, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 5, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700004246980} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-14T08:00:00+0000", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 367, "M.35-44": 226, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 43, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700004246980} +{"stream": "users", "data": {"id": "17841408147298757", "website": "https://www.airbyte.io/", "ig_id": 8070063576, "followers_count": 1252, "name": "Jean Lafleur", "media_count": 258, "username": "airbytehq", "follows_count": 14, "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=DFFn_25gYVMAX8nPfUd&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBHQPJ5aiFU1qw88d3gTF5jmg-Rpd5TX_gxAQt3jrSA4g&oe=655CCBBE", "page_id": "144706962067225"}, "emitted_at": 1700230802579} +{"stream": "media", "data": {"id": "17884386203808767", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "ig_id": "3123724930722523505", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfBPpWnNa8TFbux-TpRO48bJGSkaIKPFOnmXhcv39jLd_A&oe=6559369A&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CtZs0Y3v2lx", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfBdTKQTru0U2JNSqNnuPN0cWYv1u6o6t6u3EHIFteUV7w&oe=655C7D4E", 
"is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "timestamp": "2023-06-12T19:20:02+00:00", "like_count": 9, "comments_count": 2, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757119} +{"stream": "media", "data": {"id": "17864256500936159", "caption": "When and why you should be using Rust for Data Engineering! \n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "ig_id": "3106359072491902976", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfC6GeTJWR8KJZ3-eb1-faBZ8P8G8AFyswEDdD4gFzmPMg&oe=65594B26&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CscAR5EsRgA", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=jLyY4sWj0v0AX-iadbF&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfA-x6QyIXxT7o_lEwDH0k7tDb_bgCGeP61AseCpluCtPA&oe=655D3C59", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "timestamp": "2023-05-19T20:08:33+00:00", "like_count": 7, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "media", "data": {"id": "17964324206288599", "caption": "We've all been there right? 
\ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "ig_id": "3104241732634871967", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfACHaQfoSJ_vMXbm4Xw3gmWnG_vnJgUsIYUePDdtIUS-w&oe=6558DBB2&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CsUe2iqpQif", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=Y6VzeGH_9lkAX_wkzpd&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDil0e2W7Iqq0-d7rf9JkdOluS7U2C3nhK17EfQ3c07fw&oe=655D28FC", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "timestamp": "2023-05-16T22:01:45+00:00", "like_count": 13, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-17T08:00:00+00:00", "value": {"London, England": 7, "Sydney, New South Wales": 19, "Atlanta, Georgia": 4, "Algiers, Algiers Province": 4, "Caracas, Capital District": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 8, "Kolkata, West Bengal": 5, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 16, "Dili, Timor-Leste": 3, "Ahmedabad, Gujarat": 4, "Skopje, Municipality of Centar (Skopje)": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Gurugram, Haryana": 6, "Delhi, Delhi": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 4, "Istanbul, Istanbul Province": 9, "Chennai, Tamil Nadu": 6, "Abuja, Federal Capital Territory": 7, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, "Tehran, Tehran Province": 4, "New York, New York": 14, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700230802791} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-17T08:00:00+00:00", "value": {"DE": 31, "HK": 4, "TW": 5, "FI": 5, "RU": 9, "TZ": 8, "FR": 10, "SA": 8, "BR": 64, "SE": 6, "SG": 6, "MA": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, 
"ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 13, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 4, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700230802792} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-17T08:00:00+00:00", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 365, "M.35-44": 228, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 42, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700230802792} diff --git a/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json b/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json index cfed60215dc4..f3fbd6e9dc22 100644 --- a/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json @@ -10,7 +10,8 @@ "description": "The date from which you'd like to replicate data for User Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2017-01-25T00:00:00Z"], - "type": "string" + "type": "string", + "format": "date-time" }, "access_token": { "title": "Access Token", diff --git a/airbyte-integrations/connectors/source-instagram/metadata.yaml b/airbyte-integrations/connectors/source-instagram/metadata.yaml index 2e1403daea66..13fb9d4bc74a 100644 --- a/airbyte-integrations/connectors/source-instagram/metadata.yaml +++ b/airbyte-integrations/connectors/source-instagram/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: 6acf6b55-4f1e-4fca-944e-1a3caef8aba8 - dockerImageTag: 1.0.15 + dockerImageTag: 2.0.0 dockerRepository: airbyte/source-instagram githubIssueLabel: source-instagram icon: instagram.svg @@ -19,6 +19,13 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 2.0.0: + message: + This release introduces a default primary key for the streams UserLifetimeInsights and UserInsights. + Additionally, the format of timestamp fields has been updated in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. 
+ upgradeDeadline: "2023-12-03" suggestedStreams: streams: - media diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json index 4185de5f66cb..03c77796f5a0 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json @@ -53,7 +53,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] @@ -94,7 +95,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json index 7fc7fa7b40a3..876edf95ea41 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json @@ -47,7 +47,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json index fe98eafcccbf..91bc309d8eb6 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "follower_count": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json index eb9bb57fc720..4cb5092f5ace 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "metric": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/source.py b/airbyte-integrations/connectors/source-instagram/source_instagram/source.py index 73ce1fb5b5f4..4a41d013c1a9 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/source.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/source.py @@ -18,7 +18,7 @@ class ConnectorConfig(BaseModel): class Config: title = "Source Instagram" - start_date: Optional[str] = Field( + start_date: Optional[datetime] = Field( description="The date from which you'd like to replicate data for User Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. 
If left blank, the start date will be set to 2 years before the present date.", pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", examples=["2017-01-25T00:00:00Z"], diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py index 9c07d98bb083..bf5d39de1e1c 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py @@ -11,6 +11,7 @@ import pendulum from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import IncrementalMixin, Stream +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer from cached_property import cached_property from facebook_business.adobjects.igmedia import IGMedia from facebook_business.exceptions import FacebookRequestError @@ -19,6 +20,24 @@ from .common import remove_params_from_url +class DatetimeTransformerMixin: + transformer: TypeTransformer = TypeTransformer(TransformConfig.CustomSchemaNormalization) + + @staticmethod + @transformer.registerCustomTransform + def custom_transform_datetime_rfc3339(original_value, field_schema): + """ + Transform datetime string to RFC 3339 format + """ + if original_value and field_schema.get("format") == "date-time" and field_schema.get("airbyte_type") == "timestamp_with_timezone": + # Parse the ISO format timestamp + dt = pendulum.parse(original_value) + + # Convert to RFC 3339 format + return dt.to_rfc3339_string() + return original_value + + class InstagramStream(Stream, ABC): """Base stream class""" @@ -121,10 +140,10 @@ def read_records( yield self.transform(record) -class UserLifetimeInsights(InstagramStream): +class UserLifetimeInsights(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" - primary_key = None + primary_key = ["business_account_id", "metric", "date"] LIFETIME_METRICS = ["audience_city", "audience_country", "audience_gender_age", "audience_locale"] period = "lifetime" @@ -156,7 +175,7 @@ def request_params( return params -class UserInsights(InstagramIncrementalStream): +class UserInsights(DatetimeTransformerMixin, InstagramIncrementalStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" METRICS_BY_PERIOD = { @@ -176,7 +195,7 @@ class UserInsights(InstagramIncrementalStream): "lifetime": ["online_followers"], } - primary_key = None + primary_key = ["business_account_id", "date"] cursor_field = "date" # For some metrics we can only get insights not older than 30 days, it is Facebook policy @@ -295,7 +314,7 @@ def _state_has_legacy_format(self, state: Mapping[str, Any]) -> bool: return False -class Media(InstagramStream): +class Media(DatetimeTransformerMixin, InstagramStream): """Children objects can only be of the media_type == "CAROUSEL_ALBUM". 
And children object does not support INVALID_CHILDREN_FIELDS fields, so they are excluded when trying to get child objects to avoid the error @@ -403,7 +422,7 @@ def _get_insights(self, item, account_id) -> Optional[MutableMapping[str, Any]]: raise error -class Stories(InstagramStream): +class Stories(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/stories""" def read_records( diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py index 7b9fd1db150e..a065d01b77cf 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py @@ -35,6 +35,7 @@ def config_fixture(): def some_config_fixture(account_id): return {"start_date": "2021-01-23T00:00:00Z", "access_token": "unknown_token"} + @fixture(scope="session", name="some_config_future_date") def some_config_future_date_fixture(account_id): return {"start_date": "2030-01-23T00:00:00Z", "access_token": "unknown_token"} diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py index 2cdca11b4f8f..add26ad1a33f 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py @@ -31,11 +31,14 @@ def test_check_connection_empty_config(api): assert not ok assert error_msg + def test_check_connection_invalid_config_future_date(api, some_config_future_date): ok, error_msg = SourceInstagram().check_connection(logger, config=some_config_future_date) assert not ok assert error_msg + + def test_check_connection_no_date_config(api, some_config): some_config.pop("start_date") ok, error_msg = SourceInstagram().check_connection(logger, config=some_config) diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py index 39fc889e7e48..19470cb9c22b 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py @@ -9,6 +9,7 @@ from airbyte_cdk.models import SyncMode from facebook_business import FacebookAdsApi, FacebookSession from source_instagram.streams import ( + DatetimeTransformerMixin, InstagramStream, Media, MediaInsights, @@ -32,15 +33,11 @@ def test_clear_url(config): def test_state_outdated(api, config): - assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": MagicMock()} - ) + assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": MagicMock()}) def test_state_is_not_outdated(api, config): - assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": {}} - ) + assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": {}}) def test_media_get_children(api, requests_mock, some_config): @@ -208,9 +205,9 @@ def test_user_lifetime_insights_read(api, config, user_insight_data, requests_mo @pytest.mark.parametrize( "values,expected", [ - ({"end_time": "test_end_time", "value": "test_value"}, {"date": "test_end_time", "value": "test_value"}), + ({"end_time": "2020-05-04T07:00:00+0000", "value": 
"test_value"}, {"date": "2020-05-04T07:00:00+0000", "value": "test_value"}), ({"value": "test_value"}, {"date": None, "value": "test_value"}), - ({"end_time": "test_end_time"}, {"date": "test_end_time", "value": None}), + ({"end_time": "2020-05-04T07:00:00+0000"}, {"date": "2020-05-04T07:00:00+0000", "value": None}), ({}, {"date": None, "value": None}), ], ids=[ @@ -363,3 +360,22 @@ def test_exit_gracefully(api, config, requests_mock, caplog): assert not records assert requests_mock.call_count == 6 # 4 * 1 per `metric_to_period` map + 1 `summary` request + 1 `business_account_id` request assert "Stopping syncing stream 'user_insights'" in caplog.text + + +@pytest.mark.parametrize( + "original_value, field_schema, expected", + [ + ("2020-01-01T12:00:00Z", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-01-01T12:00:00+00:00"), + ("2020-05-04T07:00:00+0000", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-05-04T07:00:00+00:00"), + (None, {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, None), + ("2020-01-01T12:00:00", {"format": "date-time", "airbyte_type": "timestamp_without_timezone"}, "2020-01-01T12:00:00"), + ("2020-01-01T14:00:00", {"format": "date-time"}, "2020-01-01T14:00:00"), + ("2020-02-03T12:00:00", {"type": "string"}, "2020-02-03T12:00:00"), + ], +) +def test_custom_transform_datetime_rfc3339(original_value, field_schema, expected): + # Call the static method + result = DatetimeTransformerMixin.custom_transform_datetime_rfc3339(original_value, field_schema) + + # Assert the result matches the expected output + assert result == expected diff --git a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml index 11ff4b1bbbcc..ef06fb27bda0 100644 --- a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml +++ b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002 - dockerImageTag: 0.8.2 + dockerImageTag: 0.8.3 dockerRepository: airbyte/source-mailchimp documentationUrl: https://docs.airbyte.com/integrations/sources/mailchimp githubIssueLabel: source-mailchimp diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json index fe6dcf6a599b..34e513022879 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json @@ -138,7 +138,7 @@ "description": "The number of unique opens divided by the total number of successful deliveries." }, "last_open": { - "type": "string", + "type": ["null", "string"], "format": "date-time", "title": "Last Open", "description": "The date and time of the last recorded open in ISO 8601 format." 
diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py index d5239e7f3ecf..27df31f5b05c 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py @@ -274,21 +274,28 @@ class Reports(IncrementalMailChimpStream): cursor_field = "send_time" data_field = "reports" + @staticmethod + def remove_empty_datetime_fields(record: Mapping[str, Any]) -> Mapping[str, Any]: + """ + In some cases, the 'clicks.last_click' and 'opens.last_open' fields are returned as an empty string, + which causes validation errors on the `date-time` format. + To avoid this, we remove the fields if they are empty. + """ + clicks = record.get("clicks", {}) + opens = record.get("opens", {}) + if not clicks.get("last_click"): + clicks.pop("last_click", None) + if not opens.get("last_open"): + opens.pop("last_open", None) + return record + def path(self, **kwargs) -> str: return "reports" def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - response = super().parse_response(response, **kwargs) - - # In some cases, the 'last_click' field is returned as an empty string, - # which causes validation errors on the `date-time` format. - # To avoid this, we remove the field if it is empty. for record in response: - clicks = record.get("clicks", {}) - if not clicks.get("last_click"): - clicks.pop("last_click", None) - yield record + yield self.remove_empty_datetime_fields(record) class Segments(MailChimpListSubStream): diff --git a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py index 021f50470920..094eb4fe0bf5 100644 --- a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py @@ -10,7 +10,7 @@ import responses from airbyte_cdk.models import SyncMode from requests.exceptions import HTTPError -from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Segments +from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Reports, Segments from utils import read_full_refresh, read_incremental @@ -413,3 +413,39 @@ def test_403_error_handling( # Handle non-403 error except HTTPError as e: assert e.response.status_code == status_code + +@pytest.mark.parametrize( + "record, expected_return", + [ + ( + {"clicks": {"last_click": ""}, "opens": {"last_open": ""}}, + {"clicks": {}, "opens": {}}, + ), + ( + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": ""}}, + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {}}, + ), + ( + {"clicks": {"last_click": ""}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + {"clicks": {}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + + ), + ( + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + ), + ], + ids=[ + "last_click and last_open empty", + "last_click empty", + "last_open empty", + "last_click and last_open not empty" + ] +) +def test_reports_remove_empty_datetime_fields(auth, record, expected_return): + """ + Tests that the Reports stream removes the 'clicks' and 'opens' 
fields from the response + when they are empty strings + """ + stream = Reports(authenticator=auth) + assert stream.remove_empty_datetime_fields(record) == expected_return, f"Expected: {expected_return}, Actual: {stream.remove_empty_datetime_fields(record)}" diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle b/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle index f2031037e47e..c27161e3af74 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle @@ -4,13 +4,11 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() - configurations.all { resolutionStrategy { force libs.jooq @@ -26,6 +24,7 @@ dependencies { implementation project(':airbyte-integrations:connectors:source-mssql') implementation libs.jooq + testImplementation testFixtures(project(':airbyte-integrations:connectors:source-mssql')) testImplementation 'org.apache.commons:commons-lang3:3.11' testImplementation libs.testcontainers.mssqlserver testImplementation 'org.hamcrest:hamcrest-all:1.3' diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties b/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml b/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml index e91c1a8ccebb..91f70101f0e4 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: database connectorType: source definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 - dockerImageTag: 3.0.0 + dockerImageTag: 3.0.1 dockerRepository: airbyte/source-mssql-strict-encrypt githubIssueLabel: source-mssql icon: mssql.svg diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java index 8c727973ef1d..8687b6c81822 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java @@ -19,7 +19,7 @@ public class MssqlSourceStrictEncrypt extends SpecModifyingSource implements Sou private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceStrictEncrypt.class); public MssqlSourceStrictEncrypt() { - super(MssqlSource.sshWrappedSource()); + super(MssqlSource.sshWrappedSource(new MssqlSource())); } @Override diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java 
b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java index 69ec87ddd4c6..c584e76113cd 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java @@ -5,94 +5,48 @@ package io.airbyte.integrations.source.mssql_strict_encrypt; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.mssql.MsSQLContainerFactory; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConnectorSpecification; -import java.sql.SQLException; import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlStrictEncryptSourceAcceptanceTest extends SourceAcceptanceTest { protected static final String SCHEMA_NAME = "dbo"; protected static final String STREAM_NAME = "id_and_name"; - protected static MSSQLServerContainer db; - protected JsonNode config; - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } + private MsSQLTestDatabase testdb; @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - try (final DSLContext dslContext = DSLContextFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - db.getHost(), - db.getFirstMappedPort()), - null)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - 
ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - "(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), " + - "(3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1,'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } @Override - protected void tearDown(final TestDestinationEnv testEnv) throws Exception {} + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } @Override protected String getImageName() { @@ -106,7 +60,9 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java index 4f17ea3e7b32..2aac6a760c84 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java @@ -8,127 +8,67 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; -import 
io.airbyte.integrations.source.mssql.MssqlSource; +import io.airbyte.integrations.source.mssql.MsSQLContainerFactory; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.JDBCType; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.function.Function; -import javax.sql.DataSource; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.MSSQLServerContainer; -public class MssqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +public class MssqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - private static MSSQLServerContainer dbContainer; - private static DataSource dataSource; - private JsonNode config; - - @BeforeAll - static void init() { + static { // In mssql, timestamp is generated automatically, so we need to use // the datetime type instead so that we can set the value manually. COL_TIMESTAMP_TYPE = "DATETIME"; - - if (dbContainer == null) { - dbContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - dbContainer.start(); - } - } - - @BeforeEach - public void setup() throws Exception { - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.USERNAME_KEY, dbContainer.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, dbContainer.getPassword()) - .build()); - - dataSource = DataSourceFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - dbContainer.getHost(), - dbContainer.getFirstMappedPort())); - - try { - database = new DefaultJdbcDatabase(dataSource); - - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - database.execute(ctx -> ctx.createStatement().execute(String.format("CREATE DATABASE %s;", dbName))); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - - super.setup(); - } finally { - DataSourceFactory.close(dataSource); - } - } - - @AfterAll - public static void cleanUp() throws Exception { - dbContainer.close(); } @Override - public boolean supportsSchemas() { - return true; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1"); } - @Override - public JsonNode getConfig() { - return config; + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } @Override - public Function getToDatabaseConfigFunction() { - return new MssqlSource()::toDatabaseConfig; - } - - @Override - public String getDriverClass() { - return 
MssqlSource.DRIVER_CLASS; + protected MssqlSourceStrictEncrypt source() { + return new MssqlSourceStrictEncrypt(); } @Override - public AbstractJdbcSource getJdbcSource() { - return new MssqlSource(); + protected MsSQLTestDatabase createTestDatabase() { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + final var testdb = new MsSQLTestDatabase(container); + return testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized(); } @Override - public Source getSource() { - return new MssqlSourceStrictEncrypt(); + public boolean supportsSchemas() { + return true; } @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = SshHelpers.injectSshIntoSpec(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class)); diff --git a/airbyte-integrations/connectors/source-mssql/build.gradle b/airbyte-integrations/connectors/source-mssql/build.gradle index e31c0c4c7ead..d2c14de9a601 100644 --- a/airbyte-integrations/connectors/source-mssql/build.gradle +++ b/airbyte-integrations/connectors/source-mssql/build.gradle @@ -4,7 +4,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } @@ -15,7 +15,7 @@ configurations.all { } } -airbyteJavaConnector.addCdkDependencies() + application { mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' @@ -25,12 +25,14 @@ application { dependencies { implementation libs.postgresql - implementation libs.debezium.sqlserver implementation 'com.microsoft.sqlserver:mssql-jdbc:10.2.1.jre8' implementation 'org.codehaus.plexus:plexus-utils:3.4.2' testImplementation 'org.apache.commons:commons-lang3:3.11' - testImplementation libs.testcontainers.mssqlserver testImplementation 'org.hamcrest:hamcrest-all:1.3' + testImplementation 'org.awaitility:awaitility:4.2.0' + + testImplementation libs.testcontainers.mssqlserver + testFixturesImplementation libs.testcontainers.mssqlserver } diff --git a/airbyte-integrations/connectors/source-mssql/gradle.properties b/airbyte-integrations/connectors/source-mssql/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql/metadata.yaml b/airbyte-integrations/connectors/source-mssql/metadata.yaml index 781e6b7cf1e7..dbb3a32e9a52 100644 --- a/airbyte-integrations/connectors/source-mssql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: source definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 - dockerImageTag: 3.0.0 + dockerImageTag: 3.0.1 dockerRepository: airbyte/source-mssql documentationUrl: https://docs.airbyte.com/integrations/sources/mssql githubIssueLabel: source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java index 26175fb66b13..2e27ebc2b948 100644 --- 
a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java @@ -13,6 +13,7 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.SyncMode; +import java.time.Duration; import java.util.Properties; import java.util.stream.Collectors; import org.codehaus.plexus.util.StringUtils; @@ -30,6 +31,11 @@ public class MssqlCdcHelper { private static final String CDC_SNAPSHOT_ISOLATION_FIELD = "snapshot_isolation"; private static final String CDC_DATA_TO_SYNC_FIELD = "data_to_sync"; + private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L); + + // Test execution latency is lower when heartbeats are more frequent. + private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L); + public enum ReplicationMethod { STANDARD, CDC @@ -160,6 +166,14 @@ static Properties getDebeziumProperties(final JdbcDatabase database, final Confi props.setProperty("schema.include.list", getSchema(catalog)); props.setProperty("database.names", config.get(JdbcUtils.DATABASE_KEY).asText()); + final Duration heartbeatInterval = + (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean()) + ? HEARTBEAT_INTERVAL_IN_TESTS + : HEARTBEAT_INTERVAL; + props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis())); + // TODO: enable heartbeats in MS SQL Server. Until then, the interval computed above is deliberately overridden with 0, which disables heartbeats. + props.setProperty("heartbeat.interval.ms", "0"); + if (config.has("ssl_method")) { final JsonNode sslConfig = config.get("ssl_method"); final String sslMethod = sslConfig.get("ssl_method").asText(); diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 61a2c4957ce1..5ce64b942485 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -30,7 +30,7 @@ import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource; import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager; -import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; import io.airbyte.cdk.integrations.debezium.internals.mssql.MssqlCdcTargetPosition; import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; @@ -88,8 +88,8 @@ SELECT CAST(IIF(EXISTS(SELECT TOP 1 1 FROM "%s"."%s" WHERE "%s" IS NULL), 1, 0) public static final String CDC_DEFAULT_CURSOR = "_ab_cdc_cursor"; private List schemas; - public static Source sshWrappedSource() { - return new SshWrappedSource(new MssqlSource(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + public static Source sshWrappedSource(MssqlSource source) { + return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); } public MssqlSource() { @@ -451,11 +451,16 @@ public List> getIncrementalIterators( final JsonNode sourceConfig =
database.getSourceConfig(); if (MssqlCdcHelper.isCdc(sourceConfig) && isAnyStreamIncrementalSyncMode(catalog)) { LOGGER.info("using CDC: {}", true); - final Duration firstRecordWaitTime = FirstRecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); - final AirbyteDebeziumHandler handler = - new AirbyteDebeziumHandler<>(sourceConfig, - MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText()), true, firstRecordWaitTime, - OptionalInt.empty()); + final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); + final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(sourceConfig); + final var targetPosition = MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText()); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>( + sourceConfig, + targetPosition, + true, + firstRecordWaitTime, + subsequentRecordWaitTime, + OptionalInt.empty()); final MssqlCdcConnectorMetadataInjector mssqlCdcConnectorMetadataInjector = MssqlCdcConnectorMetadataInjector.getInstance(emittedAt); @@ -565,7 +570,7 @@ private void readSsl(final JsonNode sslMethod, final List additionalPara } public static void main(final String[] args) throws Exception { - final Source source = MssqlSource.sshWrappedSource(); + final Source source = MssqlSource.sshWrappedSource(new MssqlSource()); LOGGER.info("starting source: {}", MssqlSource.class); new IntegrationRunner(source).run(args); LOGGER.info("completed source: {}", MssqlSource.class); diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java index cb6457935a88..914f294ed515 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java @@ -4,18 +4,14 @@ package io.airbyte.integrations.source.mssql; -import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.JsonSchemaType; -import org.jooq.DSLContext; -import org.testcontainers.containers.MSSQLServerContainer; public abstract class AbstractMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { - protected static MSSQLServerContainer container; - protected JsonNode config; - protected DSLContext dslContext; + protected MsSQLTestDatabase testdb; @Override protected String getNameSpace() { @@ -28,14 +24,11 @@ protected String getImageName() { } @Override - protected JsonNode getConfig() { - return config; + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); } - protected static final String DB_NAME = "comprehensive"; - - protected static final String CREATE_TABLE_SQL = - "USE " + DB_NAME + "\nCREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; + protected static final String CREATE_TABLE_SQL = "CREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; @Override protected void initTests() { 
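Across these hunks the MSSQL test suites drop their hand-rolled MSSQLServerContainer, DSLContext and ImmutableMap config plumbing in favor of the MsSQLTestDatabase fixture backed by MsSQLContainerFactory. As a rough sketch of the resulting test shape, using only factory and builder methods that appear in this diff (the class name and seed statements below are illustrative, not part of the change):

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.integrations.source.mssql.MsSQLTestDatabase;

public class ExampleMssqlFixtureUsage {

  private MsSQLTestDatabase testdb;

  void setup() {
    // Provision a test database on a container for the given image, then seed it
    // through the fluent with(...) API instead of issuing queries by hand.
    testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04")
        .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));")
        .with("INSERT INTO id_and_name (id, name) VALUES (1, 'picard'), (2, 'crusher');");
  }

  JsonNode getConfig() {
    // The connector config is derived from the fixture rather than assembled from
    // host/port/username/password constants.
    return testdb.integrationTestConfigBuilder()
        .withoutSsl()
        .build();
  }

  void tearDown() {
    // Mirrors the tearDown() of the migrated tests.
    testdb.close();
  }
}

The CDC and SSH variants in the hunks below follow the same pattern, with extra arguments such as "withAgent" or "withNetwork" passed to MsSQLTestDatabase.in(...) and builder calls like withCdcReplication() on the config builder.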
diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java index a1e355617284..3c5073d32c59 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java @@ -5,12 +5,7 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; @@ -25,91 +20,50 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.HashMap; -import java.util.Objects; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.testcontainers.containers.JdbcDatabaseContainer; -import org.testcontainers.containers.MSSQLServerContainer; -import org.testcontainers.containers.Network; public abstract class AbstractSshMssqlSourceAcceptanceTest extends SourceAcceptanceTest { private static final String STREAM_NAME = "dbo.id_and_name"; private static final String STREAM_NAME2 = "dbo.starships"; - private static final Network network = Network.newNetwork(); - private static JsonNode config; - private String dbName; - private MSSQLServerContainer db; - private final SshBastionContainer bastion = new SshBastionContainer(); public abstract SshTunnel.TunnelMethod getTunnelMethod(); - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - startTestContainers(); - config = bastion.getTunnelConfig(getTunnelMethod(), getMSSQLDbConfigBuilder(db), false); - populateDatabaseTestData(); - } - - public ImmutableMap.Builder getMSSQLDbConfigBuilder(final JdbcDatabaseContainer db) { - dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(db.getContainerInfo().getNetworkSettings() - .getNetworks() - .get(((Network.NetworkImpl) network).getName()) - .getIpAddress())) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .put(JdbcUtils.PORT_KEY, db.getExposedPorts().get(0)) - .put(JdbcUtils.DATABASE_KEY, dbName); - } - - private Database getDatabaseFromConfig(final JsonNode config) { - final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - db.getHost(), - db.getFirstMappedPort()), - null); - return new Database(dslContext); - } - - private void startTestContainers() { - 
bastion.initAndStartBastion(network); - initAndStartJdbcContainer(); - } + protected MsSQLTestDatabase testdb; + protected SshBastionContainer bastion; - private void initAndStartJdbcContainer() { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2017-latest") - .withNetwork(network) - .acceptLicense(); - db.start(); + @Override + protected JsonNode getConfig() { + try { + return testdb.integrationTestConfigBuilder() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false)) + .build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } - private void populateDatabaseTestData() throws Exception { - SshTunnel.sshWrap( - getConfig(), - JdbcUtils.HOST_LIST_KEY, - JdbcUtils.PORT_LIST_KEY, - mangledConfig -> { - getDatabaseFromConfig(mangledConfig).query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("ALTER DATABASE %s SET AUTO_CLOSE OFF WITH NO_WAIT;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - }); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2017-latest", "withNetwork"); + testdb = testdb + .with("ALTER DATABASE %s SET AUTO_CLOSE OFF WITH NO_WAIT;", testdb.getDatabaseName()) + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - bastion.stopAndCloseContainers(db); + bastion.close(); + testdb.close(); } @Override @@ -122,11 +76,6 @@ protected ConnectorSpecification getSpec() throws Exception { return SshHelpers.getSpecAndInjectSsh(); } - @Override - protected JsonNode getConfig() { - return config; - } - @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java index c36789482e36..141d4163f209 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java @@ -5,18 +5,10 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import 
io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -26,32 +18,15 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.util.List; -import java.util.Map; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class CdcMssqlSourceAcceptanceTest extends SourceAcceptanceTest { private static final String SCHEMA_NAME = "dbo"; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - private static final String TEST_USER_PASSWORD = "testerjester[1]"; private static final String CDC_ROLE_NAME = "cdc_selector"; - public static MSSQLServerContainer container; - private String dbName; - private String testUserName; - private JsonNode config; - private Database database; - private DSLContext dslContext; - @AfterAll - public static void closeContainer() { - if (container != null) { - container.close(); - container.stop(); - } - } + private MsSQLTestDatabase testdb; @Override protected String getImageName() { @@ -65,7 +40,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override @@ -103,123 +81,40 @@ protected JsonNode getState() { } @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws InterruptedException { - if (container == null) { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - } - - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - testUserName = Strings.addRandomSuffix("test", "_", 5).toLowerCase(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", 5, - "snapshot_isolation", "Snapshot")); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, testUserName) - .put(JdbcUtils.PASSWORD_KEY, TEST_USER_PASSWORD) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); - - dslContext = DSLContextFactory.create(DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - container.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - database = new Database(dslContext); - - executeQuery("CREATE DATABASE " + dbName + ";"); - executeQuery("ALTER DATABASE " + dbName + "\n\tSET ALLOW_SNAPSHOT_ISOLATION ON"); - executeQuery("USE " + dbName + "\n" + 
"EXEC sys.sp_cdc_enable_db"); - - setupTestUser(); - revokeAllPermissions(); - createAndPopulateTables(); - grantCorrectPermissions(); - } - - private void setupTestUser() { - executeQuery("USE " + dbName); - executeQuery("CREATE LOGIN " + testUserName + " WITH PASSWORD = '" + TEST_USER_PASSWORD + "';"); - executeQuery("CREATE USER " + testUserName + " FOR LOGIN " + testUserName + ";"); - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL FROM " + testUserName + " CASCADE;"); - executeQuery("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO " + testUserName + ";\""); - } - - private void createAndPopulateTables() throws InterruptedException { - executeQuery(String.format("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", - SCHEMA_NAME, STREAM_NAME)); - executeQuery(String.format("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", - SCHEMA_NAME, STREAM_NAME)); - executeQuery(String.format("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", - SCHEMA_NAME, STREAM_NAME2)); - executeQuery(String.format("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", - SCHEMA_NAME, STREAM_NAME2)); - - // sometimes seeing an error that we can't enable cdc on a table while sql server agent is still - // spinning up - // solving with a simple while retry loop - boolean failingToStart = true; - int retryNum = 0; - final int maxRetries = 10; - while (failingToStart) { - try { - // enabling CDC on each table - final String[] tables = {STREAM_NAME, STREAM_NAME2}; - for (final String table : tables) { - executeQuery(String.format( - "EXEC sys.sp_cdc_enable_table\n" - + "\t@source_schema = N'%s',\n" - + "\t@source_name = N'%s', \n" - + "\t@role_name = N'%s',\n" - + "\t@supports_net_changes = 0", - SCHEMA_NAME, table, CDC_ROLE_NAME)); - } - failingToStart = false; - } catch (final Exception e) { - if (retryNum >= maxRetries) { - throw e; - } else { - retryNum++; - Thread.sleep(10000); // 10 seconds - } - } - } - } - - private void grantCorrectPermissions() { - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testUserName)); - executeQuery(String.format("USE %s;\n" + "GRANT SELECT ON SCHEMA :: [%s] TO %s", dbName, "cdc", testUserName)); - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName)); - } - - private void executeQuery(final String query) { - try { - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected void setupEnvironment(final TestDestinationEnv environment) { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest", "withAgent"); + final var enableCdcSqlFmt = """ + EXEC sys.sp_cdc_enable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@role_name = N'%s', + \t@supports_net_changes = 0"""; + testdb + .withSnapshotIsolation() + .withCdc() + .withWaitUntilAgentRunning() + // create tables + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME) + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + // populate tables + .with("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", SCHEMA_NAME, STREAM_NAME) + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", SCHEMA_NAME, STREAM_NAME2) + // enable cdc on tables for designated role + .with(enableCdcSqlFmt, 
SCHEMA_NAME, STREAM_NAME, CDC_ROLE_NAME) + .with(enableCdcSqlFmt, SCHEMA_NAME, STREAM_NAME2, CDC_ROLE_NAME) + .withWaitUntilMaxLsnAvailable() + // revoke user permissions + .with("REVOKE ALL FROM %s CASCADE;", testdb.getUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testdb.getUserName()) + // grant user permissions + .with("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testdb.getUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testdb.getUserName()); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); + testdb.close(); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java index 67a7cafa9798..43393443805f 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java @@ -5,73 +5,25 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.testcontainers.containers.MSSQLServerContainer; public class CdcMssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override - protected Database setupDatabase() throws Exception { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", 5, - "snapshot_isolation", "Snapshot")); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, DB_NAME) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); - - dslContext = DSLContextFactory.create(DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - container.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - final Database database = new Database(dslContext); - 
- executeQuery("CREATE DATABASE " + DB_NAME + ";"); - executeQuery("ALTER DATABASE " + DB_NAME + "\n\tSET ALLOW_SNAPSHOT_ISOLATION ON"); - executeQuery("USE " + DB_NAME + "\n" + "EXEC sys.sp_cdc_enable_db"); - - return database; - } - - private void executeQuery(final String query) { - try { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest", "withAgent") + .withSnapshotIsolation() + .withCdc(); + return testdb.getDatabase(); } @Override @@ -81,39 +33,39 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc } private void enableCdcOnAllTables() { - executeQuery("USE " + DB_NAME + "\n" - + "DECLARE @TableName VARCHAR(100)\n" - + "DECLARE @TableSchema VARCHAR(100)\n" - + "DECLARE CDC_Cursor CURSOR FOR\n" - + " SELECT * FROM ( \n" - + " SELECT Name,SCHEMA_NAME(schema_id) AS TableSchema\n" - + " FROM sys.objects\n" - + " WHERE type = 'u'\n" - + " AND is_ms_shipped <> 1\n" - + " ) CDC\n" - + "OPEN CDC_Cursor\n" - + "FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema\n" - + "WHILE @@FETCH_STATUS = 0\n" - + " BEGIN\n" - + " DECLARE @SQL NVARCHAR(1000)\n" - + " DECLARE @CDC_Status TINYINT\n" - + " SET @CDC_Status=(SELECT COUNT(*)\n" - + " FROM cdc.change_tables\n" - + " WHERE Source_object_id = OBJECT_ID(@TableSchema+'.'+@TableName))\n" - + " --IF CDC is not enabled on Table, Enable CDC\n" - + " IF @CDC_Status <> 1\n" - + " BEGIN\n" - + " SET @SQL='EXEC sys.sp_cdc_enable_table\n" - + " @source_schema = '''+@TableSchema+''',\n" - + " @source_name = ''' + @TableName\n" - + " + ''',\n" - + " @role_name = null;'\n" - + " EXEC sp_executesql @SQL\n" - + " END\n" - + " FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema\n" - + "END\n" - + "CLOSE CDC_Cursor\n" - + "DEALLOCATE CDC_Cursor"); + testdb.with(""" + DECLARE @TableName VARCHAR(100) + DECLARE @TableSchema VARCHAR(100) + DECLARE CDC_Cursor CURSOR FOR + SELECT * FROM ( + SELECT Name,SCHEMA_NAME(schema_id) AS TableSchema + FROM sys.objects + WHERE type = 'u' + AND is_ms_shipped <> 1 + ) CDC + OPEN CDC_Cursor + FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema + WHILE @@FETCH_STATUS = 0 + BEGIN + DECLARE @SQL NVARCHAR(1000) + DECLARE @CDC_Status TINYINT + SET @CDC_Status=(SELECT COUNT(*) + FROM cdc.change_tables + WHERE Source_object_id = OBJECT_ID(@TableSchema+'.'+@TableName)) + --IF CDC is not enabled on Table, Enable CDC + IF @CDC_Status <> 1 + BEGIN + SET @SQL='EXEC sys.sp_cdc_enable_table + @source_schema = '''+@TableSchema+''', + @source_name = ''' + @TableName + + ''', + @role_name = null;' + EXEC sp_executesql @SQL + END + FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema + END + CLOSE CDC_Cursor + DEALLOCATE CDC_Cursor"""); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java index 90f0095602d6..526ea54602ab 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java @@ -5,19 
+5,10 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -25,61 +16,28 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import java.sql.SQLException; import java.util.HashMap; -import java.util.Map; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlSourceAcceptanceTest extends SourceAcceptanceTest { protected static final String SCHEMA_NAME = "dbo"; protected static final String STREAM_NAME = "id_and_name"; - protected static MSSQLServerContainer db; - protected JsonNode config; - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } + protected MsSQLTestDatabase testdb; @Override protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - "(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } @Override - protected void tearDown(final TestDestinationEnv testEnv) throws Exception {} + protected void tearDown(final 
TestDestinationEnv testEnv) { + testdb.close(); + } @Override protected String getImageName() { @@ -93,7 +51,9 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .build(); } @Override @@ -111,19 +71,4 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } - private static DSLContext getDslContext(final JsonNode config) { - return DSLContextFactory.create(DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - db.getHost(), - db.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); - } - } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java index be6d6d9167a0..93abd3355758 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java @@ -5,70 +5,21 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.jooq.DSLContext; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { @Override - protected Database setupDatabase() throws Exception { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest") - .acceptLicense(); - container.start(); - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .build()); - - dslContext = getDslContext(configWithoutDbName); - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", DB_NAME)); - ctx.fetch(String.format("USE %s;", DB_NAME)); - return null; - }); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, DB_NAME); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); - - return database; - } - - private static DSLContext getDslContext(final JsonNode config) { - return DSLContextFactory.create(DataSourceFactory.create( - 
config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + return testdb.getDatabase(); } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.stop(); - container.close(); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java index 6db7a7a48bff..397b36494870 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java @@ -5,80 +5,32 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.sql.SQLException; import java.util.Map; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class SslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest { - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - 
"(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), " + - "(3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - } - - private DSLContext getDslContext(final JsonNode baseConfig) { - return DSLContextFactory.create( - baseConfig.get(JdbcUtils.USERNAME_KEY).asText(), - baseConfig.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - db.getHost(), - db.getFirstMappedPort()), - null); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java index 29f1e91fc7a5..73ccccee6c35 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java @@ -9,9 +9,9 @@ import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR; import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO; import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN; -import static io.airbyte.integrations.source.mssql.MssqlSource.DRIVER_CLASS; import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET; import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY; +import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -24,218 +24,157 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; import 
io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.debezium.CdcSourceTest; import io.airbyte.cdk.integrations.debezium.internals.mssql.MssqlCdcTargetPosition; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.AirbyteStream; import io.airbyte.protocol.models.v0.SyncMode; import io.debezium.connector.sqlserver.Lsn; -import java.sql.SQLException; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Optional; import javax.sql.DataSource; -import org.jooq.DSLContext; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MSSQLServerContainer; +import org.testcontainers.utility.DockerImageName; -public class CdcMssqlSourceTest extends CdcSourceTest { - - private static final String CDC_ROLE_NAME = "cdc_selector"; - private static final String TEST_USER_PASSWORD = "testerjester[1]"; - public static MSSQLServerContainer container; - - private String testUserName; - private String dbName; - private String dbNamewithDot; - private Database database; - private JdbcDatabase testJdbcDatabase; - private MssqlSource source; - private JsonNode config; - private DSLContext dslContext; - private DataSource dataSource; - private DataSource testDataSource; - - @BeforeEach - public void setup() throws SQLException { - init(); - setupTestUser(); - revokeAllPermissions(); - super.setup(); - grantCorrectPermissions(); - } - - @BeforeAll - public static void createContainer() { - if (container == null) { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - } - } +public class CdcMssqlSourceTest extends CdcSourceTest { - @AfterAll - public static void closeContainer() { - if (container != null) { - container.close(); - container.stop(); - } - } - - private void init() { - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - testUserName = Strings.addRandomSuffix("test", "_", 5).toLowerCase(); - dbNamewithDot = Strings.addRandomSuffix("db", ".", 10).toLowerCase(); - source = new MssqlSource(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", INITIAL_WAITING_SECONDS, - "snapshot_isolation", "Snapshot")); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) - .put(JdbcUtils.USERNAME_KEY, testUserName) - .put(JdbcUtils.PASSWORD_KEY, TEST_USER_PASSWORD) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); + static private final String CDC_ROLE_NAME = "cdc_selector"; - dataSource = 
DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%d", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")); - - testDataSource = DataSourceFactory.create( - testUserName, - TEST_USER_PASSWORD, - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%d", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")); + static private final String TEST_USER_NAME_PREFIX = "cdc_test_user"; - dslContext = DSLContextFactory.create(dataSource, null); + // Deliberately do not share this test container, as we're going to mutate the global SQL Server + // state. + static private final MSSQLServerContainer UNSHARED_CONTAINER = new MsSQLContainerFactory() + .createNewContainer(DockerImageName.parse("mcr.microsoft.com/mssql/server:2022-latest")); - database = new Database(dslContext); - - testJdbcDatabase = new DefaultJdbcDatabase(testDataSource); - - executeQuery("CREATE DATABASE " + dbName + ";"); - executeQuery("CREATE DATABASE [" + dbNamewithDot + "];"); - switchSnapshotIsolation(true, dbName); - } + private DataSource testDataSource; - private void switchSnapshotIsolation(final Boolean on, final String db) { - final String onOrOff = on ? "ON" : "OFF"; - executeQuery("ALTER DATABASE " + db + "\n\tSET ALLOW_SNAPSHOT_ISOLATION " + onOrOff); + @BeforeAll + static public void beforeAll() { + new MsSQLContainerFactory().withAgent(UNSHARED_CONTAINER); + UNSHARED_CONTAINER.start(); } - private void setupTestUser() { - executeQuery("USE " + dbName); - executeQuery("CREATE LOGIN " + testUserName + " WITH PASSWORD = '" + TEST_USER_PASSWORD + "';"); - executeQuery("CREATE USER " + testUserName + " FOR LOGIN " + testUserName + ";"); + @AfterAll + static void afterAll() { + UNSHARED_CONTAINER.close(); } - private void revokeAllPermissions() { - executeQuery("REVOKE ALL FROM " + testUserName + " CASCADE;"); - executeQuery("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO " + testUserName + ";\""); + private String testUserName() { + return testdb.withNamespace(TEST_USER_NAME_PREFIX); } - private void alterPermissionsOnSchema(final Boolean grant, final String schema) { - final String grantOrRemove = grant ? 
"GRANT" : "REVOKE"; - executeQuery(String.format("USE %s;\n" + "%s SELECT ON SCHEMA :: [%s] TO %s", dbName, grantOrRemove, schema, testUserName)); + @Override + protected MsSQLTestDatabase createTestDatabase() { + final var testdb = new MsSQLTestDatabase(UNSHARED_CONTAINER); + return testdb + .withConnectionProperty("encrypt", "false") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .withSnapshotIsolation() + .withCdc() + .withWaitUntilAgentRunning(); } - private void grantCorrectPermissions() { - alterPermissionsOnSchema(true, MODELS_SCHEMA); - alterPermissionsOnSchema(true, MODELS_SCHEMA + "_random"); - alterPermissionsOnSchema(true, "cdc"); - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName)); + @Override + protected MssqlSource source() { + final var source = new MssqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public String createSchemaQuery(final String schemaName) { - return "CREATE SCHEMA " + schemaName; + protected JsonNode config() { + return testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withSchemas(modelsSchema(), randomSchema()) + .withCdcReplication() + .withoutSsl() + .build(); } - // TODO : Delete this Override when MSSQL supports individual table snapshot @Override - public void newTableSnapshotTest() { - // Do nothing + @BeforeEach + protected void setup() { + super.setup(); + + // Enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access. + final var enableCdcSqlFmt = """ + EXEC sys.sp_cdc_enable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@role_name = N'%s', + \t@supports_net_changes = 0"""; + testdb + .with(enableCdcSqlFmt, modelsSchema(), MODELS_STREAM_NAME, CDC_ROLE_NAME) + .with(enableCdcSqlFmt, randomSchema(), RANDOM_TABLE_NAME, CDC_ROLE_NAME); + + // Create a test user to be used by the source, with proper permissions. + testdb + .with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName()) + .with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName()) + .with("REVOKE ALL FROM %s CASCADE;", testUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", modelsSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", randomSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()) + .with("USE [master]") + .with("GRANT VIEW SERVER STATE TO %s", testUserName()) + .with("USE [%s]", testdb.getDatabaseName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName()); + + testDataSource = DataSourceFactory.create( + testUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + Map.of("encrypt", "false")); } @Override - protected String randomTableSchema() { - return MODELS_SCHEMA + "_random"; + @AfterEach + protected void tearDown() { + try { + DataSourceFactory.close(testDataSource); + } catch (Exception e) { + throw new RuntimeException(e); + } + super.tearDown(); + } - private void switchCdcOnDatabase(final Boolean enable, final String db) { - final String storedProc = enable ? 
"sys.sp_cdc_enable_db" : "sys.sp_cdc_disable_db"; - executeQuery("USE [" + db + "]\n" + "EXEC " + storedProc); + private JdbcDatabase testDatabase() { + return new DefaultJdbcDatabase(testDataSource); } + // TODO : Delete this Override when MSSQL supports individual table snapshot @Override - public void createTable(final String schemaName, final String tableName, final String columnClause) { - switchCdcOnDatabase(true, dbName); - super.createTable(schemaName, tableName, columnClause); - - // sometimes seeing an error that we can't enable cdc on a table while sql server agent is still - // spinning up - // solving with a simple while retry loop - boolean failingToStart = true; - int retryNum = 0; - final int maxRetries = 10; - while (failingToStart) { - try { - executeQuery(String.format( - "EXEC sys.sp_cdc_enable_table\n" - + "\t@source_schema = N'%s',\n" - + "\t@source_name = N'%s', \n" - + "\t@role_name = N'%s',\n" - + "\t@supports_net_changes = 0", - schemaName, tableName, CDC_ROLE_NAME)); // enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access - failingToStart = false; - } catch (final Exception e) { - if (retryNum >= maxRetries) { - throw e; - } else { - retryNum++; - try { - Thread.sleep(10000); // 10 seconds - } catch (final InterruptedException ex) { - throw new RuntimeException(ex); - } - } - } - } + public void newTableSnapshotTest() { + // Do nothing } @Override - public String columnClause(final Map columnsWithDataType, final Optional primaryKey) { + protected String columnClause(final Map columnsWithDataType, final Optional primaryKey) { final StringBuilder columnClause = new StringBuilder(); int i = 0; for (final Map.Entry column : columnsWithDataType.entrySet()) { @@ -254,59 +193,42 @@ public String columnClause(final Map columnsWithDataType, final return columnClause.toString(); } - @AfterEach - public void tearDown() { - try { - dslContext.close(); - DataSourceFactory.close(dataSource); - DataSourceFactory.close(testDataSource); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Test void testAssertCdcEnabledInDb() { // since we enable cdc in setup, assert that we successfully pass this first - assertDoesNotThrow(() -> source.assertCdcEnabledInDb(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertCdcEnabledInDb(config(), testDatabase())); // then disable cdc and assert the check fails - switchCdcOnDatabase(false, dbName); - assertThrows(RuntimeException.class, () -> source.assertCdcEnabledInDb(config, testJdbcDatabase)); + testdb.withoutCdc(); + assertThrows(RuntimeException.class, () -> source().assertCdcEnabledInDb(config(), testDatabase())); } @Test void testAssertCdcSchemaQueryable() { // correct access granted by setup so assert check passes - assertDoesNotThrow(() -> source.assertCdcSchemaQueryable(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertCdcSchemaQueryable(config(), testDatabase())); // now revoke perms and assert that check fails - alterPermissionsOnSchema(false, "cdc"); - assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class, () -> source.assertCdcSchemaQueryable(config, testJdbcDatabase)); - } - - private void switchSqlServerAgentAndWait(final Boolean start) throws InterruptedException { - final String startOrStop = start ? 
"START" : "STOP"; - executeQuery(String.format("EXEC xp_servicecontrol N'%s',N'SQLServerAGENT';", startOrStop)); - Thread.sleep(15 * 1000); // 15 seconds to wait for change of agent state + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class, + () -> source().assertCdcSchemaQueryable(config(), testDatabase())); } @Test - void testAssertSqlServerAgentRunning() throws InterruptedException { - executeQuery(String.format("USE master;\n" + "GRANT VIEW SERVER STATE TO %s", testUserName)); + void testAssertSqlServerAgentRunning() { + testdb.withAgentStopped().withWaitUntilAgentStopped(); // assert expected failure if sql server agent stopped - switchSqlServerAgentAndWait(false); - assertThrows(RuntimeException.class, () -> source.assertSqlServerAgentRunning(testJdbcDatabase)); + assertThrows(RuntimeException.class, () -> source().assertSqlServerAgentRunning(testDatabase())); // assert success if sql server agent running - switchSqlServerAgentAndWait(true); - assertDoesNotThrow(() -> source.assertSqlServerAgentRunning(testJdbcDatabase)); + testdb.withAgentStarted().withWaitUntilAgentRunning(); + assertDoesNotThrow(() -> source().assertSqlServerAgentRunning(testDatabase())); } @Test void testAssertSnapshotIsolationAllowed() { // snapshot isolation enabled by setup so assert check passes - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config(), testDatabase())); // now disable snapshot isolation and assert that check fails - switchSnapshotIsolation(false, dbName); - assertThrows(RuntimeException.class, () -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + testdb.withoutSnapshotIsolation(); + assertThrows(RuntimeException.class, () -> source().assertSnapshotIsolationAllowed(config(), testDatabase())); } @Test @@ -317,10 +239,11 @@ void testAssertSnapshotIsolationDisabled() { // set snapshot_isolation level to "Read Committed" to disable snapshot .put("snapshot_isolation", "Read Committed") .build()); + final var config = config(); Jsons.replaceNestedValue(config, List.of("replication_method"), replicationConfig); - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); - switchSnapshotIsolation(false, dbName); - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config, testDatabase())); + testdb.withoutSnapshotIsolation(); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config, testDatabase())); } // Ensure the CDC check operations are included when CDC is enabled @@ -328,47 +251,52 @@ void testAssertSnapshotIsolationDisabled() { @Test void testCdcCheckOperations() throws Exception { // assertCdcEnabledInDb - switchCdcOnDatabase(false, dbName); - AirbyteConnectionStatus status = getSource().check(getConfig()); + testdb.withoutCdc(); + AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - switchCdcOnDatabase(true, dbName); + testdb.withCdc(); // assertCdcSchemaQueryable - alterPermissionsOnSchema(false, "cdc"); - status = getSource().check(getConfig()); + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - 
alterPermissionsOnSchema(true, "cdc"); + testdb.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + // assertSqlServerAgentRunning - executeQuery(String.format("USE master;\n" + "GRANT VIEW SERVER STATE TO %s", testUserName)); - switchSqlServerAgentAndWait(false); - status = getSource().check(getConfig()); + + testdb.withAgentStopped().withWaitUntilAgentStopped(); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - switchSqlServerAgentAndWait(true); + testdb.withAgentStarted().withWaitUntilAgentRunning(); // assertSnapshotIsolationAllowed - switchSnapshotIsolation(false, dbName); - status = getSource().check(getConfig()); + testdb.withoutSnapshotIsolation(); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); } @Test void testCdcCheckOperationsWithDot() throws Exception { - // assertCdcEnabledInDb and validate escape with special character - switchCdcOnDatabase(true, dbNamewithDot); - final AirbyteConnectionStatus status = getSource().check(getConfig()); + final String dbNameWithDot = testdb.getDatabaseName().replace("_", "."); + testdb.with("CREATE DATABASE [%s];", dbNameWithDot) + .with("USE [%s]", dbNameWithDot) + .with("EXEC sys.sp_cdc_enable_db;"); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED); } // todo: check LSN returned is actually the max LSN // todo: check we fail as expected under certain conditions @Test - void testGetTargetPosition() throws InterruptedException { - Thread.sleep(10 * 1000); // Sleeping because sometimes the db is not yet completely ready and the lsn is not found + void testGetTargetPosition() { // check that getTargetPosition returns higher Lsn after inserting new row - final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testJdbcDatabase, dbName).targetLsn; - executeQuery(String.format("USE %s; INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", - dbName, MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car")); - Thread.sleep(15 * 1000); // 15 seconds to wait for Agent capture job to log cdc change - final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testJdbcDatabase, dbName).targetLsn; - assertTrue(secondLsn.compareTo(firstLsn) > 0); + testdb.withWaitUntilMaxLsnAvailable(); + final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car"); + // Wait for Agent capture job to log CDC change. 
+ await().atMost(Duration.ofSeconds(45)).until(() -> { + final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + return secondLsn.compareTo(firstLsn) > 0; + }); } @Override @@ -382,24 +310,12 @@ protected void removeCDCColumns(final ObjectNode data) { @Override protected MssqlCdcTargetPosition cdcLatestTargetPosition() { - try { - // Sleeping because sometimes the db is not yet completely ready and the lsn is not found - Thread.sleep(5000); - } catch (final InterruptedException e) { - throw new RuntimeException(e); - } + testdb.withWaitUntilMaxLsnAvailable(); final JdbcDatabase jdbcDatabase = new StreamingJdbcDatabase( - DataSourceFactory.create(config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asInt(), - dbName), - Map.of("encrypt", "false")), + testDataSource, new MssqlSourceOperations(), AdaptiveStreamingQueryConfig::new); - return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, dbName); + return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, testdb.getDatabaseName()); } @Override @@ -451,21 +367,6 @@ protected void addCdcDefaultCursorField(final AirbyteStream stream) { } } - @Override - protected Source getSource() { - return new MssqlSource(); - } - - @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected Database getDatabase() { - return database; - } - @Override protected void assertExpectedStateMessages(final List stateMessages) { assertEquals(1, stateMessages.size()); diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java index 9c7dc3259757..37bd7ff3c770 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java @@ -9,173 +9,117 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.JDBCType; import java.util.Collections; import java.util.List; -import java.util.Map; -import javax.sql.DataSource; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import 
org.junit.jupiter.api.Test; -import org.testcontainers.containers.MSSQLServerContainer; -public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "password_3435!"; - private static MSSQLServerContainer dbContainer; - private JsonNode config; - @BeforeAll - static void init() { + static { // In mssql, timestamp is generated automatically, so we need to use // the datetime type instead so that we can set the value manually. COL_TIMESTAMP_TYPE = "DATETIME2"; - - dbContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - dbContainer.start(); } @Override - protected DataSource getDataSource(final JsonNode jdbcConfig) { - final Map connectionProperties = JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, - getJdbcParameterDelimiter()); - connectionProperties.put("encrypt", "false"); - return DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - connectionProperties); - } - - @BeforeEach - public void setup() throws Exception { - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.USERNAME_KEY, dbContainer.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, dbContainer.getPassword()) - .build()); - - final DataSource dataSource = DataSourceFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - configWithoutDbName.get(JdbcUtils.HOST_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PORT_KEY).asInt()), - Map.of("encrypt", "false")); - - try { - final JdbcDatabase database = new DefaultJdbcDatabase(dataSource); - - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - database.execute(ctx -> ctx.createStatement().execute(String.format("CREATE DATABASE %s;", dbName))); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); - - super.setup(); - } finally { - DataSourceFactory.close(dataSource); - } - } - - @AfterAll - public static void cleanUp() throws Exception { - dbContainer.close(); + protected JsonNode config() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); } @Override - public boolean supportsSchemas() { - return true; + protected MssqlSource source() { + return new MssqlSource(); } @Override - public JsonNode getConfig() { - return Jsons.clone(config); + protected MsSQLTestDatabase createTestDatabase() { + return MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest"); } @Override - public AbstractJdbcSource getJdbcSource() { - return new MssqlSource(); + public boolean supportsSchemas() { + return true; } @Override - public String getDriverClass() { - return MssqlSource.DRIVER_CLASS; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) 
config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1"); } @Test void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); } @Test public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - database.execute(ctx -> ctx.createStatement() - .execute(String.format("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION))); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + testdb.with("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, 
USERNAME_WITHOUT_PERMISSION); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java index 53637fae11fd..c14c3cad4d61 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java @@ -9,17 +9,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -29,67 +20,44 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.Collections; import java.util.List; -import java.util.Map; -import org.jooq.DSLContext; import org.junit.jupiter.api.*; -import org.testcontainers.containers.MSSQLServerContainer; class MssqlSourceTest { - private static final String DB_NAME = "dbo"; private static final String STREAM_NAME = "id_and_name"; private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(Lists.newArrayList(CatalogHelpers.createAirbyteStream( STREAM_NAME, - DB_NAME, + "dbo", Field.of("id", JsonSchemaType.INTEGER), Field.of("name", JsonSchemaType.STRING), Field.of("born", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of("id"))))); - private JsonNode configWithoutDbName; - private JsonNode config; - - private static MSSQLServerContainer db; - - @BeforeAll - static void init() { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - db.start(); - } + private MsSQLTestDatabase testdb; // how to interact with the mssql test container manaully. // 1. exec into mssql container (not the test container container) // 2. 
/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "A_Str0ng_Required_Password" @BeforeEach - void setup() throws SQLException { - configWithoutDbName = getConfig(db); - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } + void setup() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest") + .with("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', " + + "'2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); + } - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); + @AfterEach + void cleanUp() { + testdb.close(); } - @AfterAll - static void cleanUp() { - db.stop(); - db.close(); + private JsonNode getConfig() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); } // if a column in mssql is used as a primary key and in a separate index the discover query returns @@ -97,82 +65,43 @@ static void cleanUp() { // this tests that this de-duplication is successful. 
@Test void testDiscoverWithPk() throws Exception { - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("USE %s;", config.get(JdbcUtils.DATABASE_KEY))); - ctx.execute("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);"); - ctx.execute("CREATE INDEX i1 ON id_and_name (id);"); - return null; - }); - } - - final AirbyteCatalog actual = new MssqlSource().discover(config); + testdb + .with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);") + .with("CREATE INDEX i1 ON id_and_name (id);"); + final AirbyteCatalog actual = new MssqlSource().discover(getConfig()); assertEquals(CATALOG, actual); } @Test @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") public void testTableWithNullCursorValueShouldThrowException() throws Exception { - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("USE %s;", config.get(JdbcUtils.DATABASE_KEY))); - ctx.execute("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL"); - ctx.execute("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)"); - return null; - }); - - ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode( - SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - STREAM_NAME, - DB_NAME, - Field.of("id", JsonSchemaType.INTEGER), - Field.of("name", JsonSchemaType.STRING), - Field.of("born", JsonSchemaType.STRING)) - .withSupportedSyncModes( - Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams( - Collections.singletonList(configuredAirbyteStream)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet( - new MssqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. 
{tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}"); - } - } - - private JsonNode getConfig(final MSSQLServerContainer db) { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, db.getHost()) - .put(JdbcUtils.PORT_KEY, db.getFirstMappedPort()) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - } - - private static DSLContext getDslContext(final JsonNode config) { - return DSLContextFactory.create(DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asInt()), - Map.of("encrypt", "false")), null); - } - - public static Database getDatabase(final DSLContext dslContext) { - // todo (cgardens) - rework this abstraction so that we do not have to pass a null into the - // constructor. at least explicitly handle it, even if the impl doesn't change. - return new Database(dslContext); + testdb + .with("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL") + .with("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)"); + + ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode( + SyncMode.INCREMENTAL) + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + STREAM_NAME, + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.INTEGER), + Field.of("name", JsonSchemaType.STRING), + Field.of("born", JsonSchemaType.STRING)) + .withSupportedSyncModes( + Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams( + Collections.singletonList(configuredAirbyteStream)); + + final Throwable throwable = catchThrowable(() -> MoreIterators.toSet( + new MssqlSource().read(getConfig(), catalog, null))); + assertThat(throwable).isInstanceOf(ConfigErrorException.class) + .hasMessageContaining( + "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}"); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java new file mode 100644 index 000000000000..1a44218cc5c5 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.testutils.ContainerFactory; +import org.testcontainers.containers.MSSQLServerContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +public class MsSQLContainerFactory implements ContainerFactory> { + + @Override + public MSSQLServerContainer createNewContainer(DockerImageName imageName) { + return new MSSQLServerContainer<>(imageName.asCompatibleSubstituteFor("mcr.microsoft.com/mssql/server")).acceptLicense(); + } + + @Override + public Class getContainerClass() { + return MSSQLServerContainer.class; + } + + /** + * Create a new network and bind it to the container. + */ + public void withNetwork(MSSQLServerContainer container) { + container.withNetwork(Network.newNetwork()); + } + + public void withAgent(MSSQLServerContainer container) { + container.addEnv("MSSQL_AGENT_ENABLED", "True"); + } + +} diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java new file mode 100644 index 000000000000..3060ea513adc --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java @@ -0,0 +1,211 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mssql; + +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.testutils.TestDatabase; +import io.debezium.connector.sqlserver.Lsn; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.MSSQLServerContainer; + +public class MsSQLTestDatabase extends TestDatabase, MsSQLTestDatabase, MsSQLTestDatabase.MsSQLConfigBuilder> { + + static private final Logger LOGGER = LoggerFactory.getLogger(MsSQLTestDatabase.class); + + static public final int MAX_RETRIES = 60; + + static public MsSQLTestDatabase in(String imageName, String... 
methods) { + final var container = new MsSQLContainerFactory().shared(imageName, methods); + final var testdb = new MsSQLTestDatabase(container); + return testdb + .withConnectionProperty("encrypt", "false") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized(); + } + + public MsSQLTestDatabase(MSSQLServerContainer container) { + super(container); + } + + public MsSQLTestDatabase withSnapshotIsolation() { + return with("ALTER DATABASE %s SET ALLOW_SNAPSHOT_ISOLATION ON;", getDatabaseName()); + } + + public MsSQLTestDatabase withoutSnapshotIsolation() { + return with("ALTER DATABASE %s SET ALLOW_SNAPSHOT_ISOLATION OFF;", getDatabaseName()); + } + + public MsSQLTestDatabase withCdc() { + return with("EXEC sys.sp_cdc_enable_db;"); + } + + public MsSQLTestDatabase withoutCdc() { + return with("EXEC sys.sp_cdc_disable_db;"); + } + + public MsSQLTestDatabase withAgentStarted() { + return with("EXEC master.dbo.xp_servicecontrol N'START', N'SQLServerAGENT';"); + } + + public MsSQLTestDatabase withAgentStopped() { + return with("EXEC master.dbo.xp_servicecontrol N'STOP', N'SQLServerAGENT';"); + } + + public MsSQLTestDatabase withWaitUntilAgentRunning() { + waitForAgentState(true); + return self(); + } + + public MsSQLTestDatabase withWaitUntilAgentStopped() { + waitForAgentState(false); + return self(); + } + + private void waitForAgentState(final boolean running) { + final String expectedValue = running ? "Running." : "Stopped."; + LOGGER.debug("Waiting for SQLServerAgent state to change to '{}'.", expectedValue); + for (int i = 0; i < MAX_RETRIES; i++) { + try { + final var r = query(ctx -> ctx.fetch("EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';").get(0)); + if (expectedValue.equalsIgnoreCase(r.getValue(0).toString())) { + LOGGER.debug("SQLServerAgent state is '{}', as expected.", expectedValue); + return; + } + LOGGER.debug("Retrying, SQLServerAgent state {} does not match expected '{}'.", r, expectedValue); + } catch (SQLException e) { + LOGGER.debug("Retrying agent state query after catching exception {}.", e.getMessage()); + } + try { + Thread.sleep(1_000); // Wait one second between retries. + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + throw new RuntimeException("Exhausted retry attempts while polling for agent state"); + } + + public MsSQLTestDatabase withWaitUntilMaxLsnAvailable() { + LOGGER.debug("Waiting for max LSN to become available for database {}.", getDatabaseName()); + for (int i = 0; i < MAX_RETRIES; i++) { + try { + final var maxLSN = query(ctx -> ctx.fetch("SELECT sys.fn_cdc_get_max_lsn();").get(0).get(0, byte[].class)); + if (maxLSN != null) { + LOGGER.debug("Max LSN available for database {}: {}", getDatabaseName(), Lsn.valueOf(maxLSN)); + return self(); + } + LOGGER.debug("Retrying, max LSN still not available for database {}.", getDatabaseName()); + } catch (SQLException e) { + LOGGER.warn("Retrying max LSN query after catching exception {}", e.getMessage()); + } + try { + Thread.sleep(1_000); // Wait one second between retries. 
+ } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + throw new RuntimeException("Exhausted retry attempts while polling for max LSN availability"); + } + + @Override + public String getPassword() { + return "S00p3rS33kr3tP4ssw0rd!"; + } + + @Override + public String getJdbcUrl() { + return String.format("jdbc:sqlserver://%s:%d", getContainer().getHost(), getContainer().getFirstMappedPort()); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + return Stream.of( + mssqlCmd(Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()))), + mssqlCmd(Stream.of( + String.format("USE %s", getDatabaseName()), + String.format("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", getUserName(), getPassword(), getDatabaseName()), + String.format("ALTER SERVER ROLE [sysadmin] ADD MEMBER %s", getUserName()), + String.format("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", getUserName(), getUserName()), + String.format("ALTER ROLE [db_owner] ADD MEMBER %s", getUserName())))); + } + + /** + * Don't drop anything when closing the test database. Instead, if cleanup is required, call + * {@link #dropDatabaseAndUser()} explicitly. Implicit cleanups may result in deadlocks and so + * aren't really worth it. + */ + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + public void dropDatabaseAndUser() { + execInContainer(mssqlCmd(Stream.of( + String.format("USE master"), + String.format("ALTER DATABASE %s SET single_user WITH ROLLBACK IMMEDIATE", getDatabaseName()), + String.format("DROP DATABASE %s", getDatabaseName())))); + } + + public Stream mssqlCmd(Stream sql) { + return Stream.of("/opt/mssql-tools/bin/sqlcmd", + "-U", getContainer().getUsername(), + "-P", getContainer().getPassword(), + "-Q", sql.collect(Collectors.joining("; ")), + "-b", "-e"); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.MSSQLSERVER; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.DEFAULT; + } + + @Override + public MsSQLConfigBuilder configBuilder() { + return new MsSQLConfigBuilder(this); + } + + static public class MsSQLConfigBuilder extends ConfigBuilder { + + protected MsSQLConfigBuilder(MsSQLTestDatabase testDatabase) { + super(testDatabase); + } + + public MsSQLConfigBuilder withCdcReplication() { + return with("replication_method", Map.of( + "method", "CDC", + "data_to_sync", "Existing and New", + "initial_waiting_seconds", DEFAULT_CDC_REPLICATION_INITIAL_WAIT.getSeconds(), + "snapshot_isolation", "Snapshot")); + } + + public MsSQLConfigBuilder withSchemas(String... 
schemas) { + return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas)); + } + + @Override + public MsSQLConfigBuilder withoutSsl() { + return withSsl(Map.of("ssl_method", "unencrypted")); + } + + @Override + public MsSQLConfigBuilder withSsl(Map sslMode) { + return with("ssl_method", sslMode); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle b/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle index a94da305be1e..61323c304a86 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle @@ -4,12 +4,12 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() + configurations.all { resolutionStrategy { @@ -26,6 +26,7 @@ dependencies { implementation project(':airbyte-integrations:connectors:source-mysql') implementation libs.jooq + testImplementation testFixtures(project(':airbyte-integrations:connectors:source-mysql')) testImplementation libs.junit.jupiter.system.stubs testImplementation 'org.hamcrest:hamcrest-all:1.3' testImplementation libs.testcontainers.mysql diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties b/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml index 885c843f1f95..7101b9be25a2 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: database connectorType: source definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad - dockerImageTag: 3.1.5 + dockerImageTag: 3.1.8 dockerRepository: airbyte/source-mysql-strict-encrypt githubIssueLabel: source-mysql icon: mysql.svg diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java index aff5350f30d7..05583c81ddaa 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java @@ -41,7 +41,11 @@ public class MySqlStrictEncryptSource extends SpecModifyingSource implements Sou "
  • Verify Identity - Always connect with SSL. Verify both CA and Hostname.
  • Read more in the docs."; MySqlStrictEncryptSource() { - super(MySqlSource.sshWrappedSource()); + this(new MySqlSource()); + } + + MySqlStrictEncryptSource(MySqlSource source) { + super(MySqlSource.sshWrappedSource(source)); } @Override diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java deleted file mode 100644 index d69c5a0ff714..000000000000 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.source.mysql_strict_encrypt; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.MySqlUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.ssh.SshHelpers; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; - -public abstract class AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { - - protected static MySqlUtils.Certificate certs; - protected static final String PASSWORD = "Passw0rd"; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - addTestData(container); - certs = MySqlUtils.getCertificate(container, true); - - final var sslMode = getSslConfig(); - final var innerContainerAddress = SshHelpers.getInnerContainerAddress(container); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, innerContainerAddress.left) - .put(JdbcUtils.PORT_KEY, innerContainerAddress.right) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - } - - public abstract ImmutableMap getSslConfig(); - - private void addTestData(final MySQLContainer container) throws Exception { - final var outerContainerAddress = SshHelpers.getOuterContainerAddress(container); - try (final DSLContext dslContext = DSLContextFactory.create( - container.getUsername(), - container.getPassword(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - outerContainerAddress.left, - outerContainerAddress.right, - 
container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } - } - -} diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java index 5673d28039c1..c0efd449d2c0 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java @@ -4,17 +4,29 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; +import java.util.stream.Stream; -public class MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest extends AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest { +public class MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; + + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); + } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCaCertificate()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java index 332d95266fcc..6df92b5e507d 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java @@ -4,19 +4,31 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; +import 
com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; +import java.util.stream.Stream; -public class MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest extends AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest { +public class MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; + + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); + } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java index 90ae08a90fa0..35fafab62790 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java @@ -4,22 +4,19 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; -import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -29,72 +26,40 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import 
io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) +import java.util.stream.Stream; + public class MySqlStrictEncryptSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - public EnvironmentVariables environmentVariables; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "public.starships"; - protected MySQLContainer container; - protected JsonNode config; + protected MySQLTestDatabase testdb; @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MySQLContainerFactory().shared("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)); + testdb = new MySQLTestDatabase(container) + .withConnectionProperty("useSSL", "true") + .withConnectionProperty("requireSSL", "true") + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + } - var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "required") - .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s?%s", - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText(), - String.join("&", SSL_PARAMETERS)), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); + } + + @Override + protected FeatureFlags featureFlags() { + return 
FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Override @@ -109,7 +74,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSsl(ImmutableMap.of(JdbcUtils.MODE_KEY, "required")) + .withStandardReplication() + .build(); } @Override @@ -120,7 +88,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), @@ -129,7 +97,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME2), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME2), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java index 7f42cdf5040a..a0381a0dc7c9 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java @@ -8,43 +8,33 @@ * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
 */
-import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS;
 import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY;
 import static java.util.stream.Collectors.toList;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
-import io.airbyte.cdk.db.Database;
-import io.airbyte.cdk.db.MySqlUtils;
-import io.airbyte.cdk.db.factory.DSLContextFactory;
-import io.airbyte.cdk.db.factory.DatabaseDriver;
 import io.airbyte.cdk.db.jdbc.JdbcUtils;
-import io.airbyte.cdk.integrations.base.Source;
-import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
 import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
-import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
 import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest;
 import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
 import io.airbyte.commons.features.EnvVariableFeatureFlags;
+import io.airbyte.commons.features.FeatureFlagsWrapper;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.resources.MoreResources;
-import io.airbyte.commons.string.Strings;
 import io.airbyte.commons.util.MoreIterators;
+import io.airbyte.integrations.source.mysql.MySQLContainerFactory;
+import io.airbyte.integrations.source.mysql.MySQLTestDatabase;
 import io.airbyte.integrations.source.mysql.MySqlSource;
 import io.airbyte.integrations.source.mysql.internal.models.CursorBasedStatus;
 import io.airbyte.integrations.source.mysql.internal.models.InternalModels.StateType;
 import io.airbyte.protocol.models.Field;
 import io.airbyte.protocol.models.JsonSchemaType;
 import io.airbyte.protocol.models.v0.AirbyteCatalog;
-import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
-import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status;
 import io.airbyte.protocol.models.v0.AirbyteMessage;
 import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
 import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
@@ -59,128 +49,52 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode;
 import io.airbyte.protocol.models.v0.StreamDescriptor;
 import io.airbyte.protocol.models.v0.SyncMode;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import org.jooq.DSLContext;
-import org.jooq.SQLDialect;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.testcontainers.containers.MySQLContainer;
-import org.testcontainers.containers.Network;
-import uk.org.webcompere.systemstubs.environment.EnvironmentVariables;
-import uk.org.webcompere.systemstubs.jupiter.SystemStub;
-import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension;
-
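The import swap above (SystemStubs environment stubbing out, FeatureFlagsWrapper in) reflects the pattern used throughout this refactor: instead of mutating the USE_STREAM_CAPABLE_STATE environment variable, tests hand the source an explicitly overridden FeatureFlags instance. A minimal sketch of that pattern, using only the classes and the overridingUseStreamCapableState call that appear elsewhere in this diff (the surrounding helper class is illustrative):

    import io.airbyte.commons.features.EnvVariableFeatureFlags;
    import io.airbyte.commons.features.FeatureFlags;
    import io.airbyte.commons.features.FeatureFlagsWrapper;

    class FeatureFlagOverrideSketch {
      // Start from the ambient, environment-backed flags, then force the
      // stream-capable-state flag to true for the duration of the test.
      static FeatureFlags streamCapableStateForTests() {
        return FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true);
      }
    }
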
-@ExtendWith(SystemStubsExtension.class)
-class MySqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest {
-
-  @SystemStub
-  private EnvironmentVariables environmentVariables;
-
-  protected static final String TEST_USER = "test";
-  protected static final String TEST_PASSWORD = "test";
-  protected static MySQLContainer container;
-  private static final SshBastionContainer bastion = new SshBastionContainer();
-  private static final Network network = Network.newNetwork();
-
-  protected Database database;
-  protected DSLContext dslContext;
-
-  @BeforeAll
-  static void init() throws SQLException {
-    container = new MySQLContainer<>("mysql:8.0")
-        .withUsername(TEST_USER)
-        .withPassword(TEST_PASSWORD)
-        .withEnv("MYSQL_ROOT_HOST", "%")
-        .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD);
-    container.start();
-    final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", container.getPassword());
-    connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n");
-  }
-  @BeforeEach
-  public void setup() throws Exception {
-    environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true");
-    config = Jsons.jsonNode(ImmutableMap.builder()
-        .put(JdbcUtils.HOST_KEY, container.getHost())
-        .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort())
-        .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10))
-        .put(JdbcUtils.USERNAME_KEY, container.getUsername())
-        .put(JdbcUtils.PASSWORD_KEY, container.getPassword())
-        .build());
-
-    dslContext = DSLContextFactory.create(
-        config.get(JdbcUtils.USERNAME_KEY).asText(),
-        config.get(JdbcUtils.PASSWORD_KEY).asText(),
-        DatabaseDriver.MYSQL.getDriverClassName(),
-        String.format("jdbc:mysql://%s:%s?%s",
-            container.getHost(),
-            container.getFirstMappedPort(),
-            String.join("&", SSL_PARAMETERS)),
-        SQLDialect.MYSQL);
-    database = new Database(dslContext);
-
-    database.query(ctx -> {
-      ctx.fetch("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText());
-      return null;
-    });
+class MySqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest {
-    super.setup();
-  }
-
-  @AfterEach
-  void tearDownMySql() throws Exception {
-    dslContext.close();
-    super.tearDown();
-  }
-
-  @AfterAll
-  static void cleanUp() {
-    container.close();
-  }
-
-  // MySql does not support schemas in the way most dbs do. Instead we namespace by db name.
   @Override
-  public boolean supportsSchemas() {
-    return false;
+  protected JsonNode config() {
+    return testdb.testConfigBuilder().build();
   }

   @Override
-  public MySqlSource getJdbcSource() {
-    return new MySqlSource();
+  protected MySqlStrictEncryptSource source() {
+    final var source = new MySqlSource();
+    source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true));
+    return new MySqlStrictEncryptSource(source);
   }

   @Override
-  public Source getSource() {
-    return new MySqlStrictEncryptSource();
+  protected MySQLTestDatabase createTestDatabase() {
+    final var container = new MySQLContainerFactory().shared("mysql:8.0");
+    return new MySQLTestDatabase(container)
+        .withConnectionProperty("useSSL", "true")
+        .withConnectionProperty("requireSSL", "true")
+        .initialized();
   }

   @Override
-  public String getDriverClass() {
-    return MySqlSource.DRIVER_CLASS;
+  protected void maybeSetShorterConnectionTimeout(final JsonNode config) {
+    ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1000");
   }

+  // MySql does not support schemas in the way most dbs do. Instead we namespace by db name.
   @Override
-  public JsonNode getConfig() {
-    return Jsons.clone(config);
+  public boolean supportsSchemas() {
+    return false;
   }

   @Test
   void testSpec() throws Exception {
-    final ConnectorSpecification actual = source.spec();
+    final ConnectorSpecification actual = source().spec();
     final ConnectorSpecification expected = SshHelpers.injectSshIntoSpec(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class));
     assertEquals(expected, actual);
@@ -216,176 +130,42 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) {
         List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME)))));
   }
-  @Test
-  void testStrictSSLUnsecuredNoTunnel() throws Exception {
-    final String PASSWORD = "Passw0rd";
-    final var certs = MySqlUtils.getCertificate(container, true);
-    final var sslMode = ImmutableMap.builder()
-        .put(JdbcUtils.MODE_KEY, "preferred")
-        .build();
-
-    final var tunnelMode = ImmutableMap.builder()
-        .put("tunnel_method", "NO_TUNNEL")
-        .build();
-    ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake")
-        .put(JdbcUtils.SSL_KEY, true)
-        .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode));
-    ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode));
-
-    final AirbyteConnectionStatus actual = source.check(config);
-    assertEquals(Status.FAILED, actual.getStatus());
-    assertTrue(actual.getMessage().contains("Unsecured connection not allowed"));
-  }
-
-  @Test
-  void testStrictSSLSecuredNoTunnel() throws Exception {
-    final String PASSWORD = "Passw0rd";
-    final var certs = MySqlUtils.getCertificate(container, true);
-    final var sslMode = ImmutableMap.builder()
-        .put(JdbcUtils.MODE_KEY, "verify_ca")
-        .put("ca_certificate", certs.getCaCertificate())
-        .put("client_certificate", certs.getClientCertificate())
-        .put("client_key", certs.getClientKey())
-        .put("client_key_password", PASSWORD)
-        .build();
-
-    final var tunnelMode = ImmutableMap.builder()
-        .put("tunnel_method", "NO_TUNNEL")
-        .build();
-    ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake")
-        .put(JdbcUtils.SSL_KEY, true)
-        .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode));
-    ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode));
-
-    final AirbyteConnectionStatus actual = source.check(config);
-    assertEquals(Status.FAILED, actual.getStatus());
-    assertFalse(actual.getMessage().contains("Unsecured connection not allowed"));
-  }
-
-  @Test
-  void testStrictSSLSecuredWithTunnel() throws Exception {
-    final String PASSWORD = "Passw0rd";
-    final var certs = MySqlUtils.getCertificate(container, true);
-    final var sslMode = ImmutableMap.builder()
-        .put(JdbcUtils.MODE_KEY, "verify_ca")
-        .put("ca_certificate", certs.getCaCertificate())
-        .put("client_certificate", certs.getClientCertificate())
-        .put("client_key", certs.getClientKey())
-        .put("client_key_password", PASSWORD)
-        .build();
-
-    final var tunnelMode = ImmutableMap.builder()
-        .put("tunnel_method", "SSH_KEY_AUTH")
-        .build();
-    ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake")
-        .put(JdbcUtils.SSL_KEY, true)
-        .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode));
-    ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode));
-
-    final AirbyteConnectionStatus actual = source.check(config);
-    assertEquals(Status.FAILED, actual.getStatus());
-    assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."));
-  }
-
-  @Test
-  void testStrictSSLUnsecuredWithTunnel() throws Exception {
-    final String PASSWORD = "Passw0rd";
-    final var certs = MySqlUtils.getCertificate(container, true);
-    final var sslMode = ImmutableMap.builder()
-        .put(JdbcUtils.MODE_KEY, "preferred")
-        .build();
-
-    final var tunnelMode = ImmutableMap.builder()
-        .put("tunnel_method", "SSH_KEY_AUTH")
-        .build();
-    ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake")
-        .put(JdbcUtils.SSL_KEY, true)
-        .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode));
-    ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode));
-
-    final AirbyteConnectionStatus actual = source.check(config);
-    assertEquals(Status.FAILED, actual.getStatus());
-    assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."));
-  }
-
-  @Test
-  void testCheckWithSSlModeDisabled() throws Exception {
-    try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0").withNetwork(network)) {
-      bastion.initAndStartBastion(network);
-      db.start();
-      final JsonNode configWithSSLModeDisabled = bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, ImmutableMap.builder()
-          .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(db.getContainerInfo()
-              .getNetworkSettings()
-              .getNetworks()
-              .entrySet().stream()
-              .findFirst()
-              .get().getValue().getIpAddress()))
-          .put(JdbcUtils.PORT_KEY, db.getExposedPorts().get(0))
-          .put(JdbcUtils.DATABASE_KEY, db.getDatabaseName())
-          .put(JdbcUtils.SCHEMAS_KEY, List.of("public"))
-          .put(JdbcUtils.USERNAME_KEY, db.getUsername())
-          .put(JdbcUtils.PASSWORD_KEY, db.getPassword())
-          .put(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, "disable")), false);
-
-      final AirbyteConnectionStatus actual = source.check(configWithSSLModeDisabled);
-      assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
-    } finally {
-      bastion.stopAndClose();
-    }
-  }
-
   @Test
   void testReadMultipleTablesIncrementally() throws Exception {
+    final var config = config();
     ((ObjectNode) config).put("sync_checkpoint_records", 1);
     final String namespace = getDefaultNamespace();
     final String streamOneName = TABLE_NAME + "one";
     // Create a fresh first table
-    database.query(connection -> {
-      connection.fetch(String.format("USE %s;", getDefaultNamespace()));
-      connection.fetch(String.format("CREATE TABLE %s (\n"
-          + " id int PRIMARY KEY,\n"
-          + " name VARCHAR(200) NOT NULL,\n"
-          + " updated_at VARCHAR(200) NOT NULL\n"
-          + ");", streamOneName));
-      connection.execute(
-          String.format(
-              "INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')",
-              getFullyQualifiedTableName(streamOneName)));
-      connection.execute(
-          String.format(
-              "INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')",
-              getFullyQualifiedTableName(streamOneName)));
-      connection.execute(
-          String.format(
-              "INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')",
-              getFullyQualifiedTableName(streamOneName)));
-      return null;
-    });
+    testdb.with("""
+        CREATE TABLE %s (
+            id int PRIMARY KEY,
+            name VARCHAR(200) NOT NULL,
+            updated_at VARCHAR(200) NOT NULL
+            );""", streamOneName)
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')",
+            getFullyQualifiedTableName(streamOneName))
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')",
+            getFullyQualifiedTableName(streamOneName))
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')",
+            getFullyQualifiedTableName(streamOneName));
     // Create a fresh second table
     final String streamTwoName = TABLE_NAME + "two";
     final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName);
     // Insert records into second table
-    database.query(ctx -> {
-      ctx.fetch(String.format("CREATE TABLE %s (\n"
-          + " id int PRIMARY KEY,\n"
-          + " name VARCHAR(200) NOT NULL,\n"
-          + " updated_at DATE NOT NULL\n"
-          + ");", streamTwoName));
-      ctx.execute(
-          String.format("INSERT INTO %s(id, name, updated_at)"
-              + "VALUES (40,'Jean Luc','2006-10-19')",
-              streamTwoFullyQualifiedName));
-      ctx.execute(
-          String.format("INSERT INTO %s(id, name, updated_at)"
-              + "VALUES (41, 'Groot', '2006-10-19')",
-              streamTwoFullyQualifiedName));
-      ctx.execute(
-          String.format("INSERT INTO %s(id, name, updated_at)"
-              + "VALUES (42, 'Thanos','2006-10-19')",
-              streamTwoFullyQualifiedName));
-      return null;
-    });
+    testdb.with("""
+        CREATE TABLE %s (
+            id int PRIMARY KEY,
+            name VARCHAR(200) NOT NULL,
+            updated_at DATE NOT NULL
+            );""", streamTwoName)
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (40,'Jean Luc','2006-10-19')",
+            streamTwoFullyQualifiedName)
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')",
+            streamTwoFullyQualifiedName)
+        .with("INSERT INTO %s(id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')",
+            streamTwoFullyQualifiedName);
     // Create records list that we expect to see in the state message
     final List streamTwoExpectedRecords = Arrays.asList(
         createRecord(streamTwoName, namespace, ImmutableMap.of(
@@ -416,7 +196,7 @@ void testReadMultipleTablesIncrementally() throws Exception {
     // Perform initial sync
     final List messagesFromFirstSync = MoreIterators
-        .toList(source.read(config, configuredCatalog, null));
+        .toList(source().read(config, configuredCatalog, null));
     final List recordsFromFirstSync = filterRecords(messagesFromFirstSync);
@@ -483,7 +263,7 @@ void testReadMultipleTablesIncrementally() throws Exception {
     // - stream two state being the Primary Key state before the final emitted state before the cursor
     // switch
     final List messagesFromSecondSyncWithMixedStates = MoreIterators
-        .toList(source.read(config, configuredCatalog,
+        .toList(source().read(config, configuredCatalog,
            Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0),
                streamTwoStateMessagesFromFirstSync.get(1)))));
@@ -510,21 +290,13 @@ void testReadMultipleTablesIncrementally() throws Exception {
     // Add some data to each table and perform a third read.
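The chained testdb.with(...) calls above replace the old jOOQ database.query(ctx -> ctx.fetch(...)) blocks: each call formats one SQL statement, runs it against the test container, and returns the fixture so further statements can be chained. A rough usage sketch under the same assumption that with(String, Object...) behaves as in the calls above (the table name is illustrative; getFullyQualifiedTableName is the helper already used in this test):

    // Seed one more table through the same fluent helper used in this test.
    final String scratchTable = getFullyQualifiedTableName("scratch"); // illustrative name
    testdb
        .with("CREATE TABLE %s (id INT PRIMARY KEY, name VARCHAR(200) NOT NULL);", scratchTable)
        .with("INSERT INTO %s (id, name) VALUES (1, 'picard'), (2, 'crusher');", scratchTable);
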
// Expect to see all records be synced via cursorBased method and not primaryKey - - database.query(ctx -> { - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (4,'Hooper','2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (43, 'Iron Man', '2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("INSERT INTO %s(id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -615,13 +387,13 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -629,7 +401,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") @@ -641,7 +413,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java new file mode 100644 index 000000000000..66f6713dabec --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mysql_strict_encrypt; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +@Execution(ExecutionMode.CONCURRENT) +public class MySqlStrictEncryptSslTest { + + private MySQLTestDatabase createTestDatabase(String... containerFactoryMethods) { + final var container = new MySQLContainerFactory().shared("mysql:8.0", containerFactoryMethods); + return new MySQLTestDatabase(container) + .withConnectionProperty("useSSL", "true") + .withConnectionProperty("requireSSL", "true") + .initialized(); + } + + @Test + void testStrictSSLUnsecuredNoTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build()) + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "preferred") + .build()) + .build(); + final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Unsecured connection not allowed"), actual.getMessage()); + } + } + + @Test + void testStrictSSLSecuredNoTunnel() throws Exception { + final String PASSWORD = "Passw0rd"; + try (final var testdb = createTestDatabase("withRootAndServerCertificates", "withClientCertificate")) { + final var config = testdb.testConfigBuilder() + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build()) + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) + .build(); + final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Failed to create keystore for Client certificate"), actual.getMessage()); + } + } + + @Test + void testStrictSSLSecuredWithTunnel() throws Exception { + final String PASSWORD = "Passw0rd"; + try (final var testdb = createTestDatabase("withRootAndServerCertificates", "withClientCertificate")) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", 
testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build()) + .build(); + final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage()); + } + } + + @Test + void testStrictSSLUnsecuredWithTunnel() throws Exception { + try (final var testdb = createTestDatabase()) { + final var config = testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testdb.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, "fake") + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "preferred") + .build()) + .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build()) + .build(); + final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config); + assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus()); + assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage()); + } + } + + @Test + void testCheckWithSslModeDisabled() throws Exception { + try (final var testdb = createTestDatabase("withNetwork")) { + try (final SshBastionContainer bastion = new SshBastionContainer()) { + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); + final var config = testdb.integrationTestConfigBuilder() + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false)) + .withoutSsl() + .build(); + final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config); + assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus()); + } + } + } + +} diff --git a/airbyte-integrations/connectors/source-mysql/build.gradle b/airbyte-integrations/connectors/source-mysql/build.gradle index 95c505b52167..f90f12b2c5d6 100644 --- a/airbyte-integrations/connectors/source-mysql/build.gradle +++ b/airbyte-integrations/connectors/source-mysql/build.gradle @@ -7,7 +7,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } @@ -18,7 +18,7 @@ configurations.all { } } -airbyteJavaConnector.addCdkDependencies() + application { mainClass = 'io.airbyte.integrations.source.mysql.MySqlSource' @@ -35,7 +35,7 @@ dependencies { testImplementation 'org.hamcrest:hamcrest-all:1.3' testImplementation libs.junit.jupiter.system.stubs testImplementation libs.testcontainers.mysql - + testFixturesImplementation libs.testcontainers.mysql performanceTestJavaImplementation project(':airbyte-integrations:connectors:source-mysql') } diff --git a/airbyte-integrations/connectors/source-mysql/gradle.properties b/airbyte-integrations/connectors/source-mysql/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mysql/metadata.yaml b/airbyte-integrations/connectors/source-mysql/metadata.yaml index 58a82a25439c..8443347774a0 100644 --- a/airbyte-integrations/connectors/source-mysql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mysql/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: 
database connectorType: source definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad - dockerImageTag: 3.1.7 + dockerImageTag: 3.1.8 dockerRepository: airbyte/source-mysql documentationUrl: https://docs.airbyte.com/integrations/sources/mysql githubIssueLabel: source-mysql diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java index a0be6d6305e2..15bc34eefdcf 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java @@ -26,7 +26,10 @@ public class MySqlCdcProperties { private static final Logger LOGGER = LoggerFactory.getLogger(MySqlCdcProperties.class); - private static final Duration HEARTBEAT_FREQUENCY = Duration.ofSeconds(10); + private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L); + + // Test execution latency is lower when heartbeats are more frequent. + private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L); public static Properties getDebeziumProperties(final JdbcDatabase database) { final JsonNode sourceConfig = database.getSourceConfig(); @@ -61,7 +64,12 @@ private static Properties commonProperties(final JdbcDatabase database) { props.setProperty("converters", "boolean, datetime"); props.setProperty("boolean.type", CustomMySQLTinyIntOneToBooleanConverter.class.getName()); props.setProperty("datetime.type", MySQLDateTimeConverter.class.getName()); - props.setProperty("heartbeat.interval.ms", Long.toString(HEARTBEAT_FREQUENCY.toMillis())); + + final Duration heartbeatInterval = + (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean()) + ? 
HEARTBEAT_INTERVAL_IN_TESTS + : HEARTBEAT_INTERVAL; + props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis())); // For CDC mode, the user cannot provide timezone arguments as JDBC parameters - they are // specifically defined in the replication_method diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java index 6ff8a47884c2..b942d468cdc6 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java @@ -34,7 +34,7 @@ import io.airbyte.cdk.integrations.base.IntegrationRunner; import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource; -import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.JdbcDataSourceUtils; import io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils; @@ -46,8 +46,6 @@ import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory; import io.airbyte.cdk.integrations.util.HostPortResolver; import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.map.MoreMaps; @@ -116,15 +114,12 @@ public class MySqlSource extends AbstractJdbcSource implements Source "useSSL=true", "requireSSL=true"); - private final FeatureFlags featureFlags; - - public static Source sshWrappedSource() { - return new SshWrappedSource(new MySqlSource(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + public static Source sshWrappedSource(MySqlSource source) { + return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); } public MySqlSource() { super(DRIVER_CLASS, MySqlStreamingQueryConfig::new, new MySqlSourceOperations()); - this.featureFlags = new EnvVariableFeatureFlags(); } private static AirbyteStream overrideSyncModes(final AirbyteStream stream) { @@ -182,7 +177,7 @@ public List> getCheckOperations(final J checkOperations.addAll(CdcConfigurationHelper.getCheckOperations()); checkOperations.add(database -> { - FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(config); + RecordWaitTimeUtil.checkFirstRecordWaitTime(config); CdcConfigurationHelper.checkServerTimeZoneConfig(config); }); } @@ -530,7 +525,7 @@ public static Map parseJdbcParameters(final String jdbcPropertie } public static void main(final String[] args) throws Exception { - final Source source = MySqlSource.sshWrappedSource(); + final Source source = MySqlSource.sshWrappedSource(new MySqlSource()); LOGGER.info("starting source: {}", MySqlSource.class); new IntegrationRunner(source).run(args); LOGGER.info("completed source: {}", MySqlSource.class); diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java 
b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java index fd61596d5b28..8ca08abb0ffd 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java @@ -17,7 +17,7 @@ import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility; import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager; -import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcPosition; import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcTargetPosition; import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlDebeziumStateUtil; @@ -84,7 +84,8 @@ public static List> getCdcReadIterators(fi final Instant emittedAt, final String quoteString) { final JsonNode sourceConfig = database.getSourceConfig(); - final Duration firstRecordWaitTime = FirstRecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); + final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); + final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(sourceConfig); LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds()); // Determine the streams that need to be loaded via primary key sync. final List> initialLoadIterator = new ArrayList<>(); @@ -149,8 +150,13 @@ public static List> getCdcReadIterators(fi } // Build the incremental CDC iterators. 
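Alongside the rename from FirstRecordWaitTimeUtil to RecordWaitTimeUtil, the CDC read path now also derives a subsequent-record wait time, and the AirbyteDebeziumHandler constructor (changed in the hunk just below) takes it as an extra argument. A condensed sketch of the new wiring, using only calls visible in this diff; the meaning of the boolean argument is not assumed here, only its position in the existing call:

    final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig);
    final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(sourceConfig);
    final var handler = new AirbyteDebeziumHandler<>(
        sourceConfig,
        MySqlCdcTargetPosition.targetPosition(database),
        true,
        firstRecordWaitTime,
        subsequentRecordWaitTime,
        OptionalInt.empty());
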
- final AirbyteDebeziumHandler handler = - new AirbyteDebeziumHandler<>(sourceConfig, MySqlCdcTargetPosition.targetPosition(database), true, firstRecordWaitTime, OptionalInt.empty()); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>( + sourceConfig, + MySqlCdcTargetPosition.targetPosition(database), + true, + firstRecordWaitTime, + subsequentRecordWaitTime, + OptionalInt.empty()); final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators(catalog, new MySqlCdcSavedInfoFetcher(stateToBeUsed), diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java index 1ea69c06ccec..14004596d669 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java @@ -4,11 +4,11 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import com.fasterxml.jackson.databind.JsonNode; import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import java.io.File; import java.io.IOException; @@ -21,31 +21,26 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.testcontainers.containers.MySQLContainer; public abstract class AbstractMySqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { protected static final Logger LOGGER = LoggerFactory.getLogger(AbstractMySqlSourceDatatypeTest.class); - protected MySQLContainer container; - protected JsonNode config; + protected MySQLTestDatabase testdb; @Override - protected JsonNode getConfig() { - return config; + protected String getNameSpace() { + return testdb.getDatabaseName(); } @Override - protected String getImageName() { - return "airbyte/source-mysql:dev"; + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); } @Override - protected abstract Database setupDatabase() throws Exception; - - @Override - protected String getNameSpace() { - return container.getDatabaseName(); + protected String getImageName() { + return "airbyte/source-mysql:dev"; } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java deleted file mode 100644 index bef3c97c250f..000000000000 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java +++ 
/dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.io.airbyte.integration_tests.sources; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.MySqlUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import java.io.IOException; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; - -public abstract class AbstractMySqlSslCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { - - protected static MySqlUtils.Certificate certs; - protected static final String PASSWORD = "Passw0rd"; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - addTestData(container); - certs = getCertificates(); - - var sslMode = getSslConfig(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - } - - public abstract MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException; - - public abstract ImmutableMap getSslConfig(); - - private void addTestData(MySQLContainer container) throws Exception { - try (final DSLContext dslContext = DSLContextFactory.create( - container.getUsername(), - container.getPassword(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } - } - -} diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java index c5db1157f5c2..a5d57eeb336c 100644 --- 
a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java @@ -10,6 +10,8 @@ import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.Field; @@ -28,19 +30,22 @@ public abstract class AbstractSshMySqlSourceAcceptanceTest extends SourceAccepta private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - protected static JsonNode config; + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + private JsonNode config; public abstract Path getConfigFilePath(); @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + protected void setupEnvironment(final TestDestinationEnv environment) { config = Jsons.deserialize(IOs.readFile(getConfigFilePath())); } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - - } + protected void tearDown(final TestDestinationEnv testEnv) {} @Override protected String getImageName() { diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java index 9d6ad4b2163d..357ccc336ace 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java @@ -4,98 +4,25 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; public class CDCMySqlDatatypeAccuracyTest extends MySqlDatatypeAccuracyTest { - private DSLContext dslContext; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - super.tearDown(testEnv); + protected JsonNode getConfig() { + return 
testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .with("snapshot_mode", "initial_only") + .build(); } @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("snapshot_mode", "initial_only") - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. - // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java index 1dc469ed3b60..54f2ea9c1ca1 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java @@ -4,47 +4,42 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static 
io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import java.util.List; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) -public class CdcBinlogsMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; +public class CdcBinlogsMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - private DSLContext dslContext; private JsonNode stateAfterFirstSync; @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .build(); + } + + @Override + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } @Override @@ -57,11 +52,10 @@ protected List runRead(final ConfiguredAirbyteCatalog configured @Override protected void postSetup() throws Exception { - final Database database = setupDatabase(); - initTests(); + final var database = testdb.getDatabase(); for (final TestDataHolder test : testDataHolders) { database.query(ctx -> { - ctx.fetch(test.getCreateSqlQuery()); + ctx.execute("TRUNCATE TABLE " + test.getNameWithTestPrefix() + ";"); return null; }); } @@ -84,75 +78,6 @@ protected void postSetup() throws Exception { } } - @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, 
HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. - // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Override public boolean testCatalog() { return true; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java index dad29d00d111..230f34ca13fe 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java @@ -4,108 +4,32 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import 
org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; -@ExtendWith(SystemStubsExtension.class) public class CdcInitialSnapshotMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - private DSLContext dslContext; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("snapshot_mode", "initial_only") - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .with("snapshot_mode", "initial_only") + .build(); } - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + @Override + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java index 5bd26ef502ec..1db8613696c5 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java @@ -4,26 +4,21 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import 
io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -37,25 +32,20 @@ import io.airbyte.protocol.models.v0.SyncMode; import java.util.List; import java.util.stream.Collectors; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class CdcMySqlSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - protected EnvironmentVariables environmentVariables; - protected static final String STREAM_NAME = "id_and_name"; protected static final String STREAM_NAME2 = "starships"; - protected MySQLContainer container; - protected JsonNode config; + + protected MySQLTestDatabase testdb; + + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } @Override protected String getImageName() { @@ -69,7 +59,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override @@ -80,7 +73,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSourceDefinedCursor(true) @@ -92,7 +85,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME2), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSourceDefinedCursor(true) @@ -107,70 +100,22 @@ protected JsonNode getState() { } @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); - - revokeAllPermissions(); - grantCorrectPermissions(); - createAndPopulateTables(); - } - - protected void createAndPopulateTables() { - executeQuery("CREATE TABLE id_and_name(id INTEGER PRIMARY KEY, name VARCHAR(200));"); - 
executeQuery( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - executeQuery("CREATE TABLE starships(id INTEGER PRIMARY KEY, name VARCHAR(200));"); - executeQuery( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - } - - protected void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - protected void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); + protected void setupEnvironment(final TestDestinationEnv environment) { + testdb = MySQLTestDatabase.in("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)) + .withCdcPermissions() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); } - protected void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Test @@ -195,7 +140,7 @@ public void testIncrementalSyncShouldNotFailIfBinlogIsDeleted() throws Exception final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? 
stateMessages : List.of(Iterables.getLast(stateMessages))); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 - executeQuery("RESET MASTER;"); + testdb.with("RESET MASTER;"); assertEquals(6, filterRecords(runRead(configuredCatalog, latestState)).size()); } @@ -219,7 +164,7 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER) /* no name field */) .withSourceDefinedCursor(true) @@ -231,7 +176,7 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME2), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), /* no name field */ Field.of("id", JsonSchemaType.NUMBER)) .withSourceDefinedCursor(true) @@ -241,13 +186,8 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { } private void verifyFieldNotExist(final List records, final String stream, final String field) { - assertTrue(records.stream() - .filter(r -> { - return r.getStream().equals(stream) - && r.getData().get(field) != null; - }) - .collect(Collectors.toList()) - .isEmpty(), "Records contain unselected columns [%s:%s]".formatted(stream, field)); + assertTrue(records.stream().noneMatch(r -> r.getStream().equals(stream) && r.getData().get(field) != null), + "Records contain unselected columns [%s:%s]".formatted(stream, field)); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java index 58f1d1f3939f..16cc3ec29ba4 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java @@ -4,56 +4,27 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.testcontainers.containers.MySQLContainer; +import java.util.stream.Stream; public class CdcMySqlSslCaCertificateSourceAcceptanceTest extends CdcMySqlSourceAcceptanceTest { - private static MySqlUtils.Certificate certs; - @Override - protected void setupEnvironment(final TestDestinationEnv 
environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - certs = MySqlUtils.getCertificate(container, true); - - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", "Passw0rd") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .build()) .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); + } - revokeAllPermissions(); - grantCorrectPermissions(); - createAndPopulateTables(); + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java index 5d8a02aef729..4f3691e8f9da 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java @@ -4,53 +4,31 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.testcontainers.containers.MySQLContainer; +import java.util.stream.Stream; public class CdcMySqlSslRequiredSourceAcceptanceTest extends CdcMySqlSourceAcceptanceTest { @Override - protected void setupEnvironment(final TestDestinationEnv environment) { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, 
"required") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withSsl(ImmutableMap.builder().put(JdbcUtils.MODE_KEY, "required").build()) .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); + } - revokeAllPermissions(); - grantCorrectPermissions(); - alterUserRequireSsl(); - createAndPopulateTables(); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + super.setupEnvironment(environment); + testdb.with("ALTER USER %s REQUIRE SSL;", testdb.getUserName()); } - private void alterUserRequireSsl() { - executeQuery("ALTER USER " + container.getUsername() + " REQUIRE SSL;"); + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java index 043290f95536..07597d1ab27c 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java @@ -5,38 +5,42 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.mysql.cj.MysqlType; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import 
uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class MySqlDatatypeAccuracyTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); + } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + protected Database setupDatabase() { + final var sharedContainer = new MySQLContainerFactory().shared("mysql:8.0"); + testdb = new MySQLTestDatabase(sharedContainer) + .withConnectionProperty("zeroDateTimeBehavior", "convertToNull") + .initialized() + .withoutStrictMode(); + return testdb.getDatabase(); } private final Map> charsetsCollationsMap = Map.of( @@ -46,42 +50,6 @@ protected void tearDown(final TestDestinationEnv testEnv) { "binary", Arrays.asList("binary"), "CP1250", Arrays.asList("CP1250_general_ci", "cp1250_czech_cs")); - @Override - protected Database setupDatabase() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - final Database database = new Database( - DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL, - Map.of("zeroDateTimeBehavior", "convertToNull"))); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - return database; - } - @Override public boolean testCatalog() { return true; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java index 11e0c0676e88..3ec7d4ab6740 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java @@ -5,18 +5,14 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -26,66 +22,36 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) +import java.util.stream.Stream; + public class MySqlSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - public EnvironmentVariables environmentVariables; + protected MySQLTestDatabase testdb; + private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "public.starships"; - protected MySQLContainer container; - protected JsonNode config; - @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - 
.put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } + testdb = MySQLTestDatabase.in("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)) + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + } + + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Override @@ -100,7 +66,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withoutSsl() + .build(); } @Override @@ -111,7 +80,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), @@ -120,7 +89,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME2), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME2), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java 
index 327b7e98af88..5b4f86eae403 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java @@ -5,68 +5,35 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; -@ExtendWith(SystemStubsExtension.class) public class MySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override - protected Database setupDatabase() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - final Database database = new Database( - DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL, - Map.of("zeroDateTimeBehavior", "convertToNull"))); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); + } - return database; + @Override + protected Database setupDatabase() { + final var sharedContainer = new MySQLContainerFactory().shared("mysql:8.0"); + testdb = new MySQLTestDatabase(sharedContainer) + .withConnectionProperty("zeroDateTimeBehavior", "convertToNull") + .initialized() + .withoutStrictMode(); + return testdb.getDatabase(); } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java index a22e7cfc6f9c..af217c88c7ea 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java @@ -4,24 +4,29 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; +import java.util.stream.Stream; -public class MySqlSslCaCertificateSourceAcceptanceTest extends AbstractMySqlSslCertificateSourceAcceptanceTest { +public class MySqlSslCaCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; @Override - public MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException { - return MySqlUtils.getCertificate(container, false); + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCaCertificate()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java index af656c30c575..efccbe3702aa 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java @@ -4,26 +4,31 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; +import java.util.stream.Stream; -public class MySqlSslFullCertificateSourceAcceptanceTest extends AbstractMySqlSslCertificateSourceAcceptanceTest { +public class MySqlSslFullCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; @Override - public MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException { - return MySqlUtils.getCertificate(container, true); + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java index 21dd1aa40fd1..5f46e43808e4 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java @@ -6,65 +6,16 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; public class MySqlSslSourceAcceptanceTest extends MySqlSourceAcceptanceTest { @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode 
replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - - var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "required") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder().put(JdbcUtils.MODE_KEY, "required").build()) .build(); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java index 63e2a9b56ed5..7d5f060f34c2 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java @@ -4,26 +4,10 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.commons.features.EnvVariableFeatureFlags; import java.nio.file.Path; -import org.junit.jupiter.api.extension.ExtendWith; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class SshKeyMySqlSourceAcceptanceTest extends AbstractSshMySqlSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - super.setupEnvironment(environment); - } - @Override public Path getConfigFilePath() { return Path.of("secrets/ssh-key-repl-config.json"); diff --git 
a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java index e49ea61e457e..1211c8269894 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java @@ -10,32 +10,14 @@ import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.integrations.source.mysql.MySqlSource; import java.nio.file.Path; -import java.util.List; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import org.testcontainers.containers.Network; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class SshPasswordMySqlSourceAcceptanceTest extends AbstractSshMySqlSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - super.setupEnvironment(environment); - } - @Override public Path getConfigFilePath() { return Path.of("secrets/ssh-pwd-repl-config.json"); @@ -43,30 +25,23 @@ public Path getConfigFilePath() { @Test public void sshTimeoutExceptionMarkAsConfigErrorTest() throws Exception { - final SshBastionContainer bastion = new SshBastionContainer(); - final Network network = Network.newNetwork(); - // set up env - final MySQLContainer db = startTestContainers(bastion, network); - config = bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, bastion.getBasicDbConfigBuider(db, List.of("public")), true); - bastion.stopAndClose(); - final Source sshWrappedSource = MySqlSource.sshWrappedSource(); - final Exception exception = assertThrows(ConfigErrorException.class, () -> sshWrappedSource.discover(config)); - - final String expectedMessage = "Timed out while opening a SSH Tunnel. 
Please double check the given SSH configurations and try again."; - final String actualMessage = exception.getMessage(); - - assertTrue(actualMessage.contains(expectedMessage)); - } - - private MySQLContainer startTestContainers(final SshBastionContainer bastion, final Network network) { - bastion.initAndStartBastion(network); - return initAndStartJdbcContainer(network); - } - - private MySQLContainer initAndStartJdbcContainer(final Network network) { - final MySQLContainer db = new MySQLContainer<>("mysql:8.0").withNetwork(network); - db.start(); - return db; + try (final var testdb = MySQLTestDatabase.in("mysql:8.0", "withNetwork")) { + final SshBastionContainer bastion = new SshBastionContainer(); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); + final var config = testdb.integrationTestConfigBuilder() + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, true)) + .build(); + bastion.stopAndClose(); + + final Source sshWrappedSource = MySqlSource.sshWrappedSource(new MySqlSource()); + final Exception exception = assertThrows(ConfigErrorException.class, () -> sshWrappedSource.discover(config)); + + final String expectedMessage = + "Timed out while opening a SSH Tunnel. Please double check the given SSH configurations and try again."; + final String actualMessage = exception.getMessage(); + assertTrue(actualMessage.contains(expectedMessage)); + } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java deleted file mode 100644 index 669e7e9144d0..000000000000 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java +++ /dev/null @@ -1,11 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */
-
-package io.airbyte.integrations.io.airbyte.integration_tests.sources.utils;
-
-public class TestConstants {
-
-  public static final int INITIAL_CDC_WAITING_SECONDS = 10;
-
-}
diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java
index 293ec4d8f1ca..6bd939fd9e3c 100644
--- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java
+++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java
@@ -13,7 +13,6 @@
 import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_DEFAULT_CURSOR;
 import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_LOG_FILE;
 import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_LOG_POS;
-import static io.airbyte.integrations.source.mysql.MySqlSource.DRIVER_CLASS;
 import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.PRIMARY_KEY_STATE_TYPE;
 import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY;
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -30,16 +29,13 @@
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Streams;
-import io.airbyte.cdk.db.Database;
-import io.airbyte.cdk.db.factory.DSLContextFactory;
-import io.airbyte.cdk.db.factory.DataSourceFactory;
 import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase;
 import io.airbyte.cdk.db.jdbc.JdbcDatabase;
-import io.airbyte.cdk.integrations.base.Source;
 import io.airbyte.cdk.integrations.debezium.CdcSourceTest;
 import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage;
 import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcTargetPosition;
 import io.airbyte.commons.features.EnvVariableFeatureFlags;
+import io.airbyte.commons.features.FeatureFlagsWrapper;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.util.AutoCloseableIterator;
 import io.airbyte.commons.util.AutoCloseableIterators;
@@ -59,127 +55,68 @@
 import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
 import io.airbyte.protocol.models.v0.StreamDescriptor;
 import io.airbyte.protocol.models.v0.SyncMode;
-import java.sql.SQLException;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
-import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Properties;
 import java.util.Random;
 import java.util.Set;
 import java.util.stream.Collectors;
-import javax.sql.DataSource;
-import org.jooq.SQLDialect;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Tag;
-import org.junit.jupiter.api.Tags;
+import org.junit.jupiter.api.Order;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestInfo;
 import org.junit.jupiter.api.Timeout;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.testcontainers.containers.MySQLContainer;
-import uk.org.webcompere.systemstubs.environment.EnvironmentVariables;
-import uk.org.webcompere.systemstubs.jupiter.SystemStub;
-import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension;
-@ExtendWith(SystemStubsExtension.class)
-public class CdcMysqlSourceTest extends CdcSourceTest {
+@Order(1)
+public class CdcMysqlSourceTest extends CdcSourceTest {
-  private static final String START_DB_CONTAINER_WITH_INVALID_TIMEZONE = "START-DB-CONTAINER-WITH-INVALID-TIMEZONE";
   private static final String INVALID_TIMEZONE_CEST = "CEST";
-  @SystemStub
-  private EnvironmentVariables environmentVariables;
-
-  private static final String DB_NAME = MODELS_SCHEMA;
-  private MySQLContainer container;
-  private Database database;
-  private MySqlSource source;
-  private JsonNode config;
   private static final Random RANDOM = new Random();
-  @BeforeEach
-  public void setup(final TestInfo testInfo) throws SQLException {
-    environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true");
-    init(testInfo);
-    revokeAllPermissions();
-    grantCorrectPermissions();
-    super.setup();
+  @Override
+  protected MySQLTestDatabase createTestDatabase() {
+    return MySQLTestDatabase.in("mysql:8.0", "withInvalidTimezoneCEST").withCdcPermissions();
   }
-  private void init(final TestInfo testInfo) {
-    container = new MySQLContainer<>("mysql:8.0");
-    if (testInfo.getTags().contains(START_DB_CONTAINER_WITH_INVALID_TIMEZONE)) {
-      container.withEnv(Map.of("TZ", INVALID_TIMEZONE_CEST));
-    }
-    container.start();
-    source = new MySqlSource();
-    database = new Database(DSLContextFactory.create(
-        "root",
-        "test",
-        DRIVER_CLASS,
-        String.format("jdbc:mysql://%s:%s",
-            container.getHost(),
-            container.getFirstMappedPort()),
-        SQLDialect.MYSQL));
-
-    final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder()
-        .put("method", "CDC")
-        .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS)
-        .put("server_time_zone", "America/Los_Angeles")
-        .build());
-
-    config = Jsons.jsonNode(ImmutableMap.builder()
-        .put("host", container.getHost())
-        .put("port", container.getFirstMappedPort())
-        .put("database", DB_NAME)
-        .put("username", container.getUsername())
-        .put("password", container.getPassword())
-        .put("replication_method", replicationMethod)
-        .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
-        .put("is_test", true)
-        .build());
+  @Override
+  protected MySqlSource source() {
+    final var source = new MySqlSource();
+    source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true));
+    return source;
   }
-  private void revokeAllPermissions() {
-    executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';");
+  @Override
+  protected JsonNode config() {
+    return testdb.testConfigBuilder()
+        .withCdcReplication()
+        .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1)
+        .build();
   }
-  private void revokeReplicationClientPermission() {
-    executeQuery("REVOKE REPLICATION CLIENT ON *.* FROM " + container.getUsername() + "@'%';");
+  protected void purgeAllBinaryLogs() {
+    testdb.with("RESET MASTER;");
   }
-  private void grantCorrectPermissions() {
-    executeQuery("GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " + container.getUsername() + "@'%';");
+  @Override
+  protected String createSchemaSqlFmt() {
+    return "CREATE DATABASE IF NOT EXISTS %s;";
  }
-  protected void purgeAllBinaryLogs() {
-    executeQuery("RESET MASTER;");
+  @Override
+  protected String modelsSchema() {
+    return testdb.getDatabaseName();
  }
-  @AfterEach
-  public void tearDown() {
-    try {
-      container.close();
-    } catch (final Exception e) {
-      throw new RuntimeException(e);
-    }
+  @Override
+  protected String randomSchema() {
+    return testdb.getDatabaseName();
  }
   @Override
   protected MySqlCdcTargetPosition cdcLatestTargetPosition() {
-    final DataSource dataSource =
DataSourceFactory.create( - "root", - "test", - DRIVER_CLASS, - String.format("jdbc:mysql://%s:%s", - container.getHost(), - container.getFirstMappedPort()), - Collections.emptyMap()); - return MySqlCdcTargetPosition.targetPosition(new DefaultJdbcDatabase(dataSource)); + return MySqlCdcTargetPosition.targetPosition(new DefaultJdbcDatabase(testdb.getDataSource())); } @Override @@ -240,30 +177,10 @@ protected void addCdcDefaultCursorField(final AirbyteStream stream) { } } - @Override - protected Source getSource() { - return source; - } - - @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected Database getDatabase() { - return database; - } - - @Override - protected String randomTableSchema() { - return MODELS_SCHEMA; - } - @Test protected void syncWithReplicationClientPrivilegeRevokedFailsCheck() throws Exception { - revokeReplicationClientPermission(); - final AirbyteConnectionStatus status = getSource().check(getConfig()); + testdb.with("REVOKE REPLICATION CLIENT ON *.* FROM %s@'%%';", testdb.getUserName()); + final AirbyteConnectionStatus status = source().check(config()); final String expectedErrorMessage = "Please grant REPLICATION CLIENT privilege, so that binary log files are available" + " for CDC mode."; assertTrue(status.getStatus().equals(Status.FAILED)); @@ -283,8 +200,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -314,8 +231,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { purgeAllBinaryLogs(); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -338,10 +255,10 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { @Test protected void verifyCheckpointStatesByRecords() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. 
- final int recordsToCreate = 20000; + final int recordsToCreate = 20_000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -351,16 +268,14 @@ protected void verifyCheckpointStatesByRecords() throws Exception { assertExpectedStateMessages(stateMessages); for (int recordsCreated = 0; recordsCreated < recordsToCreate; recordsCreated++) { - final JsonNode record = - Jsons.jsonNode(ImmutableMap - .of(COL_ID, 200 + recordsCreated, COL_MAKE_ID, 1, COL_MODEL, - "F-" + recordsCreated)); + final JsonNode record = Jsons.jsonNode(ImmutableMap + .of(COL_ID, 200 + recordsCreated, COL_MAKE_ID, 1, COL_MODEL, "F-" + recordsCreated)); writeModelRecord(record); } final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); assertEquals(recordsToCreate, extractRecordMessages(dataFromSecondBatch).size()); @@ -449,14 +364,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); - if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))) { + if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomSchema()))) { assertEquals(PRIMARY_KEY_STATE_TYPE, streamState.get(STATE_TYPE_KEY).asText()); - } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))) { + } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(testdb.getDatabaseName()))) { assertFalse(streamState.has(STATE_TYPE_KEY)); } else { throw new RuntimeException("Unknown stream"); @@ -474,8 +389,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); assertFalse(streamState.has(STATE_TYPE_KEY)); @@ -492,17 +407,16 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List read = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); @@ -526,12 +440,12 @@ public void syncWouldWorkWithDBWithInvalidTimezone() throws Exception { @Test public void testCompositeIndexInitialLoad() throws Exception { // Simulate adding a composite index by modifying the catalog. 
- final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List> primaryKeys = configuredCatalog.getStreams().get(0).getStream().getSourceDefinedPrimaryKey(); primaryKeys.add(List.of("make_id")); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); @@ -546,8 +460,8 @@ public void testCompositeIndexInitialLoad() throws Exception { // load, and // the last one indicating the cdc position we have synced until. final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(4))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, state); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -561,7 +475,7 @@ public void testCompositeIndexInitialLoad() throws Exception { @Test public void testTwoStreamSync() throws Exception { // Add another stream models_2 and read that one as well. - final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -571,18 +485,18 @@ public void testTwoStreamSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", + testdb.with(createTableSqlFmt(), testdb.getDatabaseName(), MODELS_STREAM_NAME + "_2", columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, + writeRecords(recordJson, testdb.getDatabaseName(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + testdb.getDatabaseName(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -595,8 +509,8 @@ public void testTwoStreamSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -658,13 +572,13 @@ public void testTwoStreamSync() throws Exception { recordMessages1, names, names, - MODELS_SCHEMA); + testdb.getDatabaseName()); - assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA), firstStreamInState); + assertEquals(new 
StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(testdb.getDatabaseName()), firstStreamInState); // Triggering a sync with a primary_key state for 1 stream and complete state for other stream - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List stateMessages2 = extractStateMessages(actualRecords2); @@ -701,7 +615,7 @@ public void testTwoStreamSync() throws Exception { recordMessages2, names, names, - MODELS_SCHEMA); + testdb.getDatabaseName()); } /** @@ -714,8 +628,8 @@ public void testTwoStreamSync() throws Exception { @Test public void testCompressedSchemaHistory() throws Exception { createTablesToIncreaseSchemaHistorySize(); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final AirbyteStateMessage lastStateMessageFromFirstBatch = Iterables.getLast(extractStateMessages(dataFromFirstBatch)); @@ -737,8 +651,8 @@ public void testCompressedSchemaHistory() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, Jsons.jsonNode(Collections.singletonList(lastStateMessageFromFirstBatch))); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), Jsons.jsonNode(Collections.singletonList(lastStateMessageFromFirstBatch))); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final AirbyteStateMessage lastStateMessageFromSecondBatch = Iterables.getLast(extractStateMessages(dataFromSecondBatch)); @@ -758,7 +672,7 @@ public void testCompressedSchemaHistory() throws Exception { private void createTablesToIncreaseSchemaHistorySize() { for (int i = 0; i <= 200; i++) { final String tableName = generateRandomStringOf32Characters(); - final StringBuilder createTableQuery = new StringBuilder("CREATE TABLE models_schema." 
+ tableName + "("); + final StringBuilder createTableQuery = new StringBuilder("CREATE TABLE " + tableName + "("); String firstCol = null; for (int j = 1; j <= 250; j++) { final String columnName = generateRandomStringOf32Characters(); @@ -769,7 +683,7 @@ private void createTablesToIncreaseSchemaHistorySize() { createTableQuery.append(columnName).append(" INTEGER, "); } createTableQuery.append("PRIMARY KEY (").append(firstCol).append("));"); - executeQuery(createTableQuery.toString()); + testdb.with(createTableQuery.toString()); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java index ff2648974915..874c8293924a 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java @@ -8,6 +8,7 @@ * Copyright (c) 2023 Airbyte, Inc., all rights reserved. */ +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY; import static java.util.stream.Collectors.toList; import static org.assertj.core.api.Assertions.assertThat; @@ -19,18 +20,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.source.mysql.internal.models.CursorBasedStatus; import io.airbyte.integrations.source.mysql.internal.models.InternalModels.StateType; @@ -52,187 +48,102 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.Connection; -import java.sql.DriverManager; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import 
uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) -class MySqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - - @SystemStub - private EnvironmentVariables environmentVariables; +@Order(2) +class MySqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "new_password"; - protected static final String TEST_USER = "test"; - protected static final Callable TEST_PASSWORD = () -> "test"; - protected static MySQLContainer container; - - protected Database database; - protected DSLContext dslContext; - - @BeforeAll - static void init() throws Exception { - container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD.call()) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD.call()); - container.start(); - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - } - @BeforeEach - public void setup() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10)) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD.call()) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asText()), - SQLDialect.MYSQL); - database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE DATABASE " + getDefaultNamespace()); - return null; - }); - - super.setup(); - } - - @AfterEach - void tearDownMySql() throws Exception { - dslContext.close(); - super.tearDown(); - } - - @AfterAll - static void cleanUp() { - container.close(); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder().build(); } - // MySql does not support schemas in the way most dbs do. Instead we namespace by db name. @Override - public boolean supportsSchemas() { - return false; + protected MySqlSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public AbstractJdbcSource getJdbcSource() { - return new MySqlSource(); + protected MySQLTestDatabase createTestDatabase() { + return MySQLTestDatabase.in("mysql:8.0"); } @Override - public String getDriverClass() { - return MySqlSource.DRIVER_CLASS; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1000"); } + // MySql does not support schemas in the way most dbs do. Instead we namespace by db name. 
@Override - public JsonNode getConfig() { - return Jsons.clone(config); + protected boolean supportsSchemas() { + return false; } @Test void testReadMultipleTablesIncrementally() throws Exception { - ((ObjectNode) config).put("sync_checkpoint_records", 1); - final String namespace = getDefaultNamespace(); + final var config = config(); + ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); final String streamOneName = TABLE_NAME + "one"; // Create a fresh first table - database.query(connection -> { - connection.fetch(String.format("USE %s;", getDefaultNamespace())); - connection.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at VARCHAR(200) NOT NULL\n" - + ");", streamOneName)); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - return null; - }); + testdb.with("CREATE TABLE %s (\n" + + " id int PRIMARY KEY,\n" + + " name VARCHAR(200) NOT NULL,\n" + + " updated_at VARCHAR(200) NOT NULL\n" + + ");", streamOneName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", + getFullyQualifiedTableName(streamOneName)); // Create a fresh second table final String streamTwoName = TABLE_NAME + "two"; final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); // Insert records into second table - database.query(ctx -> { - ctx.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at DATE NOT NULL\n" - + ");", streamTwoName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (40,'Jean Luc','2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (41, 'Groot', '2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (42, 'Thanos','2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("CREATE TABLE %s (\n" + + " id int PRIMARY KEY,\n" + + " name VARCHAR(200) NOT NULL,\n" + + " updated_at DATE NOT NULL\n" + + ");", streamTwoName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (40,'Jean Luc','2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')", + streamTwoFullyQualifiedName); + // Create records list that we expect to see in the state message final List streamTwoExpectedRecords = Arrays.asList( - createRecord(streamTwoName, namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 40, COL_NAME, "Jean Luc", COL_UPDATED_AT, "2006-10-19")), - createRecord(streamTwoName, 
namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 41, COL_NAME, "Groot", COL_UPDATED_AT, "2006-10-19")), - createRecord(streamTwoName, namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 42, COL_NAME, "Thanos", COL_UPDATED_AT, "2006-10-19"))); // Prep and create a configured catalog to perform sync - final AirbyteStream streamOne = getAirbyteStream(streamOneName, namespace); - final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, namespace); + final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace()); + final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace()); final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo))); @@ -245,7 +156,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Perform initial sync final List messagesFromFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, null)); + .toList(source().read(config, configuredCatalog, null)); final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); @@ -312,7 +223,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // - stream two state being the Primary Key state before the final emitted state before the cursor // switch final List messagesFromSecondSyncWithMixedStates = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); @@ -339,21 +250,13 @@ void testReadMultipleTablesIncrementally() throws Exception { // Add some data to each table and perform a third read. 
// Expect to see all records be synced via cursorBased method and not primaryKey - - database.query(ctx -> { - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (4,'Hooper','2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (43, 'Iron Man', '2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("INSERT INTO %s(id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -386,7 +289,7 @@ void testReadMultipleTablesIncrementally() throws Exception { @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); assertEquals(expected, actual); @@ -402,16 +305,20 @@ void testSpec() throws Exception { */ @Test void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08001;")); + assertTrue(status.getMessage().contains("State code: 08001;"), status.getMessage()); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); // do not test for message since there seems to be flakiness where sometimes the test will get the // message with @@ -420,38 +327,45 @@ public void testCheckIncorrectUsernameFailure() throws Exception { @Test public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - 
assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 42000; Error code: 1049;")); + assertTrue(status.getMessage().contains("State code: 42000; Error code: 1049;"), status.getMessage()); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); - connection.createStatement() - .execute("create user '" + USERNAME_WITHOUT_PERMISSION + "'@'%' IDENTIFIED BY '" + PASSWORD_WITHOUT_PERMISSION + "';\n"); - ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, USERNAME_WITHOUT_PERMISSION); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + final String usernameWithoutPermission = testdb.withNamespace(USERNAME_WITHOUT_PERMISSION); + testdb.with("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", usernameWithoutPermission, PASSWORD_WITHOUT_PERMISSION); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, usernameWithoutPermission); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08001;")); + assertTrue(status.getMessage().contains("State code: 08001;"), status.getMessage()); } @Override @@ -470,13 +384,13 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -484,7 +398,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") @@ -501,7 +415,7 @@ protected boolean supportsPerStream() { @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { diff --git 
a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java index aac3f2efbd71..f9c8f288410f 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java @@ -4,15 +4,12 @@ package io.airbyte.integrations.source.mysql; -import static io.airbyte.integrations.source.mysql.MySqlSource.DRIVER_CLASS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; import io.airbyte.cdk.db.jdbc.DateTimeConverter; import io.airbyte.commons.json.Jsons; import java.sql.Connection; @@ -27,280 +24,107 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; +import java.util.function.Function; +import java.util.function.IntFunction; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.MySQLContainer; public class MySqlSourceOperationsTest { - private final MySqlSourceOperations sqlSourceOperations = new MySqlSourceOperations(); - private MySQLContainer container; - private Database database; - - @BeforeEach - public void init() { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - database = new Database(DSLContextFactory.create( - "root", - "test", - DRIVER_CLASS, - String.format("jdbc:mysql://%s:%s", - container.getHost(), - container.getFirstMappedPort()), - SQLDialect.MYSQL)); - } - - @AfterEach - public void tearDown() { - try { - container.close(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Test public void dateColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_date"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " DATE);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalDate cursorValue = LocalDate.of(2019, 1, i); - jsonNode.put("cursor_column", DateTimeConverter.convertToDate(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATE, DateTimeConverter.convertToDate(LocalDate.of(2019, 1, 1))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) 
Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATE, "2019-01-01T00:00:00Z"); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + testImpl( + "DATE", + i -> LocalDate.of(2019, 1, i), + DateTimeConverter::convertToDate, + LocalDate::toString, + MysqlType.DATE, + DateTimeConverter.convertToDate(LocalDate.of(2019, 1, 1)), + "2019-01-01T00:00:00Z"); } @Test public void timeColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_time"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " TIME);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalTime cursorValue = LocalTime.of(20, i, 0); - jsonNode.put("cursor_column", DateTimeConverter.convertToTime(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIME, DateTimeConverter.convertToTime(LocalTime.of(20, 1, 0))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIME, "1970-01-01T20:01:00Z"); - - try (final ResultSet resultSet = 
preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } + testImpl( + "TIME", + i -> LocalTime.of(20, i, 0), + DateTimeConverter::convertToTime, + LocalTime::toString, + MysqlType.TIME, + DateTimeConverter.convertToTime(LocalTime.of(20, 1, 0)), + "1970-01-01T20:01:00Z"); } @Test public void dateTimeColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_datetime"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " DATETIME);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalDateTime cursorValue = LocalDateTime.of(2019, i, 20, 3, 0, 0); - jsonNode.put("cursor_column", DateTimeConverter.convertToTimestamp(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATETIME, - DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATETIME, "2019-01-20T03:00:00.000000"); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + testImpl( + "DATETIME", + i -> LocalDateTime.of(2019, i, 20, 3, 0, 0), + DateTimeConverter::convertToTimestamp, + LocalDateTime::toString, + MysqlType.DATETIME, + DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0)), + "2019-01-20T03:00:00.000000"); } @Test public void timestampColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + 
".table_with_timestamp"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " timestamp);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final Instant cursorValue = Instant.ofEpochSecond(1660298508L).plusSeconds(i - 1); - jsonNode.put("cursor_column", DateTimeConverter.convertToTimestampWithTimezone(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + Timestamp.from(cursorValue) + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIMESTAMP, - DateTimeConverter.convertToTimestampWithTimezone(Instant.ofEpochSecond(1660298508L))); + testImpl( + "TIMESTAMP", + i -> Instant.ofEpochSecond(1660298508L).plusSeconds(i - 1), + DateTimeConverter::convertToTimestampWithTimezone, + r -> Timestamp.from(r).toString(), + MysqlType.TIMESTAMP, + DateTimeConverter.convertToTimestampWithTimezone(Instant.ofEpochSecond(1660298508L)), + Instant.ofEpochSecond(1660298508L).toString()); + } - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); + private void testImpl( + final String sqlType, + IntFunction recordBuilder, + Function airbyteRecordStringifier, + Function sqlRecordStringifier, + MysqlType mysqlType, + String initialCursorFieldValue, + // Test to check backward compatibility for connectors created before PR + // https://github.com/airbytehq/airbyte/pull/15504 + String backwardCompatibleInitialCursorFieldValue) + throws SQLException { + final var sqlSourceOperations = new MySqlSourceOperations(); + final String cursorColumn = "cursor_column"; + try (final var testdb = MySQLTestDatabase.in("mysql:8.0") + .with("CREATE TABLE cursor_table (id INTEGER PRIMARY KEY, %s %s);", cursorColumn, sqlType)) { + + final List expectedRecords = new ArrayList<>(); + for (int i = 1; i <= 4; i++) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + jsonNode.put("id", i); + final T cursorValue = recordBuilder.apply(i); + jsonNode.put("cursor_column", airbyteRecordStringifier.apply(cursorValue)); + testdb.with("INSERT INTO cursor_table VALUES (%d, '%s');", i, sqlRecordStringifier.apply(cursorValue)); + if (i >= 2) { + expectedRecords.add(jsonNode); } } - } - - Assertions.assertEquals(3, actualRecords.size()); - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIMESTAMP, 
Instant.ofEpochSecond(1660298508L).toString()); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + try (final Connection connection = testdb.getContainer().createConnection("")) { + final PreparedStatement preparedStatement = connection.prepareStatement( + "SELECT * FROM " + testdb.getDatabaseName() + ".cursor_table WHERE " + cursorColumn + " > ?"); + for (final var initialValue : List.of(initialCursorFieldValue, backwardCompatibleInitialCursorFieldValue)) { + sqlSourceOperations.setCursorField(preparedStatement, 1, mysqlType, initialValue); + final List actualRecords = new ArrayList<>(); + try (final ResultSet resultSet = preparedStatement.executeQuery()) { + while (resultSet.next()) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { + sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + } + actualRecords.add(jsonNode); + } } - actualRecords.add(jsonNode); + assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); } } } - Assertions.assertEquals(3, actualRecords.size()); - } - - protected void executeQuery(final String query) { - try { - database.query( - ctx -> ctx - .execute(query)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java index bcba45ab727c..747a66a8dd63 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java @@ -17,8 +17,9 @@ import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource.PrimaryKeyAttributesFromDb; import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -28,71 +29,36 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Properties; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.MySQLContainer; -import org.testcontainers.containers.output.Slf4jLogConsumer; public class MySqlSourceTests { - private static final Logger LOGGER = LoggerFactory.getLogger(MySqlSourceTests.class); - - private static final String TEST_USER = 
"test"; - private static final String TEST_PASSWORD = "test"; + public MySqlSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } @Test public void testSettingTimezones() throws Exception { - // start DB - try (final MySQLContainer container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD) - .withEnv("TZ", "Europe/Moscow") - .withLogConsumer(new Slf4jLogConsumer(LOGGER))) { - - container.start(); - - final Properties properties = new Properties(); - properties.putAll(ImmutableMap.of("user", "root", JdbcUtils.PASSWORD_KEY, TEST_PASSWORD, "serverTimezone", "Europe/Moscow")); - DriverManager.getConnection(container.getJdbcUrl(), properties); - final String dbName = Strings.addRandomSuffix("db", "_", 10); - final JsonNode config = getConfig(container, dbName, "serverTimezone=Europe/Moscow"); - - try (final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), properties)) { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - } - final AirbyteConnectionStatus check = new MySqlSource().check(config); - assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus()); + try (final var testdb = MySQLTestDatabase.in("mysql:8.0", "withMoscowTimezone")) { + final var config = testdb.testConfigBuilder() + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "serverTimezone=Europe/Moscow") + .withoutSsl() + .build(); + final AirbyteConnectionStatus check = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus(), check.getMessage()); } } - private static JsonNode getConfig(final MySQLContainer dbContainer, final String dbName, final String jdbcParams) { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD) - .put(JdbcUtils.JDBC_URL_PARAMS_KEY, jdbcParams) - .build()); - } - @Test void testJdbcUrlWithEscapedDatabaseName() { - final JsonNode jdbcConfig = new MySqlSource().toDatabaseConfig(buildConfigEscapingNeeded()); + final JsonNode jdbcConfig = source().toDatabaseConfig(buildConfigEscapingNeeded()); assertNotNull(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText()); assertTrue(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText().startsWith(EXPECTED_JDBC_ESCAPED_URL)); } @@ -109,95 +75,45 @@ private JsonNode buildConfigEscapingNeeded() { @Test @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") - public void testTableWithNullCursorValueShouldThrowException() throws SQLException { - try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD)) { - db.start(); - final JsonNode config = getConfig(db, "test", ""); - try (Connection connection = DriverManager.getConnection(db.getJdbcUrl(), "root", config.get(JdbcUtils.PASSWORD_KEY).asText())) { - final ConfiguredAirbyteStream table = 
createTableWithNullValueCursor(connection); - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(new MySqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='test.null_cursor_table', cursorColumnName='id', cursorSqlType=INT, cause=Cursor column contains NULL value}"); - - } finally { - db.stop(); - } + public void testNullCursorValueShouldThrowException() { + try (final var testdb = MySQLTestDatabase.in("mysql:8.0") + .with("CREATE TABLE null_cursor_table(id INTEGER NULL);") + .with("INSERT INTO null_cursor_table(id) VALUES (1), (2), (NULL);") + .with("CREATE VIEW null_cursor_view(id) AS SELECT null_cursor_table.id FROM null_cursor_table;")) { + final var config = testdb.testConfigBuilder().withoutSsl().build(); + + final var tableStream = new ConfiguredAirbyteStream() + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + "null_cursor_table", + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + final var tableCatalog = new ConfiguredAirbyteCatalog().withStreams(List.of(tableStream)); + final var tableThrowable = catchThrowable(() -> MoreIterators.toSet(source().read(config, tableCatalog, null))); + assertThat(tableThrowable).isInstanceOf(ConfigErrorException.class).hasMessageContaining(NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS); + + final var viewStream = new ConfiguredAirbyteStream() + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + "null_cursor_view", + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + final var viewCatalog = new ConfiguredAirbyteCatalog().withStreams(List.of(viewStream)); + final var viewThrowable = catchThrowable(() -> MoreIterators.toSet(source().read(config, viewCatalog, null))); + assertThat(viewThrowable).isInstanceOf(ConfigErrorException.class).hasMessageContaining(NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS); } } - private ConfiguredAirbyteStream createTableWithNullValueCursor(final Connection connection) throws SQLException { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE TABLE IF NOT EXISTS test.null_cursor_table(id INTEGER NULL)"); - connection.createStatement().execute("INSERT INTO test.null_cursor_table(id) VALUES (1), (2), (NULL)"); - - return new ConfiguredAirbyteStream().withSyncMode(SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - "null_cursor_table", - "test", - Field.of("id", JsonSchemaType.STRING)) - 
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - } - - @Test - @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") - public void viewWithNullValueCursorShouldThrowException() throws SQLException { - try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD)) { - db.start(); - final JsonNode config = getConfig(db, "test", ""); - try (Connection connection = DriverManager.getConnection(db.getJdbcUrl(), "root", config.get(JdbcUtils.PASSWORD_KEY).asText())) { - final ConfiguredAirbyteStream table = createViewWithNullValueCursor(connection); - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(new MySqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='test.test_view_null_cursor', cursorColumnName='id', cursorSqlType=INT, cause=Cursor column contains NULL value}"); - - } finally { - db.stop(); - } - } - } - - private ConfiguredAirbyteStream createViewWithNullValueCursor(final Connection connection) throws SQLException { - - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE TABLE IF NOT EXISTS test.test_table_null_cursor(id INTEGER NULL)"); - connection.createStatement().execute(""" - CREATE VIEW test_view_null_cursor(id) as - SELECT test_table_null_cursor.id - FROM test_table_null_cursor - """); - connection.createStatement().execute("INSERT INTO test.test_table_null_cursor(id) VALUES (1), (2), (NULL)"); - - return new ConfiguredAirbyteStream().withSyncMode(SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - "test_view_null_cursor", - "test", - Field.of("id", JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - } + static private final String NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS = "The following tables have invalid columns " + + "selected as cursor, please select a column with a well-defined ordering with no null values as a cursor."; @Test void testParseJdbcParameters() { @@ -210,26 +126,12 @@ void testParseJdbcParameters() { @Test public void testJDBCSessionVariable() throws Exception { - // start DB - try (final MySQLContainer container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD) - .withLogConsumer(new Slf4jLogConsumer(LOGGER))) { - - container.start(); - final Properties properties = new Properties(); - properties.putAll(ImmutableMap.of("user", "root", JdbcUtils.PASSWORD_KEY, TEST_PASSWORD)); - DriverManager.getConnection(container.getJdbcUrl(), properties); - final String dbName = 
Strings.addRandomSuffix("db", "_", 10); - final JsonNode config = getConfig(container, dbName, "sessionVariables=MAX_EXECUTION_TIME=28800000"); - - try (final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), properties)) { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - } - final AirbyteConnectionStatus check = new MySqlSource().check(config); + try (final var testdb = MySQLTestDatabase.in("mysql:8.0")) { + final var config = testdb.testConfigBuilder() + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "sessionVariables=MAX_EXECUTION_TIME=28800000") + .withoutSsl() + .build(); + final AirbyteConnectionStatus check = source().check(config); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus()); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java index 0386a18813b6..5d5ac314a928 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java @@ -4,49 +4,27 @@ package io.airbyte.integrations.source.mysql; -import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS; - -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; +import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +@Order(3) class MySqlSslJdbcSourceAcceptanceTest extends MySqlJdbcSourceAcceptanceTest { - @BeforeEach - public void setup() throws Exception { - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10)) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD.call()) - .put(JdbcUtils.SSL_KEY, true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s?%s", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asText(), - String.join("&", SSL_PARAMETERS)), - SQLDialect.MYSQL); - database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - ctx.fetch("SHOW STATUS LIKE 'Ssl_cipher'"); - return null; - }); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .with(JdbcUtils.SSL_KEY, true) + .build(); + } - super.setup(); + @Override + protected MySQLTestDatabase createTestDatabase() { + return new MySQLTestDatabase(new MySQLContainerFactory().shared("mysql:8.0")) + .withConnectionProperty("useSSL", "true") + 
.withConnectionProperty("requireSSL", "true") + .initialized() + .with("SHOW STATUS LIKE 'Ssl_cipher'"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java new file mode 100644 index 000000000000..74c745cb7f7f --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mysql; + +import io.airbyte.cdk.testutils.ContainerFactory; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.testcontainers.containers.MySQLContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +public class MySQLContainerFactory implements ContainerFactory<MySQLContainer<?>> { + + @Override + public MySQLContainer<?> createNewContainer(DockerImageName imageName) { + return new MySQLContainer<>(imageName.asCompatibleSubstituteFor("mysql")); + } + + @Override + public Class<?> getContainerClass() { + return MySQLContainer.class; + } + + /** + * Create a new network and bind it to the container. + */ + public void withNetwork(MySQLContainer<?> container) { + container.withNetwork(Network.newNetwork()); + } + + private static final String INVALID_TIMEZONE_CEST = "CEST"; + + public void withInvalidTimezoneCEST(MySQLContainer<?> container) { + container.withEnv("TZ", INVALID_TIMEZONE_CEST); + } + + public void withMoscowTimezone(MySQLContainer<?> container) { + container.withEnv("TZ", "Europe/Moscow"); + } + + public void withRootAndServerCertificates(MySQLContainer<?> container) { + execInContainer(container, + "sed -i '31 a ssl' /etc/my.cnf", + "sed -i '32 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf", + "sed -i '33 a ssl-cert=/var/lib/mysql/server-cert.pem' /etc/my.cnf", + "sed -i '34 a ssl-key=/var/lib/mysql/server-key.pem' /etc/my.cnf", + "sed -i '35 a require_secure_transport=ON' /etc/my.cnf"); + } + + public void withClientCertificate(MySQLContainer<?> container) { + execInContainer(container, + "sed -i '39 a [client]' /etc/mysql/my.cnf", + "sed -i '40 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf", + "sed -i '41 a ssl-cert=/var/lib/mysql/client-cert.pem' /etc/my.cnf", + "sed -i '42 a ssl-key=/var/lib/mysql/client-key.pem' /etc/my.cnf"); + } + + static private void execInContainer(MySQLContainer<?> container, String... commands) { + container.start(); + try { + for (String command : commands) { + container.execInContainer("sh", "-c", command); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java new file mode 100644 index 000000000000..5f35def2b83f --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.source.mysql; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.testutils.TestDatabase; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.MySQLContainer; + +public class MySQLTestDatabase extends + TestDatabase<MySQLContainer<?>, MySQLTestDatabase, MySQLTestDatabase.MySQLConfigBuilder> { + + static public MySQLTestDatabase in(String imageName, String... methods) { + final var container = new MySQLContainerFactory().shared(imageName, methods); + return new MySQLTestDatabase(container).initialized(); + } + + public MySQLTestDatabase(MySQLContainer<?> container) { + super(container); + } + + public MySQLTestDatabase withCdcPermissions() { + return this + .with("REVOKE ALL PRIVILEGES, GRANT OPTION FROM '%s';", getUserName()) + .with("GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO '%s';", getUserName()); + } + + public MySQLTestDatabase withoutStrictMode() { + // This disables strict mode in the DB and allows inserting specific values. + // For example, it's possible to insert a date with zero values such as "2021-00-00". + return with("SET @@sql_mode=''"); + } + + static private final int MAX_CONNECTIONS = 1000; + + @Override + protected Stream<Stream<String>> inContainerBootstrapCmd() { + return Stream.of(mysqlCmd(Stream.of( + String.format("SET GLOBAL max_connections=%d", MAX_CONNECTIONS), + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER '%s' IDENTIFIED BY '%s'", getUserName(), getPassword()), + // Grant privileges also to the container's user, which is not root.
+ String.format("GRANT ALL PRIVILEGES ON *.* TO '%s', '%s' WITH GRANT OPTION", getUserName(), + getContainer().getUsername())))); + } + + @Override + protected Stream<String> inContainerUndoBootstrapCmd() { + return mysqlCmd(Stream.of( + String.format("DROP USER '%s'", getUserName()), + String.format("DROP DATABASE %s", getDatabaseName()))); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.MYSQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.MYSQL; + } + + @Override + public MySQLConfigBuilder configBuilder() { + return new MySQLConfigBuilder(this); + } + + public Stream<String> mysqlCmd(Stream<String> sql) { + return Stream.of("bash", "-c", String.format( + "set -o errexit -o pipefail; echo \"%s\" | mysql -v -v -v --user=root --password=test", + sql.collect(Collectors.joining("; ")))); + } + + static public class MySQLConfigBuilder extends ConfigBuilder<MySQLTestDatabase, MySQLConfigBuilder> { + + protected MySQLConfigBuilder(MySQLTestDatabase testDatabase) { + super(testDatabase); + } + + public MySQLConfigBuilder withStandardReplication() { + return with("replication_method", ImmutableMap.builder().put("method", "STANDARD").build()); + } + + public MySQLConfigBuilder withCdcReplication() { + return this + .with("is_test", true) + .with("replication_method", ImmutableMap.builder() + .put("method", "CDC") + .put("initial_waiting_seconds", 5) + .put("server_time_zone", "America/Los_Angeles") + .build()); + } + + } + + private String cachedCaCertificate; + private Certificates cachedCertificates; + + public synchronized String getCaCertificate() { + if (cachedCaCertificate == null) { + cachedCaCertificate = catFileInContainer("/var/lib/mysql/ca.pem"); + } + return cachedCaCertificate; + } + + public synchronized Certificates getCertificates() { + if (cachedCertificates == null) { + cachedCertificates = new Certificates( + catFileInContainer("/var/lib/mysql/ca.pem"), + catFileInContainer("/var/lib/mysql/client-cert.pem"), + catFileInContainer("/var/lib/mysql/client-key.pem")); + } + return cachedCertificates; + } + + public record Certificates(String caCertificate, String clientCertificate, String clientKey) {} + + private String catFileInContainer(String filePath) { + try { + return getContainer().execInContainer("sh", "-c", "cat " + filePath).getStdout().trim(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml index b5d777e36eda..1821fdddaddc 100644 --- a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml +++ b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml @@ -1,6 +1,6 @@ data: ab_internal: - ql: 400 + ql: 200 sl: 200 allowedHosts: hosts: diff --git a/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml b/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml index 768d1d550502..4eab013a5fad 100644 --- a/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml @@ -5,7 +5,8 @@ acceptance_tests: tests: - spec_path: source_pinterest/spec.json backward_compatibility_tests_config: - disable_for_version: "0.7.0" # removed non-working token based auth method + disable_for_version: "0.7.3" # added custom report + # disable_for_version: "0.7.0" # 
removed non-working token based auth method # disable_for_version: "0.5.0" # Add Pattern for "start_date" connection: tests: diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json b/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json new file mode 100644 index 000000000000..c8991f049ddf --- /dev/null +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json @@ -0,0 +1,18 @@ +{ + "client_id": "1111111", + "client_secret": "XXXX", + "refresh_token": "XXXXX" + "start_date": "2023-01-08", + "custom_reports": [{ + "name": "vadim_report", + "level": "AD_GROUP", + "granularity": "MONTH", + "click_window_days": 30, + "engagement_window_days": 30, + "view_window_days": 30, + "conversion_report_time": "TIME_OF_CONVERSION", + "attribution_types": ["INDIVIDUAL", "HOUSEHOLD"], + "columns": ["ADVERTISER_ID", "AD_ACCOUNT_ID", "AD_GROUP_ID", "CTR", "IMPRESSION_2"], + "start_date": "2023-01-08" + }] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json b/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json new file mode 100644 index 000000000000..645099b98d0e --- /dev/null +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json @@ -0,0 +1,15 @@ +{ + "streams": [ + { + "stream": { + "name": "custom_vadim_report", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + } + ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl index dbba2254d507..fb45fd3c024b 100644 --- a/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl @@ -20,4 +20,5 @@ {"stream": "ad_group_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "TOTAL_IMPRESSION_FREQUENCY": 1.0, "TOTAL_IMPRESSION_USER": 1.0, "DATE": "2023-10-29"}, "emitted_at": 1699895043538} {"stream": "ad_group_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "TARGETING_VALUE": "TWOCOLUMN_FEED", "TARGETING_TYPE": "FEED_TYPE", "DATE": "2023-10-29"}, "emitted_at": 1699895106949} {"stream": "pin_promotion_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", 
"CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TOTAL_IMPRESSION_FREQUENCY": 1.0, "TOTAL_IMPRESSION_USER": 1.0, "DATE": "2023-10-29"}, "emitted_at": 1699895200157} -{"stream": "pin_promotion_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TARGETING_VALUE": "Education > Subjects > Science > Applied Science > Technology", "TARGETING_TYPE": "TARGETED_INTEREST", "DATE": "2023-10-29"}, "emitted_at": 1699895289749} \ No newline at end of file +{"stream": "pin_promotion_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TARGETING_VALUE": "Education > Subjects > Science > Applied Science > Technology", "TARGETING_TYPE": "TARGETED_INTEREST", "DATE": "2023-10-29"}, "emitted_at": 1699895289749} +{"stream": "custom_vadim_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ID": "2680068678993", "IMPRESSION_2": 11.0, "DATE_RANGE": "2023-10-01 - 2023-10-31"}, "emitted_at": 1700158289892} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/metadata.yaml b/airbyte-integrations/connectors/source-pinterest/metadata.yaml index 4763dd34d13a..a0da1eb5a704 100644 --- a/airbyte-integrations/connectors/source-pinterest/metadata.yaml +++ b/airbyte-integrations/connectors/source-pinterest/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 - dockerImageTag: 0.7.2 + dockerImageTag: 0.8.1 dockerRepository: airbyte/source-pinterest connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c @@ -19,6 +19,17 @@ data: oss: enabled: true releaseStage: generally_available + suggestedStreams: + streams: + - campaign_analytics + - ad_account_analytics + - ad_analytics + - campaigns + - ad_accounts + - ads + - user_account_analytics + - ad_group_analytics + - ad_groups documentationUrl: https://docs.airbyte.com/integrations/sources/pinterest tags: - language:python diff --git a/airbyte-integrations/connectors/source-pinterest/setup.py b/airbyte-integrations/connectors/source-pinterest/setup.py index eac9cebacb4b..5da646d8e719 100644 --- a/airbyte-integrations/connectors/source-pinterest/setup.py +++ b/airbyte-integrations/connectors/source-pinterest/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2", "pendulum~=2.1.2"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "pendulum~=2.1.2"] TEST_REQUIREMENTS = [ "pytest~=6.1", diff 
--git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py index 98e4809bb296..04e85473ff21 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py @@ -8,6 +8,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional from urllib.parse import urljoin +import airbyte_cdk.sources.utils.casing as casing import backoff import requests from airbyte_cdk.models import SyncMode @@ -260,3 +261,53 @@ class KeywordReport(PinterestAnalyticsTargetingReportStream): @property def level(self): return "KEYWORD" + + +class CustomReport(PinterestAnalyticsTargetingReportStream): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self._custom_class_name = f"Custom_{self.config['name']}" + self._level = self.config["level"] + self.granularity = self.config["granularity"] + self.click_window_days = self.config["click_window_days"] + self.engagement_window_days = self.config["engagement_window_days"] + self.view_window_days = self.config["view_window_days"] + self.conversion_report_time = self.config["conversion_report_time"] + self.attribution_types = self.config["attribution_types"] + self.columns = self.config["columns"] + + @property + def level(self): + return self._level + + @property + def name(self) -> str: + """We override stream name to let the user change it via configuration.""" + name = self._custom_class_name or self.__class__.__name__ + return casing.camel_to_snake(name) + + def request_body_json(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Optional[Mapping]: + """Return the body of the API request in JSON format.""" + return { + "start_date": stream_slice["start_date"], + "end_date": stream_slice["end_date"], + "level": self.level, + "granularity": self.granularity, + "click_window_days": self.click_window_days, + "engagement_window_days": self.engagement_window_days, + "view_window_days": self.view_window_days, + "conversion_report_time": self.conversion_report_time, + "attribution_types": self.attribution_types, + "columns": self.columns, + } + + @property + def window_in_days(self): + """Docs: https://developers.pinterest.com/docs/api/v5/#operation/analytics/get_report""" + if self.granularity == "HOUR": + return 2 + elif self.level == "PRODUCT_ITEM": + return 31 + else: + return 185 diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json index 3f24d99ee067..4ab16b8d9676 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json @@ -116,6 +116,67 @@ }, "updated_time": { "type": ["null", "number"] + }, + "optimization_goal_metadata": { + "type": ["null", "object"], + "properties": { + "conversion_tag_v3_goal_metadata": { + "type": ["null", "object"], + "properties": { + "attribution_windows": { + "type": ["null", "object"], + "properties": { + "click_window_days": { + "type": ["null", "integer"] + }, + "engagement_window_days": { + "type": ["null", "integer"] + }, + "view_window_days": { + "type": ["null", "integer"] + } + } + }, + "conversion_event": { + "type": ["null", "string"] + }, + "conversion_tag_id": { + "type": ["null", 
"string"] + }, + "cpa_goal_value_in_micro_currency": { + "type": ["null", "string"] + }, + "is_roas_optimized": { + "type": ["null", "boolean"] + }, + "learning_mode_type": { + "type": ["null", "string"] + } + } + }, + "frequency_goal_metadata": { + "type": ["null", "object"], + "properties": { + "frequency": { + "type": ["null", "integer"] + }, + "timerange": { + "type": ["null", "string"] + } + } + }, + "scrollup_goal_metadata": { + "type": ["null", "object"], + "properties": { + "scrollup_goal_value_in_micro_currency": { + "type": ["null", "string"] + } + } + } + } + }, + "bid_strategy_type": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json index 2b385cce892d..d5f238bd9b10 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json @@ -91,6 +91,9 @@ "view_tracking_url": { "type": ["null", "string"] }, + "lead_form_id": { + "type": ["null", "string"] + }, "ad_account_id": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json index 2989e890ced2..55a5c52fbd48 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json @@ -11,15 +11,7 @@ "format": "date-time" }, "creative_type": { - "type": ["null", "string"], - "enum": [ - "REGULAR", - "VIDEO", - "CAROUSEL", - "MAX_VIDEO", - "SHOP_THE_PIN", - "IDEA" - ] + "type": ["null", "string"] }, "is_standard": { "type": ["null", "boolean"] @@ -77,6 +69,12 @@ "type": ["null", "string"] } } + }, + "pin_metrics": { + "type": ["null", "object"] + }, + "has_been_promoted": { + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json index 74bdf144cb8a..603145526fa6 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json @@ -35,6 +35,9 @@ } } }, + "pin_metrics": { + "type": ["null", "object"] + }, "media": { "type": ["null", "object"], "properties": { diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json index 561bebf0d971..cb91bc3af2d7 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json @@ -69,6 +69,21 @@ }, "type": { "type": ["null", "string"] + }, + "start_time": { + "type": ["null", "integer"] + }, + "end_time": { + "type": ["null", "integer"] + }, + "summary_status": { + "type": ["null", "string"] + }, + "is_campaign_budget_optimization": { + "type": ["null", "boolean"] + }, + "is_flexible_daily_budgets": { + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py 
b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py index 828e07f22b53..ea5af593ebf8 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py @@ -3,8 +3,9 @@ # import copy +import logging from base64 import standard_b64encode -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping, Tuple, Type import pendulum import requests @@ -21,6 +22,7 @@ AdvertizerReport, AdvertizerTargetingReport, CampaignTargetingReport, + CustomReport, KeywordReport, PinPromotionReport, PinPromotionTargetingReport, @@ -52,6 +54,8 @@ UserAccountAnalytics, ) +logger = logging.getLogger("airbyte") + class SourcePinterest(AbstractSource): def _validate_and_transform(self, config: Mapping[str, Any], amount_of_days_allowed_for_lookup: int = 89): @@ -59,21 +63,26 @@ def _validate_and_transform(self, config: Mapping[str, Any], amount_of_days_allo today = pendulum.today() latest_date_allowed_by_api = today.subtract(days=amount_of_days_allowed_for_lookup) - start_date = config["start_date"] - if not start_date: - config["start_date"] = latest_date_allowed_by_api - else: + start_date = config.get("start_date") + + # transform to datetime + if start_date and isinstance(start_date, str): try: - config["start_date"] = pendulum.from_format(config["start_date"], "YYYY-MM-DD") + config["start_date"] = pendulum.from_format(start_date, "YYYY-MM-DD") except ValueError: - message = "Entered `Start Date` does not match format YYYY-MM-DD" + message = f"Entered `Start Date` {start_date} does not match format YYYY-MM-DD" raise AirbyteTracedException( message=message, internal_message=message, failure_type=FailureType.config_error, ) - if (today - config["start_date"]).days > amount_of_days_allowed_for_lookup: - config["start_date"] = latest_date_allowed_by_api + + if not start_date or config["start_date"] < latest_date_allowed_by_api: + logger.info( + f"Current start_date: {start_date} does not meet API report requirements. 
Resetting start_date to: {latest_date_allowed_by_api}" + ) + config["start_date"] = latest_date_allowed_by_api + return config @staticmethod @@ -154,4 +163,32 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ProductGroupTargetingReport(ad_accounts, config=report_config), KeywordReport(ad_accounts, config=report_config), ProductItemReport(ad_accounts, config=report_config), - ] + ] + self.get_custom_report_streams(ad_accounts, config=report_config) + + def get_custom_report_streams(self, parent, config: dict) -> List[Type[Stream]]: + """return custom report streams""" + custom_streams = [] + for report_config in config.get("custom_reports", []): + report_config["authenticator"] = config["authenticator"] + + # https://developers.pinterest.com/docs/api/v5/#operation/analytics/get_report + if report_config.get("granularity") == "HOUR": + # Otherwise: Response Code: 400 {"code":1,"message":"HOURLY request must be less than 3 days"} + amount_of_days_allowed_for_lookup = 2 + elif report_config.get("level") == "PRODUCT_ITEM": + amount_of_days_allowed_for_lookup = 91 + else: + amount_of_days_allowed_for_lookup = 913 + + start_date = report_config.get("start_date") + if not start_date: + report_config["start_date"] = config.get("start_date") + + report_config = self._validate_and_transform(report_config, amount_of_days_allowed_for_lookup) + + stream = CustomReport( + parent=parent, + config=report_config, + ) + custom_streams.append(stream) + return custom_streams diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json index 835d983074c2..ad385a664b48 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json @@ -4,7 +4,6 @@ "$schema": "https://json-schema.org/draft-07/schema#", "title": "Pinterest Spec", "type": "object", - "required": ["start_date"], "additionalProperties": true, "properties": { "start_date": { @@ -61,6 +60,220 @@ } } ] + }, + "custom_reports": { + "title": "Custom Reports", + "description": "A list which contains ad statistics entries, each entry must have a name and can contains fields, breakdowns or action_breakdowns. 
Click on \"add\" to fill this field.", + "type": "array", + "items": { + "title": "ReportConfig", + "description": "Config for custom report", + "type": "object", + "required": ["name", "level", "granularity", "columns"], + "properties": { + "name": { + "title": "Name", + "description": "The name value of report", + "type": "string", + "order": 0 + }, + "level": { + "title": "Level", + "description": "Chosen level for API", + "default": "ADVERTISER", + "enum": ["ADVERTISER", "ADVERTISER_TARGETING", "CAMPAIGN", "CAMPAIGN_TARGETING", "AD_GROUP", "AD_GROUP_TARGETING", "PIN_PROMOTION", "PIN_PROMOTION_TARGETING", "KEYWORD", "PRODUCT_GROUP", "PRODUCT_GROUP_TARGETING", "PRODUCT_ITEM"], + "type": "string", + "order": 1 + }, + "granularity": { + "title": "Granularity", + "description": "Chosen granularity for API", + "default": "TOTAL", + "enum": ["TOTAL", "DAY", "HOUR", "WEEK", "MONTH"], + "type": "string", + "order": 2 + }, + "columns": { + "title": "Columns", + "description": "A list of chosen columns", + "default": [], + "type": "array", + "order": 3, + "items": { + "title": "ValidEnums", + "description": "An enumeration.", + "enum": [ + "ADVERTISER_ID", + "AD_ACCOUNT_ID", + "AD_GROUP_ENTITY_STATUS", + "AD_GROUP_ID", + "AD_ID", + "CAMPAIGN_DAILY_SPEND_CAP", + "CAMPAIGN_ENTITY_STATUS", + "CAMPAIGN_ID", + "CAMPAIGN_LIFETIME_SPEND_CAP", + "CAMPAIGN_NAME", + "CHECKOUT_ROAS", + "CLICKTHROUGH_1", + "CLICKTHROUGH_1_GROSS", + "CLICKTHROUGH_2", + "CPC_IN_MICRO_DOLLAR", + "CPM_IN_DOLLAR", + "CPM_IN_MICRO_DOLLAR", + "CTR", + "CTR_2", + "ECPCV_IN_DOLLAR", + "ECPCV_P95_IN_DOLLAR", + "ECPC_IN_DOLLAR", + "ECPC_IN_MICRO_DOLLAR", + "ECPE_IN_DOLLAR", + "ECPM_IN_MICRO_DOLLAR", + "ECPV_IN_DOLLAR", + "ECTR", + "EENGAGEMENT_RATE", + "ENGAGEMENT_1", + "ENGAGEMENT_2", + "ENGAGEMENT_RATE", + "IDEA_PIN_PRODUCT_TAG_VISIT_1", + "IDEA_PIN_PRODUCT_TAG_VISIT_2", + "IMPRESSION_1", + "IMPRESSION_1_GROSS", + "IMPRESSION_2", + "INAPP_CHECKOUT_COST_PER_ACTION", + "OUTBOUND_CLICK_1", + "OUTBOUND_CLICK_2", + "PAGE_VISIT_COST_PER_ACTION", + "PAGE_VISIT_ROAS", + "PAID_IMPRESSION", + "PIN_ID", + "PIN_PROMOTION_ID", + "REPIN_1", + "REPIN_2", + "REPIN_RATE", + "SPEND_IN_DOLLAR", + "SPEND_IN_MICRO_DOLLAR", + "TOTAL_CHECKOUT", + "TOTAL_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CLICKTHROUGH", + "TOTAL_CLICK_ADD_TO_CART", + "TOTAL_CLICK_CHECKOUT", + "TOTAL_CLICK_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CLICK_LEAD", + "TOTAL_CLICK_SIGNUP", + "TOTAL_CLICK_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CONVERSIONS", + "TOTAL_CUSTOM", + "TOTAL_ENGAGEMENT", + "TOTAL_ENGAGEMENT_CHECKOUT", + "TOTAL_ENGAGEMENT_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_ENGAGEMENT_LEAD", + "TOTAL_ENGAGEMENT_SIGNUP", + "TOTAL_ENGAGEMENT_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_IDEA_PIN_PRODUCT_TAG_VISIT", + "TOTAL_IMPRESSION_FREQUENCY", + "TOTAL_IMPRESSION_USER", + "TOTAL_LEAD", + "TOTAL_OFFLINE_CHECKOUT", + "TOTAL_PAGE_VISIT", + "TOTAL_REPIN_RATE", + "TOTAL_SIGNUP", + "TOTAL_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_VIDEO_3SEC_VIEWS", + "TOTAL_VIDEO_AVG_WATCHTIME_IN_SECOND", + "TOTAL_VIDEO_MRC_VIEWS", + "TOTAL_VIDEO_P0_COMBINED", + "TOTAL_VIDEO_P100_COMPLETE", + "TOTAL_VIDEO_P25_COMBINED", + "TOTAL_VIDEO_P50_COMBINED", + "TOTAL_VIDEO_P75_COMBINED", + "TOTAL_VIDEO_P95_COMBINED", + "TOTAL_VIEW_ADD_TO_CART", + "TOTAL_VIEW_CHECKOUT", + "TOTAL_VIEW_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_VIEW_LEAD", + "TOTAL_VIEW_SIGNUP", + "TOTAL_VIEW_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_CHECKOUT", + "TOTAL_WEB_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_CLICK_CHECKOUT", + 
"TOTAL_WEB_CLICK_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_ENGAGEMENT_CHECKOUT", + "TOTAL_WEB_ENGAGEMENT_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_SESSIONS", + "TOTAL_WEB_VIEW_CHECKOUT", + "TOTAL_WEB_VIEW_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "VIDEO_3SEC_VIEWS_2", + "VIDEO_LENGTH", + "VIDEO_MRC_VIEWS_2", + "VIDEO_P0_COMBINED_2", + "VIDEO_P100_COMPLETE_2", + "VIDEO_P25_COMBINED_2", + "VIDEO_P50_COMBINED_2", + "VIDEO_P75_COMBINED_2", + "VIDEO_P95_COMBINED_2", + "WEB_CHECKOUT_COST_PER_ACTION", + "WEB_CHECKOUT_ROAS", + "WEB_SESSIONS_1", + "WEB_SESSIONS_2" + ] + } + }, + "click_window_days": { + "title": "Click window days", + "description": "Number of days to use as the conversion attribution window for a pin click action.", + "default": 30, + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 4 + }, + "engagement_window_days": { + "title": "Engagement window days", + "description": "Number of days to use as the conversion attribution window for an engagement action.", + "default": [30], + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 5 + }, + "view_window_days": { + "title": "View window days", + "description": "Number of days to use as the conversion attribution window for a view action.", + "default": [30], + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 6 + }, + "conversion_report_time": { + "title": "Conversion report time", + "description": "The date by which the conversion metrics returned from this endpoint will be reported. There are two dates associated with a conversion event: the date that the user interacted with the ad, and the date that the user completed a conversion event..", + "default": "TIME_OF_AD_ACTION", + "enum": ["TIME_OF_AD_ACTION", "TIME_OF_CONVERSION"], + "type": "string", + "order": 7 + }, + "attribution_types": { + "title": "Attribution types", + "description": "List of types of attribution for the conversion report", + "default": ["INDIVIDUAL", "HOUSEHOLD"], + "type": "array", + "items": { + "title": "ValidEnums", + "description": "An enumeration.", + "enum": ["INDIVIDUAL", "HOUSEHOLD"] + }, + "order": 8 + }, + "start_date": { + "type": "string", + "title": "Start Date", + "description": "A date in the format YYYY-MM-DD. If you have not set a date, it would be defaulted to latest allowed date by report api (913 days from today).", + "format": "date", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "pattern_descriptor": "YYYY-MM-DD", + "examples": ["2022-07-28"], + "order": 9 + } + } + } } } }, diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py index 8cc1a4f96057..61ba1f1c61f0 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py @@ -1,12 +1,31 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +import copy +import os +from unittest.mock import MagicMock +import pytest import responses from source_pinterest import SourcePinterest +from source_pinterest.reports import CampaignAnalyticsReport +from source_pinterest.reports.reports import ( + AdGroupReport, + AdGroupTargetingReport, + AdvertizerReport, + AdvertizerTargetingReport, + CampaignTargetingReport, + KeywordReport, + PinPromotionReport, + PinPromotionTargetingReport, + ProductGroupReport, + ProductGroupTargetingReport, + ProductItemReport, +) from source_pinterest.utils import get_analytics_columns from unit_tests.test_source import setup_responses +os.environ["REQUEST_CACHE_PATH"] = '/tmp' @responses.activate def test_request_body_json(analytics_report_stream, date_range): @@ -62,3 +81,45 @@ def test_streams(test_config): streams = source.streams(test_config) expected_streams_number = 32 assert len(streams) == expected_streams_number + +@responses.activate +def test_custom_streams(test_config): + config = copy.deepcopy(test_config) + config['custom_reports'] = [{ + "name": "vadim_report", + "level": "AD_GROUP", + "granularity": "MONTH", + "click_window_days": 30, + "engagement_window_days": 30, + "view_window_days": 30, + "conversion_report_time": "TIME_OF_CONVERSION", + "attribution_types": ["INDIVIDUAL", "HOUSEHOLD"], + "columns": ["ADVERTISER_ID", "AD_ACCOUNT_ID", "AD_GROUP_ID", "CTR", "IMPRESSION_2"], + "start_date": "2023-01-08" + }] + setup_responses() + source = SourcePinterest() + streams = source.streams(config) + expected_streams_number = 33 + assert len(streams) == expected_streams_number + +@pytest.mark.parametrize( + "report_name, expected_level", + [ + [CampaignAnalyticsReport, 'CAMPAIGN'], + [CampaignTargetingReport, 'CAMPAIGN_TARGETING'], + [AdvertizerReport, 'ADVERTISER'], + [AdvertizerTargetingReport, 'ADVERTISER_TARGETING'], + [AdGroupReport, 'AD_GROUP'], + [AdGroupTargetingReport, 'AD_GROUP_TARGETING'], + [PinPromotionReport, 'PIN_PROMOTION'], + [PinPromotionTargetingReport, 'PIN_PROMOTION_TARGETING'], + [ProductGroupReport, 'PRODUCT_GROUP'], + [ProductGroupTargetingReport, 'PRODUCT_GROUP_TARGETING'], + [ProductItemReport, 'PRODUCT_ITEM'], + [KeywordReport, 'KEYWORD'] + ], +) +def test_level(test_config, report_name, expected_level): + assert report_name(parent=None, config=MagicMock()).level == expected_level + diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py index d86620fad40c..2fd50933d8e7 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py @@ -36,7 +36,7 @@ def test_check_wrong_date_connection(wrong_date_config): logger_mock = MagicMock() with pytest.raises(AirbyteTracedException) as e: source.check_connection(logger_mock, wrong_date_config) - assert e.value.message == "Entered `Start Date` does not match format YYYY-MM-DD" + assert e.value.message == "Entered `Start Date` wrong_date_format does not match format YYYY-MM-DD" @responses.activate diff --git a/airbyte-integrations/connectors/source-postgres/build.gradle b/airbyte-integrations/connectors/source-postgres/build.gradle index 49e0bee537d6..722598ac2d82 100644 --- a/airbyte-integrations/connectors/source-postgres/build.gradle +++ b/airbyte-integrations/connectors/source-postgres/build.gradle @@ -13,12 +13,12 @@ java { } airbyteJavaConnector { - cdkVersionRequired = '0.4.8' + cdkVersionRequired = '0.5.0' features 
= ['db-sources'] - useLocalCdk = false + useLocalCdk = true } -airbyteJavaConnector.addCdkDependencies() + application { mainClass = 'io.airbyte.integrations.source.postgres.PostgresSource' @@ -52,6 +52,8 @@ dependencies { implementation libs.bundles.datadog testImplementation 'org.hamcrest:hamcrest-all:1.3' + testFixturesImplementation libs.testcontainers.jdbc + testFixturesImplementation libs.testcontainers.postgresql testImplementation libs.testcontainers.jdbc testImplementation libs.testcontainers.postgresql testImplementation libs.junit.jupiter.system.stubs diff --git a/airbyte-integrations/connectors/source-postgres/metadata.yaml b/airbyte-integrations/connectors/source-postgres/metadata.yaml index 1fbe97df91ce..8e617f30cd35 100644 --- a/airbyte-integrations/connectors/source-postgres/metadata.yaml +++ b/airbyte-integrations/connectors/source-postgres/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: source definitionId: decd338e-5647-4c0b-adf4-da0e75f5a750 - dockerImageTag: 3.2.21 + dockerImageTag: 3.2.22 dockerRepository: airbyte/source-postgres documentationUrl: https://docs.airbyte.com/integrations/sources/postgres githubIssueLabel: source-postgres diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java index b5208c543092..3c727acef7e7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java @@ -45,6 +45,8 @@ public class PostgresUtils { public static final Duration MIN_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(2); public static final Duration MAX_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(20); public static final Duration DEFAULT_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(5); + public static final Duration DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME = Duration.ofMinutes(1); + private static final int MIN_QUEUE_SIZE = 1000; private static final int MAX_QUEUE_SIZE = 10000; @@ -157,6 +159,18 @@ public static Duration getFirstRecordWaitTime(final JsonNode config) { return firstRecordWaitTime; } + public static Duration getSubsequentRecordWaitTime(final JsonNode config) { + Duration subsequentRecordWaitTime = DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME; + final boolean isTest = config.has("is_test") && config.get("is_test").asBoolean(); + final Optional firstRecordWaitSeconds = getFirstRecordWaitSeconds(config); + if (isTest && firstRecordWaitSeconds.isPresent()) { + // In tests, reuse the initial_waiting_seconds property to speed things up. 
+ subsequentRecordWaitTime = Duration.ofSeconds(firstRecordWaitSeconds.get()); + } + LOGGER.info("Subsequent record waiting time: {} seconds", subsequentRecordWaitTime.getSeconds()); + return subsequentRecordWaitTime; + } + public static boolean isXmin(final JsonNode config) { final boolean isXmin = config.hasNonNull("replication_method") && config.get("replication_method").get("method").asText().equals("Xmin"); diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java index a9a5553a6f72..910a22648508 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java @@ -68,6 +68,7 @@ public static List> cdcCtidIteratorsCombin try { final JsonNode sourceConfig = database.getSourceConfig(); final Duration firstRecordWaitTime = PostgresUtils.getFirstRecordWaitTime(sourceConfig); + final Duration subsequentRecordWaitTime = PostgresUtils.getSubsequentRecordWaitTime(sourceConfig); final OptionalInt queueSize = OptionalInt.of(PostgresUtils.getQueueSize(sourceConfig)); LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds()); LOGGER.info("Queue size: {}", queueSize.getAsInt()); @@ -163,20 +164,9 @@ public static List> cdcCtidIteratorsCombin final var targetPosition = PostgresCdcTargetPosition.targetPosition(database); final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>(sourceConfig, - targetPosition, false, firstRecordWaitTime, queueSize); + targetPosition, false, firstRecordWaitTime, subsequentRecordWaitTime, queueSize); final PostgresCdcStateHandler postgresCdcStateHandler = new PostgresCdcStateHandler(stateManager); - final boolean canShortCircuitDebeziumEngine = savedOffset.isPresent() && - // Until the need presents itself in production, short-circuiting should only be done in tests. 
- sourceConfig.has("is_test") && sourceConfig.get("is_test").asBoolean() && - !postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - database.getDatabaseConfig(), - sourceConfig.get("replication_method").get("replication_slot").asText(), - sourceConfig.get("replication_method").get("publication").asText(), - PostgresUtils.getPluginValue(sourceConfig.get("replication_method")), - savedOffset.getAsLong(), - targetPosition.targetLsn.asLong()); - final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators( catalog, new PostgresCdcSavedInfoFetcher(stateToBeUsed), @@ -185,8 +175,7 @@ public static List> cdcCtidIteratorsCombin PostgresCdcProperties.getDebeziumDefaultProperties(database), DebeziumPropertiesManager.DebeziumConnectorType.RELATIONALDB, emittedAt, - false, - canShortCircuitDebeziumEngine); + false); if (initialSyncCtidIterators.isEmpty()) { return Collections.singletonList(incrementalIteratorSupplier.get()); diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java index ea68ae74436b..a1f6b8242952 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java @@ -5,52 +5,36 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.PostgresUtils; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; -import io.airbyte.commons.json.Jsons; -import java.util.List; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; +import java.util.Map; public abstract class AbstractCdcPostgresSourceSslAcceptanceTest extends CdcPostgresSourceAcceptanceTest { protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName(), "withWalLevelLogical", "withCert"); - certs = testdb.getCertificate(); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(NAMESPACE)) - .put("replication_method", replicationMethod) - .put(JdbcUtils.SSL_KEY, true) - .put("ssl_mode", getCertificateConfiguration()) - .put("is_test", true) - .build()); - - testdb.database.query(ctx -> { - ctx.execute("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO id_and_name (id, name) VALUES 
(1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.execute("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - return null; - }); + testdb = PostgresTestDatabase.in(getServerImageName(), "withWalLevelLogical", "withCert") + .with("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .withReplicationSlot() + .withPublicationForAllTables(); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSchemas(NAMESPACE) + .withSsl(getCertificateConfiguration()) + .withCdcReplication() + .build(); } protected abstract String getServerImageName(); - public abstract ImmutableMap getCertificateConfiguration(); + public abstract Map getCertificateConfiguration(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java index 00c1722eef25..a87c4395785e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java @@ -10,20 +10,17 @@ import static io.airbyte.protocol.models.JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE; import static io.airbyte.protocol.models.JsonSchemaType.STRING_TIME_WITH_TIMEZONE; -import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaPrimitiveUtil.JsonSchemaPrimitive; import io.airbyte.protocol.models.JsonSchemaType; -import java.sql.SQLException; import java.util.Set; public abstract class AbstractPostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { protected PostgresTestDatabase testdb; - protected JsonNode config; protected static final String SCHEMA_NAME = "test"; @@ -38,12 +35,7 @@ protected String getImageName() { } @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected void tearDown(final TestDestinationEnv testEnv) throws SQLException { + protected void tearDown(final TestDestinationEnv testEnv) { testdb.close(); } diff --git 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java index 6014ca946500..d605214a8028 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java @@ -5,14 +5,12 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.PostgresUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -22,6 +20,7 @@ import io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; import java.util.List; +import java.util.Map; public abstract class AbstractPostgresSourceSSLCertificateAcceptanceTest extends AbstractPostgresSourceAcceptanceTest { @@ -29,11 +28,9 @@ public abstract class AbstractPostgresSourceSSLCertificateAcceptanceTest extends private static final String STREAM_NAME2 = "starships"; private static final String STREAM_NAME_MATERIALIZED_VIEW = "testview"; private static final String SCHEMA_NAME = "public"; - - private PostgresTestDatabase testdb; - private JsonNode config; protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; + + protected PostgresTestDatabase testdb; @Override protected FeatureFlags featureFlags() { @@ -42,29 +39,15 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withCert"); - certs = testdb.getCertificate(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put("schemas", Jsons.jsonNode(List.of("public"))) - .put("ssl", true) - .put("replication_method", replicationMethod) - .put("ssl_mode", getCertificateConfiguration()) - .build()); - testdb.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); - return null; - }); + testdb = 
PostgresTestDatabase.in("postgres:16-bullseye", "withCert") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .with("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); } - public abstract ImmutableMap getCertificateConfiguration(); + public abstract Map getCertificateConfiguration(); @Override protected void tearDown(final TestDestinationEnv testEnv) { @@ -73,7 +56,11 @@ protected void tearDown(final TestDestinationEnv testEnv) { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withStandardReplication() + .withSsl(getCertificateConfiguration()) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index 11e848d94275..ae97db058d15 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -13,11 +13,11 @@ import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.functional.CheckedFunction; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -25,6 +25,8 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.HashMap; import java.util.List; import org.jooq.SQLDialect; @@ -37,13 +39,13 @@ public abstract class AbstractSshPostgresSourceAcceptanceTest extends AbstractPo private final SshBastionContainer bastion = new SshBastionContainer(); private PostgresTestDatabase testdb; - private JsonNode config; private void populateDatabaseTestData() throws Exception { - final var builder = testdb.makeConfigBuilder() - .put("schemas", List.of("public")) - .put("ssl", false); - final var outerConfig = bastion.getTunnelConfig(getTunnelMethod(), builder, false); + final var outerConfig = testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false)) + .build(); SshTunnel.sshWrap( outerConfig, JdbcUtils.HOST_LIST_KEY, @@ -82,12 +84,8 @@ protected FeatureFlags 
featureFlags() { // requiring data to already be in place. @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withNetwork"); - bastion.initAndStartBastion(testdb.container.getNetwork()); - final var builder = testdb.makeConfigBuilder() - .put("schemas", List.of("public")) - .put("ssl", false); - config = bastion.getTunnelConfig(getTunnelMethod(), builder, true); + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withNetwork"); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); populateDatabaseTestData(); } @@ -98,7 +96,17 @@ protected void tearDown(final TestDestinationEnv testEnv) { @Override protected JsonNode getConfig() { - return config; + try { + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), true)) + .build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java index 5c4eb2ddf57f..8143ccafc663 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java @@ -5,13 +5,14 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class CDCPostgresSourceCaCertificateSslAcceptanceTest extends AbstractCdcPostgresSourceSslAcceptanceTest { - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java index 15a2fca44d68..c01f163fc9e7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java @@ -5,16 +5,18 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class 
CDCPostgresSourceFullCertificateSslAcceptanceTest extends AbstractCdcPostgresSourceSslAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { + final var certs = testdb.getCertificates(); return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) + .put("ca_certificate", certs.caCertificate()) + .put("client_certificate", certs.clientCertificate()) + .put("client_key", certs.clientKey()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java index 6968f340210d..762bfecb3d0a 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java @@ -5,26 +5,16 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; -import java.sql.SQLException; -import java.util.List; public class CdcInitialSnapshotPostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { private static final String SCHEMA_NAME = "test"; - private static final int INITIAL_WAITING_SECONDS = 30; - - private String slotName; - private String publication; @Override protected FeatureFlags featureFlags() { @@ -33,60 +23,28 @@ protected FeatureFlags featureFlags() { @Override protected Database setupDatabase() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - - /** - * The publication is not being set as part of the config and because of it - * {@link io.airbyte.integrations.source.postgres.PostgresSource#isCdc(JsonNode)} returns false, as - * a result no test in this class runs through the cdc path. 
- */ - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put("replication_method", replicationMethod) - .put("is_test", true) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - testdb.database.query(ctx -> { - ctx.execute( - "SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - testdb.database.query(ctx -> ctx.fetch("CREATE SCHEMA TEST;")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE inventory_item AS (\n" - + " name text,\n" - + " supplier_id integer,\n" - + " price numeric\n" - + ");")); - - testdb.database.query(ctx -> ctx.fetch("SET TIMEZONE TO 'MST'")); - return testdb.database; + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE EXTENSION hstore;") + .with("CREATE SCHEMA TEST;") + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (\n" + + " name text,\n" + + " supplier_id integer,\n" + + " price numeric\n" + + ");") + .with("SET TIMEZONE TO 'MST'") + .withReplicationSlot() + .withPublicationForAllTables(); + return testdb.getDatabase(); } @Override - protected void tearDown(TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); - super.tearDown(testEnv); - } - - public boolean testCatalog() { - return true; + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java index 4fb133959f82..df8c13bd046b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java @@ -8,14 +8,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import 
io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -39,12 +37,8 @@ public class CdcPostgresSourceAcceptanceTest extends AbstractPostgresSourceAccep protected static final String NAMESPACE = "public"; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - protected static final int INITIAL_WAITING_SECONDS = 30; protected PostgresTestDatabase testdb; - protected JsonNode config; - protected String slotName; - protected String publication; @Override protected FeatureFlags featureFlags() { @@ -53,47 +47,27 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName(), "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(NAMESPACE)) - .put("replication_method", replicationMethod) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .build()); - - testdb.database.query(ctx -> { - ctx.execute("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.execute("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - return null; - }); + testdb = PostgresTestDatabase.in(getServerImageName(), "withConf") + .with("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .withReplicationSlot() + .withPublicationForAllTables(); } @Override protected void tearDown(final TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); testdb.close(); } @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas(NAMESPACE) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java index f36fdb61b164..cde90fd79c8e 100644 --- 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java @@ -5,21 +5,17 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; -import java.sql.SQLException; import java.util.Collections; import java.util.List; import java.util.Set; @@ -27,10 +23,7 @@ public class CdcWalLogsPostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { private static final String SCHEMA_NAME = "test"; - private static final int INITIAL_WAITING_SECONDS = 30; private JsonNode stateAfterFirstSync; - private String slotName; - private String publication; @Override protected List runRead(final ConfiguredAirbyteCatalog configuredCatalog) throws Exception { @@ -43,7 +36,6 @@ protected List runRead(final ConfiguredAirbyteCatalog configured @Override protected void postSetup() throws Exception { final Database database = setupDatabase(); - initTests(); for (final TestDataHolder test : testDataHolders) { database.query(ctx -> { ctx.fetch(test.getCreateSqlQuery()); @@ -78,61 +70,29 @@ protected FeatureFlags featureFlags() { } @Override - protected Database setupDatabase() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - - /** - * The publication is not being set as part of the config and because of it - * {@link io.airbyte.integrations.source.postgres.PostgresSource#isCdc(JsonNode)} returns false, as - * a result no test in this class runs through the cdc path. 
- */ - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put("replication_method", replicationMethod) - .put("is_test", true) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - testdb.database.query(ctx -> { - ctx.execute( - "SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - testdb.database.query(ctx -> ctx.fetch("CREATE SCHEMA TEST;")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE inventory_item AS (\n" - + " name text,\n" - + " supplier_id integer,\n" - + " price numeric\n" - + ");")); - - testdb.database.query(ctx -> ctx.fetch("SET TIMEZONE TO 'MST'")); - return testdb.database; + protected Database setupDatabase() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE EXTENSION hstore;") + .with("CREATE SCHEMA TEST;") + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (\n" + + " name text,\n" + + " supplier_id integer,\n" + + " price numeric\n" + + ");") + .with("SET TIMEZONE TO 'MST'") + .withReplicationSlot() + .withPublicationForAllTables(); + return testdb.getDatabase(); } @Override - protected void tearDown(TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); - super.tearDown(testEnv); - } - - public boolean testCatalog() { - return true; + protected JsonNode getConfig() throws Exception { + return testdb.integrationTestConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java index c6a7c9cf6465..4bd3d9e02e26 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java @@ -7,16 +7,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.PostgresUtils; import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import 
io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -36,10 +35,8 @@ public class CloudDeploymentPostgresSourceAcceptanceTest extends SourceAcceptanc private static final String SCHEMA_NAME = "public"; private PostgresTestDatabase testdb; - private JsonNode config; protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; @Override protected FeatureFlags featureFlags() { @@ -52,23 +49,8 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withCert"); - certs = testdb.getCertificate(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put("replication_method", replicationMethod) - .put("ssl_mode", ImmutableMap.builder() - .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) - .build()) - .build()); - - testdb.database.query(ctx -> { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withCert"); + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); @@ -96,7 +78,17 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + final var certs = testdb.getCertificates(); + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put("mode", "verify-ca") + .put("ca_certificate", certs.caCertificate()) + .put("client_certificate", certs.clientCertificate()) + .put("client_key", certs.clientKey()) + .put("client_key_password", PASSWORD) + .build()) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index 2c7c8cf13612..b199b808738d 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -8,16 +8,14 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.airbyte.cdk.db.Database; import io.airbyte.cdk.db.jdbc.JdbcUtils; import 
io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; @@ -39,13 +37,10 @@ public class PostgresSourceAcceptanceTest extends AbstractPostgresSourceAcceptan private static final String STREAM_NAME_MATERIALIZED_VIEW = "testview"; private static final String SCHEMA_NAME = "public"; public static final String LIMIT_PERMISSION_SCHEMA = "limit_perm_schema"; - - public final String LIMIT_PERMISSION_ROLE_PASSWORD = "test"; + static public final String LIMIT_PERMISSION_ROLE_PASSWORD = "test"; private PostgresTestDatabase testdb; private JsonNode config; - private ConfiguredAirbyteCatalog configCatalog; - private String limitPermissionRole; @Override protected FeatureFlags featureFlags() { @@ -54,12 +49,9 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName()); - limitPermissionRole = testdb.withSuffix("limit_perm_role"); - - final List schemas = List.of("public"); - config = getConfig(testdb.userName, testdb.password, schemas); - testdb.database.query(ctx -> { + testdb = PostgresTestDatabase.in(getServerImageName()); + config = getConfig(testdb.getUserName(), testdb.getPassword(), "public"); + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); @@ -67,23 +59,22 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); return null; }); - configCatalog = getCommonConfigCatalog(); } - private JsonNode getConfig(final String username, final String password, final List schemas) { - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(testdb.container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(testdb.container)) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.USERNAME_KEY, username) - .put(JdbcUtils.PASSWORD_KEY, password) - .put(JdbcUtils.SCHEMAS_KEY, Jsons.jsonNode(schemas)) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); + private String getLimitPermissionRoleName() { + return testdb.withNamespace("limit_perm_role"); + } + + private JsonNode getConfig(final String username, final String password, String... 
schemas) { + return testdb.configBuilder() + .withResolvedHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, username) + .with(JdbcUtils.PASSWORD_KEY, password) + .withSchemas(schemas) + .withoutSsl() + .withStandardReplication() + .build(); } @Override @@ -98,7 +89,7 @@ protected JsonNode getConfig() { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { - return configCatalog; + return getCommonConfigCatalog(); } @Override @@ -113,16 +104,16 @@ protected boolean supportsPerStream() { @Test public void testFullRefreshWithRevokingSchemaPermissions() throws Exception { - prepareEnvForUserWithoutPermissions(testdb.database); + prepareEnvForUserWithoutPermissions(testdb.getDatabase()); - config = getConfig(limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD, List.of(LIMIT_PERMISSION_SCHEMA)); + config = getConfig(getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD, LIMIT_PERMISSION_SCHEMA); final ConfiguredAirbyteCatalog configuredCatalog = getLimitPermissionConfiguredCatalog(); final List fullRefreshRecords = filterRecords(runRead(configuredCatalog)); final String assertionMessage = "Expected records after full refresh sync for user with schema permission"; assertFalse(fullRefreshRecords.isEmpty(), assertionMessage); - revokeSchemaPermissions(testdb.database); + revokeSchemaPermissions(testdb.getDatabase()); final List lessPermFullRefreshRecords = filterRecords(runRead(configuredCatalog)); final String assertionMessageWithoutPermission = "Expected no records after full refresh sync for user without schema permission"; @@ -132,9 +123,9 @@ public void testFullRefreshWithRevokingSchemaPermissions() throws Exception { @Test public void testDiscoverWithRevokingSchemaPermissions() throws Exception { - prepareEnvForUserWithoutPermissions(testdb.database); - revokeSchemaPermissions(testdb.database); - config = getConfig(limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD, List.of(LIMIT_PERMISSION_SCHEMA)); + prepareEnvForUserWithoutPermissions(testdb.getDatabase()); + revokeSchemaPermissions(testdb.getDatabase()); + config = getConfig(getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD, LIMIT_PERMISSION_SCHEMA); runDiscover(); final AirbyteCatalog lastPersistedCatalogSecond = getLastPersistedCatalog(); @@ -144,20 +135,20 @@ public void testDiscoverWithRevokingSchemaPermissions() throws Exception { private void revokeSchemaPermissions(final Database database) throws SQLException { database.query(ctx -> { - ctx.fetch(String.format("REVOKE USAGE ON schema %s FROM %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("REVOKE USAGE ON schema %s FROM %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); return null; }); } private void prepareEnvForUserWithoutPermissions(final Database database) throws SQLException { database.query(ctx -> { - ctx.fetch(String.format("CREATE ROLE %s WITH LOGIN PASSWORD '%s';", limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD)); + ctx.fetch(String.format("CREATE ROLE %s WITH LOGIN PASSWORD '%s';", getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD)); ctx.fetch(String.format("CREATE SCHEMA %s;", LIMIT_PERMISSION_SCHEMA)); - ctx.fetch(String.format("GRANT CONNECT ON DATABASE %s TO %s;", testdb.dbName, limitPermissionRole)); - ctx.fetch(String.format("GRANT USAGE ON schema %s TO %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("GRANT CONNECT ON DATABASE %s TO %s;", testdb.getDatabaseName(), getLimitPermissionRoleName())); + 
ctx.fetch(String.format("GRANT USAGE ON schema %s TO %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); ctx.fetch(String.format("CREATE TABLE %s.id_and_name(id INTEGER, name VARCHAR(200));", LIMIT_PERMISSION_SCHEMA)); ctx.fetch(String.format("INSERT INTO %s.id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", LIMIT_PERMISSION_SCHEMA)); - ctx.fetch(String.format("GRANT SELECT ON table %s.id_and_name TO %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("GRANT SELECT ON table %s.id_and_name TO %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); return null; }); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java index 116ecb38767b..6c36dd3ee932 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java @@ -5,13 +5,10 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import java.sql.SQLException; public class PostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { @@ -23,37 +20,29 @@ protected FeatureFlags featureFlags() { @Override protected Database setupDatabase() throws SQLException { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); - testdb.database.query(ctx -> { - ctx.execute(String.format("CREATE SCHEMA %S;", SCHEMA_NAME)); - ctx.execute("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');"); - ctx.execute("CREATE TYPE inventory_item AS (name text, supplier_id integer, price numeric);"); - // In one of the test case, we have some money values with currency symbol. Postgres can only - // understand those money values if the symbol corresponds to the monetary locale setting. For - // example, - // if the locale is 'en_GB', '£100' is valid, but '$100' is not. So setting the monetary locate is - // necessary here to make sure the unit test can pass, no matter what the locale the runner VM has. - ctx.execute("SET lc_monetary TO 'en_US.utf8';"); - // Set up a fixed timezone here so that timetz and timestamptz always have the same time zone - // wherever the tests are running on. 
- ctx.execute("SET TIMEZONE TO 'MST'"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - return testdb.database; + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE SCHEMA %S;", SCHEMA_NAME) + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (name text, supplier_id integer, price numeric);") + // In one of the test case, we have some money values with currency symbol. Postgres can only + // understand those money values if the symbol corresponds to the monetary locale setting. For + // example, + // if the locale is 'en_GB', '£100' is valid, but '$100' is not. So setting the monetary locate is + // necessary here to make sure the unit test can pass, no matter what the locale the runner VM has. + .with("SET lc_monetary TO 'en_US.utf8';") + // Set up a fixed timezone here so that timetz and timestamptz always have the same time zone + // wherever the tests are running on. + .with("SET TIMEZONE TO 'MST'") + .with("CREATE EXTENSION hstore;"); + return testdb.getDatabase(); } @Override - public boolean testCatalog() { - return true; + protected JsonNode getConfig() throws Exception { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java index bfecd215194a..eb93444a7201 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java @@ -5,14 +5,15 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class PostgresSourceSSLCaCertificateAcceptanceTest extends AbstractPostgresSourceSSLCertificateAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java index bf0282c418ee..dcd4810cd34b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java @@ -5,16 +5,17 
@@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class PostgresSourceSSLFullCertificateAcceptanceTest extends AbstractPostgresSourceSSLCertificateAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java index 2a2a7be36c44..90f173ecf813 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java @@ -5,14 +5,12 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -31,12 +29,14 @@ public class XminPostgresSourceAcceptanceTest extends AbstractPostgresSourceAcce private static final String SCHEMA_NAME = "public"; private PostgresTestDatabase testdb; - private JsonNode config; - private ConfiguredAirbyteCatalog configCatalog; @Override protected JsonNode getConfig() throws Exception { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .withXminReplication() + .build(); } @Override @@ -46,25 +46,12 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:12-bullseye"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Xmin") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, Jsons.jsonNode(List.of("public"))) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); - - testdb.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 
'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); - return null; - }); - configCatalog = getXminCatalog(); + testdb = PostgresTestDatabase.in("postgres:12-bullseye") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .with("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); } @Override @@ -74,20 +61,6 @@ protected void tearDown(final TestDestinationEnv testEnv) throws Exception { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws Exception { - return configCatalog; - } - - @Override - protected JsonNode getState() throws Exception { - return Jsons.jsonNode(new HashMap<>()); - } - - @Override - protected boolean supportsPerStream() { - return true; - } - - private ConfiguredAirbyteCatalog getXminCatalog() { return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( new ConfiguredAirbyteStream() .withSyncMode(SyncMode.INCREMENTAL) @@ -121,4 +94,14 @@ private ConfiguredAirbyteCatalog getXminCatalog() { .withSourceDefinedPrimaryKey(List.of(List.of("id")))))); } + @Override + protected JsonNode getState() throws Exception { + return Jsons.jsonNode(new HashMap<>()); + } + + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java index 3ba91d1c4656..a6d7ecb4d970 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java @@ -4,6 +4,9 @@ package io.airbyte.integrations.source.postgres; +import org.junit.jupiter.api.Order; + +@Order(2) public class CdcPostgresSourceLegacyCtidTest extends CdcPostgresSourceTest { protected static String getServerImageName() { diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java index 06b058ad87e2..59064a1893e2 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java @@ -24,20 +24,17 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Streams; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.db.PgLsn; import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.factory.DatabaseDriver; import 
io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.debezium.CdcSourceTest; import io.airbyte.cdk.integrations.debezium.CdcTargetPosition; import io.airbyte.cdk.integrations.debezium.internals.postgres.PostgresCdcTargetPosition; import io.airbyte.cdk.integrations.debezium.internals.postgres.PostgresReplicationConnection; import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -59,7 +56,6 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -68,129 +64,100 @@ import java.util.Set; import java.util.stream.Collectors; import javax.sql.DataSource; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -public class CdcPostgresSourceTest extends CdcSourceTest { +@Order(1) +public class CdcPostgresSourceTest extends CdcSourceTest { - protected String publication; - protected static final int INITIAL_WAITING_SECONDS = 15; - private PostgresSource source; - - private PostgresTestDatabase testdb; + @Override + protected PostgresTestDatabase createTestDatabase() { + return PostgresTestDatabase.in(getServerImageName(), "withConf").withReplicationSlot(); + } - private JsonNode config; - private String fullReplicationSlot; - private String cleanUserVanillaName, cleanUserReplicationName, cleanUserSuperName; - private final String cleanUserPassword = "password"; + @Override + protected PostgresSource source() { + final var source = new PostgresSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } - protected String getPluginName() { - return "pgoutput"; + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSchemas(modelsSchema(), modelsSchema() + "_random") + .withoutSsl() + .withCdcReplication("After loading Data in the destination") + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); } + @Override @BeforeEach - protected void setup() throws SQLException { - source = new PostgresSource(); - source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); - testdb = PostgresTestDatabase.make(getServerImageName(), "withConf"); - fullReplicationSlot = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - config = getConfig(testdb.dbName, testdb.userName, testdb.password); - cleanUserSuperName = testdb.withSuffix("super_user"); - cleanUserReplicationName = testdb.withSuffix("replication_user"); - cleanUserVanillaName = testdb.withSuffix("vanilla_user"); + protected void setup() { super.setup(); - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_create_logical_replication_slot('" + fullReplicationSlot + "', '" + getPluginName() + "');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE USER " + cleanUserSuperName + " PASSWORD '" + cleanUserPassword + 
"';"); - ctx.execute("ALTER USER " + cleanUserSuperName + " SUPERUSER;"); - ctx.execute("CREATE USER " + cleanUserReplicationName + " PASSWORD '" + cleanUserPassword + "';"); - ctx.execute("ALTER USER " + cleanUserReplicationName + " REPLICATION;"); - ctx.execute("CREATE USER " + cleanUserVanillaName + " PASSWORD '" + cleanUserPassword + "';"); - return null; - }); - } - - @AfterEach - protected void tearDown() throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("DROP USER " + cleanUserVanillaName + ";"); - ctx.execute("DROP USER " + cleanUserReplicationName + ";"); - ctx.execute("DROP USER " + cleanUserSuperName + ";"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - ctx.execute("SELECT pg_drop_replication_slot('" + fullReplicationSlot + "');"); - return null; - }); - testdb.close(); - } - - private JsonNode getConfig(final String dbName, final String userName, final String userPassword) { - final JsonNode replicationMethod = getReplicationMethod(dbName); - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) - .put(JdbcUtils.USERNAME_KEY, userName) - .put(JdbcUtils.PASSWORD_KEY, userPassword) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .put("replication_method", replicationMethod) - .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) - .build()); - } - - private JsonNode getReplicationMethod(final String dbName) { - return Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", fullReplicationSlot) - .put("publication", publication) - .put("plugin", getPluginName()) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .put("lsn_commit_behaviour", "After loading Data in the destination") - .build()); + testdb.withPublicationForAllTables(); } @Test void testCheckReplicationAccessSuperUserPrivilege() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserSuperName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserSuperName = testdb.withNamespace("super_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserSuperName, testdb.getPassword()) + .with("ALTER USER %s SUPERUSER;", cleanUserSuperName) + .onClose("DROP OWNED BY %s;", cleanUserSuperName) + .onClose("DROP USER %s;", cleanUserSuperName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserSuperName); + final AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, status.getStatus()); } @Test void testCheckReplicationAccessReplicationPrivilege() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserReplicationName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserReplicationName = testdb.withNamespace("replication_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserReplicationName, testdb.getPassword()) + .with("ALTER USER %s REPLICATION;", cleanUserReplicationName) + .onClose("DROP OWNED BY %s;", cleanUserReplicationName) + .onClose("DROP USER %s;", cleanUserReplicationName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserReplicationName); + final 
AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, status.getStatus()); } @Test void testCheckWithoutReplicationPermission() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserVanillaName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserVanillaName = testdb.withNamespace("vanilla_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserVanillaName, testdb.getPassword()) + .onClose("DROP OWNED BY %s;", cleanUserVanillaName) + .onClose("DROP USER %s;", cleanUserVanillaName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserVanillaName); + final AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertEquals(String.format(ConnectorExceptionUtil.COMMON_EXCEPTION_MESSAGE_TEMPLATE, - String.format(PostgresReplicationConnection.REPLICATION_PRIVILEGE_ERROR_MESSAGE, test_config.get("username").asText())), + String.format(PostgresReplicationConnection.REPLICATION_PRIVILEGE_ERROR_MESSAGE, testConfig.get("username").asText())), status.getMessage()); } @Test void testCheckWithoutPublication() throws Exception { - testdb.database.query(ctx -> ctx.execute("DROP PUBLICATION " + publication + ";")); - final AirbyteConnectionStatus status = source.check(getConfig()); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - testdb.database.query(ctx -> ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;")); + testdb.query(ctx -> ctx.execute("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR ALL TABLES;")); } @Test void testCheckWithoutReplicationSlot() throws Exception { - testdb.database.query(ctx -> ctx.execute("SELECT pg_drop_replication_slot('" + fullReplicationSlot + "');")); - final AirbyteConnectionStatus status = source.check(getConfig()); + testdb.query(ctx -> ctx.execute("SELECT pg_drop_replication_slot('" + testdb.getReplicationSlotName() + "');")); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - testdb.database.query(ctx -> ctx.execute("SELECT pg_create_logical_replication_slot('" + fullReplicationSlot + "', '" + getPluginName() + "');")); + testdb.query(ctx -> ctx.execute("SELECT pg_create_logical_replication_slot('" + testdb.getReplicationSlotName() + "', 'pgoutput');")); } @Override @@ -243,14 +210,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); - if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))) { + if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomSchema()))) { assertEquals("ctid", streamState.get(STATE_TYPE_KEY).asText()); - } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))) { + } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))) { assertFalse(streamState.has(STATE_TYPE_KEY)); } else { throw new RuntimeException("Unknown 
stream"); @@ -268,8 +235,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); assertFalse(streamState.has(STATE_TYPE_KEY)); @@ -286,14 +253,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -303,18 +270,18 @@ public void testTwoStreamSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", + testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2", columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -327,8 +294,7 @@ public void testTwoStreamSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source().read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -389,13 +355,13 @@ public void testTwoStreamSync() throws Exception { recordMessages1, names, names, - MODELS_SCHEMA); + modelsSchema()); - assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA), firstStreamInState); + assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()), firstStreamInState); // Triggering a sync with a ctid state for 1 stream and complete state for other stream - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List stateMessages2 = extractStateMessages(actualRecords2); @@ -432,7 +398,7 @@ public void testTwoStreamSync() throws Exception { recordMessages2, names, names, - MODELS_SCHEMA); + modelsSchema()); } @Override @@ -450,13 +416,13 @@ protected void assertExpectedStateMessagesFromIncrementalSync(final List ctx.execute("DROP PUBLICATION " + publication + ";")); - testdb.database.query(ctx -> ctx.execute(String.format("CREATE PUBLICATION " + publication + " FOR TABLE %s.%s", MODELS_SCHEMA, "models"))); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + testdb + .query(ctx -> ctx.execute(String.format("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR TABLE %s.%s", modelsSchema(), "models"))); - final AirbyteCatalog 
catalog = source.discover(getConfig()); + final AirbyteCatalog catalog = source().discover(config()); assertEquals(catalog.getStreams().size(), 2); final AirbyteStream streamInPublication = catalog.getStreams().stream().filter(stream -> stream.getName().equals("models")).findFirst().get(); @@ -565,15 +507,15 @@ void testDiscoverFiltersNonPublication() throws Exception { assertEquals(streamNotInPublication.getSupportedSyncModes(), List.of(SyncMode.FULL_REFRESH)); assertTrue(streamNotInPublication.getSourceDefinedPrimaryKey().isEmpty()); assertFalse(streamNotInPublication.getSourceDefinedCursor()); - testdb.database.query(ctx -> ctx.execute("DROP PUBLICATION " + publication + ";")); - testdb.database.query(ctx -> ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES")); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + testdb.query(ctx -> ctx.execute("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR ALL TABLES")); } @Test public void testTableWithTimestampColDefault() throws Exception { createAndPopulateTimestampTable(); final AirbyteCatalog catalog = new AirbyteCatalog().withStreams(List.of( - CatalogHelpers.createAirbyteStream("time_stamp_table", MODELS_SCHEMA, + CatalogHelpers.createAirbyteStream("time_stamp_table", modelsSchema(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING), Field.of("created_at", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)) @@ -584,8 +526,8 @@ public void testTableWithTimestampColDefault() throws Exception { // set all streams to incremental. configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), configuredCatalog, null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -597,7 +539,7 @@ public void testTableWithTimestampColDefault() throws Exception { } private void createAndPopulateTimestampTable() { - createTable(MODELS_SCHEMA, "time_stamp_table", + testdb.with(createTableSqlFmt(), modelsSchema(), "time_stamp_table", columnClause(ImmutableMap.of("id", "INTEGER", "name", "VARCHAR(200)", "created_at", "TIMESTAMPTZ NOT NULL DEFAULT NOW()"), Optional.of("id"))); final List timestampRecords = ImmutableList.of( @@ -617,10 +559,9 @@ private void createAndPopulateTimestampTable() { .jsonNode(ImmutableMap .of("id", 16000, "name", "blah6"))); for (final JsonNode recordJson : timestampRecords) { - executeQuery( - String.format("INSERT INTO %s.%s (%s, %s) VALUES (%s, '%s');", MODELS_SCHEMA, "time_stamp_table", - "id", "name", - recordJson.get("id").asInt(), recordJson.get("name").asText())); + testdb.with("INSERT INTO %s.%s (%s, %s) VALUES (%s, '%s');", modelsSchema(), "time_stamp_table", + "id", "name", + recordJson.get("id").asInt(), recordJson.get("name").asText()); } } @@ -629,13 +570,14 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { final int recordsToCreate = 20; - final JsonNode config = getConfig(); - final JsonNode replicationMethod = ((ObjectNode) getReplicationMethod(config.get(JdbcUtils.DATABASE_KEY).asText())) - .put("lsn_commit_behaviour", "While reading Data"); - ((ObjectNode) config).put("replication_method", replicationMethod); - - final AutoCloseableIterator firstBatchIterator = getSource() - 
.read(config, CONFIGURED_CATALOG, null); + final JsonNode config = testdb.testConfigBuilder() + .withSchemas(modelsSchema(), modelsSchema() + "_random") + .withoutSsl() + .withCdcReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); + final AutoCloseableIterator firstBatchIterator = source() + .read(config, getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -645,8 +587,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { // Extract the last state message final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config, getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final List stateAfterSecondBatch = extractStateMessages(dataFromSecondBatch); @@ -662,8 +604,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { // Triggering sync with the first sync's state only which would mimic a scenario that the second // sync failed on destination end, and we didn't save state - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, state); + final AutoCloseableIterator thirdBatchIterator = source() + .read(config, getConfiguredCatalog(), state); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -694,29 +636,30 @@ void testReachedTargetPosition() { @Test protected void syncShouldIncrementLSN() throws Exception { final int recordsToCreate = 20; + final var postgresSource = source(); final DataSource dataSource = DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), + config().get(JdbcUtils.USERNAME_KEY).asText(), + config().get(JdbcUtils.PASSWORD_KEY).asText(), DatabaseDriver.POSTGRESQL.getDriverClassName(), String.format(DatabaseDriver.POSTGRESQL.getUrlFormatString(), - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asInt(), - config.get(JdbcUtils.DATABASE_KEY).asText())); + config().get(JdbcUtils.HOST_KEY).asText(), + config().get(JdbcUtils.PORT_KEY).asInt(), + config().get(JdbcUtils.DATABASE_KEY).asText())); final JdbcDatabase defaultJdbcDatabase = new DefaultJdbcDatabase(dataSource); final Long replicationSlotAtTheBeginning = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); final Long replicationSlotAfterFirstSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + 
postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // First sync should not make any change to the replication slot status assertLsnPositionForSyncShouldIncrementLSN(replicationSlotAtTheBeginning, replicationSlotAfterFirstSync, 1); @@ -725,15 +668,15 @@ protected void syncShouldIncrementLSN() throws Exception { bulkInsertRecords(recordsToCreate); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final List stateAfterSecondBatch = extractStateMessages(dataFromSecondBatch); assertExpectedStateMessagesFromIncrementalSync(stateAfterSecondBatch); final Long replicationSlotAfterSecondSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Second sync should move the replication slot ahead assertLsnPositionForSyncShouldIncrementLSN(replicationSlotAfterFirstSync, replicationSlotAfterSecondSync, 2); @@ -748,8 +691,8 @@ protected void syncShouldIncrementLSN() throws Exception { // Triggering sync with the first sync's state only which would mimic a scenario that the second // sync failed on destination end, and we didn't save state - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator thirdBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -759,7 +702,7 @@ protected void syncShouldIncrementLSN() throws Exception { dataFromThirdBatch); final Long replicationSlotAfterThirdSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Since we used the state, no change should happen to the replication slot assertEquals(replicationSlotAfterSecondSync, replicationSlotAfterThirdSync); @@ -773,8 +716,9 @@ protected void syncShouldIncrementLSN() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator fourthBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, Jsons.jsonNode(Collections.singletonList(stateAfterThirdBatch.get(stateAfterThirdBatch.size() - 1)))); + final AutoCloseableIterator fourthBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), + Jsons.jsonNode(Collections.singletonList(stateAfterThirdBatch.get(stateAfterThirdBatch.size() - 1)))); final List dataFromFourthBatch = AutoCloseableIterators .toListAndClose(fourthBatchIterator); @@ -784,7 +728,7 @@ protected void syncShouldIncrementLSN() throws Exception { dataFromFourthBatch); final Long replicationSlotAfterFourthSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, 
getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Fourth sync should again move the replication slot ahead assertEquals(1, replicationSlotAfterFourthSync.compareTo(replicationSlotAfterThirdSync)); @@ -815,8 +759,8 @@ protected void verifyCheckpointStatesByRecords() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. final int recordsToCreate = 20000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -827,8 +771,8 @@ protected void verifyCheckpointStatesByRecords() throws Exception { bulkInsertRecords(recordsToCreate); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); assertEquals(recordsToCreate, extractRecordMessages(dataFromSecondBatch).size()); @@ -849,8 +793,8 @@ protected void verifyCheckpointStatesBySeconds() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. 
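Reviewer note: the checkpointing tests above no longer mutate a shared `config` field; each test asks for a fresh `config()` and then tightens the checkpoint knobs in place through Jackson's ObjectNode, as the hunks show with SYNC_CHECKPOINT_DURATION_PROPERTY and SYNC_CHECKPOINT_RECORDS_PROPERTY. A minimal, self-contained sketch of that pattern follows; the literal property strings are placeholders standing in for the connector's constants, and the values are illustrative only.

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class CheckpointConfigSketch {

  // Placeholder strings standing in for the connector's actual property-name constants.
  private static final String SYNC_CHECKPOINT_DURATION_PROPERTY = "sync_checkpoint_seconds";
  private static final String SYNC_CHECKPOINT_RECORDS_PROPERTY = "sync_checkpoint_records";

  public static void main(final String[] args) {
    final ObjectMapper mapper = new ObjectMapper();
    // Stand-in for config(): every test builds its own node instead of sharing a mutable field.
    final JsonNode config = mapper.createObjectNode().put("database", "testdb");

    // Tests then tune checkpointing in place, mirroring the hunks above.
    ((ObjectNode) config).put(SYNC_CHECKPOINT_DURATION_PROPERTY, 1);
    ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 100_000);

    System.out.println(config);
  }
}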
final int recordsToCreate = 40000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -860,13 +804,13 @@ protected void verifyCheckpointStatesBySeconds() throws Exception { assertExpectedStateMessages(stateMessages); bulkInsertRecords(recordsToCreate); - final JsonNode config = getConfig(); + final JsonNode config = config(); ((ObjectNode) config).put(SYNC_CHECKPOINT_DURATION_PROPERTY, 1); ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 100_000); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config, getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -899,10 +843,10 @@ protected void ctidIteratorPageSizeTest() throws Exception { * size of 8KB instead of default 1GB This allows us to make sure that the iterator logic works with * multiple pages (sub queries) */ - final JsonNode config = getConfig(); + final JsonNode config = config(); ((ObjectNode) config).put(USE_TEST_CHUNK_SIZE, true); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config, getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); @@ -916,18 +860,17 @@ protected void ctidIteratorPageSizeTest() throws Exception { } private void bulkInsertRecords(int recordsToCreate) { - final var bulkInsertQuery = String.format(""" - INSERT INTO %s.%s (%s, %s, %s) - SELECT - 200 + generate_series AS id, - 1 AS make_id, - 'F-' || generate_series AS model - FROM generate_series(0, %d - 1); - """, - MODELS_SCHEMA, MODELS_STREAM_NAME, + testdb.with(""" + INSERT INTO %s.%s (%s, %s, %s) + SELECT + 200 + generate_series AS id, + 1 AS make_id, + 'F-' || generate_series AS model + FROM generate_series(0, %d - 1); + """, + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, recordsToCreate); - executeQuery(bulkInsertQuery); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java index 5f245d7690ef..a5473eb703ba 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java @@ -16,7 +16,6 @@ import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import 
io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -37,15 +36,15 @@ public class CloudDeploymentPostgresSourceTest { @BeforeAll static void setupContainers() { - DB_NO_SSL_WITH_NETWORK = PostgresTestDatabase.make("postgres:16-bullseye", "withNetwork"); - NETWORK_NO_SSL = DB_NO_SSL_WITH_NETWORK.container.getNetwork(); + DB_NO_SSL_WITH_NETWORK = PostgresTestDatabase.in("postgres:16-bullseye", "withNetwork"); + NETWORK_NO_SSL = DB_NO_SSL_WITH_NETWORK.getContainer().getNetwork(); BASTION_NO_SSL = new SshBastionContainer(); BASTION_NO_SSL.initAndStartBastion(NETWORK_NO_SSL); - DB_WITH_SSL = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL"); + DB_WITH_SSL = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL"); - DB_WITH_SSL_WITH_NETWORK = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL", "withNetwork"); - NETWORK_WITH_SSL = DB_WITH_SSL_WITH_NETWORK.container.getNetwork(); + DB_WITH_SSL_WITH_NETWORK = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL", "withNetwork"); + NETWORK_WITH_SSL = DB_WITH_SSL_WITH_NETWORK.getContainer().getNetwork(); BASTION_WITH_SSL = new SshBastionContainer(); BASTION_WITH_SSL.initAndStartBastion(NETWORK_WITH_SSL); } @@ -108,9 +107,7 @@ void testSSlRequiredWithTunnelIfServerDoesNotSupportSSL() throws Exception { @Test void testSSlRequiredNoTunnelIfServerSupportSSL() throws Exception { - final ImmutableMap configBuilderWithSSLMode = getDatabaseConfigBuilderWithSSLMode( - DB_WITH_SSL, SSL_MODE_REQUIRE, false).build(); - final JsonNode config = Jsons.jsonNode(configBuilderWithSSLMode); + final JsonNode config = configBuilderWithSSLMode(DB_WITH_SSL, SSL_MODE_REQUIRE, false).build(); addNoTunnel((ObjectNode) config); final AirbyteConnectionStatus connectionStatus = source().check(config); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, connectionStatus.getStatus()); @@ -122,20 +119,20 @@ void testStrictSSLSecuredWithTunnel() throws Exception { assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, connectionStatus.getStatus()); } - private ImmutableMap.Builder getDatabaseConfigBuilderWithSSLMode(final PostgresTestDatabase db, - final String sslMode, - final boolean innerAddress) { + private PostgresTestDatabase.PostgresConfigBuilder configBuilderWithSSLMode( + final PostgresTestDatabase db, + final String sslMode, + final boolean innerAddress) { final var containerAddress = innerAddress - ? SshHelpers.getInnerContainerAddress(db.container) - : SshHelpers.getOuterContainerAddress(db.container); - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(containerAddress.left)) - .put(JdbcUtils.PORT_KEY, containerAddress.right) - .put(JdbcUtils.DATABASE_KEY, db.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of("public")) - .put(JdbcUtils.USERNAME_KEY, db.userName) - .put(JdbcUtils.PASSWORD_KEY, db.password) - .put(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, sslMode)); + ? 
SshHelpers.getInnerContainerAddress(db.getContainer()) + : SshHelpers.getOuterContainerAddress(db.getContainer()); + return db.configBuilder() + .with(JdbcUtils.HOST_KEY, Objects.requireNonNull(containerAddress.left)) + .with(JdbcUtils.PORT_KEY, containerAddress.right) + .withDatabase() + .withSchemas("public") + .withCredentials() + .with(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, sslMode)); } private JsonNode getMockedSSLConfig(final String sslMode) { @@ -163,10 +160,10 @@ void testSslModesUnsecuredNoTunnel() throws Exception { } private AirbyteConnectionStatus checkWithTunnel(final PostgresTestDatabase db, SshBastionContainer bastion, final String sslmode) throws Exception { - final var configBuilderWithSSLMode = getDatabaseConfigBuilderWithSSLMode(db, sslmode, true); - final JsonNode configWithSSLModeDisable = - bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, configBuilderWithSSLMode, false); - ((ObjectNode) configWithSSLModeDisable).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1"); + final var configWithSSLModeDisable = configBuilderWithSSLMode(db, sslmode, true) + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false)) + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1") + .build(); return source().check(configWithSSLModeDisable); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java index 85aaf6c87ce6..026ff68dde00 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java @@ -7,16 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; -import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; import java.sql.SQLException; -import java.util.List; import java.util.Set; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -28,53 +22,33 @@ class PostgresCdcGetPublicizedTablesTest { private static final String SCHEMA_NAME = "public"; - protected static final int INITIAL_WAITING_SECONDS = 30; - private String publication; - private String replicationSlot; private PostgresTestDatabase testdb; @BeforeEach - void setup() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - replicationSlot = testdb.withSuffix("replication_slot"); - publication = testdb.withSuffix("publication"); - testdb.database.query(ctx -> { - ctx.execute("create table table_1 (id serial primary key, text_column text);"); - ctx.execute("create table table_2 (id serial primary key, text_column text);"); - ctx.execute("create table table_irrelevant (id serial primary key, text_column text);"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + replicationSlot + "', 'pgoutput');"); - 
// create a publication including table_1 and table_2, but not table_irrelevant - ctx.execute("CREATE PUBLICATION " + publication + " FOR TABLE table_1, table_2;"); - return null; - }); + void setup() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("create table table_1 (id serial primary key, text_column text);") + .with("create table table_2 (id serial primary key, text_column text);") + .with("create table table_irrelevant (id serial primary key, text_column text);") + .withReplicationSlot(); + // create a publication including table_1 and table_2, but not table_irrelevant + testdb = testdb + .with("CREATE PUBLICATION %s FOR TABLE table_1, table_2;", testdb.getPublicationName()) + .onClose("DROP PUBLICATION %s CASCADE", testdb.getPublicationName()); } @AfterEach - void tearDown() throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("DROP PUBLICATION " + publication + ";"); - ctx.execute("SELECT pg_drop_replication_slot('" + replicationSlot + "');"); - return null; - }); + void tearDown() { testdb.close(); } private JsonNode getConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .build()); + return testdb.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().with("is_test", true).build(); } @Test public void testGetPublicizedTables() throws SQLException { - final JdbcDatabase database = new DefaultJdbcDatabase(testdb.dslContext.diagnosticsDataSource()); + final JdbcDatabase database = new DefaultJdbcDatabase(testdb.getDslContext().diagnosticsDataSource()); // when source config does not exist assertEquals(0, PostgresCatalogHelper.getPublicizedTables(database).size()); @@ -83,11 +57,8 @@ public void testGetPublicizedTables() throws SQLException { assertEquals(0, PostgresCatalogHelper.getPublicizedTables(database).size()); // when config is cdc - final ObjectNode cdcConfig = ((ObjectNode) getConfig()); - cdcConfig.set("replication_method", Jsons.jsonNode(ImmutableMap.of( - "replication_slot", replicationSlot, - "initial_waiting_seconds", INITIAL_WAITING_SECONDS, - "publication", publication))); + final JsonNode cdcConfig = + testdb.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().withCdcReplication().build(); database.setSourceConfig(cdcConfig); final Set expectedTables = Set.of( new AirbyteStreamNameNamespacePair("table_1", SCHEMA_NAME), diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 343e736cd629..e8e958c3689b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -17,20 +17,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import 
io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; -import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.cdk.testutils.PostgreSQLContainerHelper; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.source.postgres.internal.models.CursorBasedStatus; import io.airbyte.integrations.source.postgres.internal.models.InternalModels.StateType; @@ -48,41 +41,26 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.MountableFile; -class PostgresJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +class PostgresJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { private static final String DATABASE = "new_db"; protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "new_password"; - private static PostgreSQLContainer PSQL_DB; public static String COL_WAKEUP_AT = "wakeup_at"; public static String COL_LAST_VISITED_AT = "last_visited_at"; public static String COL_LAST_COMMENT_AT = "last_comment_at"; - @BeforeAll - static void init() { - PSQL_DB = new PostgreSQLContainer<>("postgres:13-alpine"); - PSQL_DB.start(); - } - - @Override - @BeforeEach - public void setup() throws Exception { - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); + static { COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL, wakeup_at TIMETZ NOT NULL, last_visited_at TIMESTAMPTZ NOT NULL, last_comment_at TIMESTAMP NOT NULL"; COLUMN_CLAUSE_WITHOUT_PK = @@ -90,97 +68,72 @@ public void setup() throws Exception { COLUMN_CLAUSE_WITH_COMPOSITE_PK = "first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL, wakeup_at TIMETZ NOT NULL, last_visited_at TIMESTAMPTZ NOT NULL, last_comment_at TIMESTAMP NOT NULL"; - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, PSQL_DB.getHost()) - .put(JdbcUtils.PORT_KEY, PSQL_DB.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME, SCHEMA_NAME2)) - .put(JdbcUtils.USERNAME_KEY, PSQL_DB.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, PSQL_DB.getPassword()) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - final String initScriptName = "init_" + dbName.concat(".sql"); - final String tmpFilePath = 
IOs.writeFileToRandomTmpDir(initScriptName, "CREATE DATABASE " + dbName + ";"); - PostgreSQLContainerHelper.runSqlScript(MountableFile.forHostPath(tmpFilePath), PSQL_DB); - - source = getSource(); - final JsonNode jdbcConfig = getToDatabaseConfigFunction().apply(config); - - streamName = TABLE_NAME; - - dataSource = DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, getJdbcParameterDelimiter())); - - database = new StreamingJdbcDatabase(dataSource, - JdbcUtils.getDefaultSourceOperations(), - AdaptiveStreamingQueryConfig::new); - - createSchemas(); - - database.execute(connection -> { - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), - COLUMN_CLAUSE_WITHOUT_PK, "")); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), - COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(ImmutableList.of("first_name", "last_name")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('first' ,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", 
- getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('second', 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('third', 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - - }); - CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s BIT(3) NOT NULL);"; INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(B'101');"; } @Override - protected void maybeSetShorterConnectionTimeout() { + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSchemas(SCHEMA_NAME, SCHEMA_NAME2) + .withoutSsl() + .build(); + } + + @Override + protected PostgresSource source() { + final var source = new PostgresSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } + + @Override + protected PostgresTestDatabase createTestDatabase() { + return PostgresTestDatabase.in("postgres:16-bullseye"); + } + + @Override + @BeforeEach + public void setup() throws Exception { + testdb = createTestDatabase(); + if (supportsSchemas()) { + createSchemas(); + } + testdb.with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, primaryKeyClause(Collections.singletonList("id")))) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), COLUMN_CLAUSE_WITHOUT_PK, "")) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, + 
primaryKeyClause(ImmutableList.of("first_name", "last_name")))) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('first' ,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('second', 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('third', 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)); + } + + @Override + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1"); } @@ -284,31 +237,9 @@ public boolean supportsSchemas() { return true; } - @Override - public AbstractJdbcSource getJdbcSource() { - var source = new PostgresSource(); - source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); - return source; - } - - @Override - public JsonNode getConfig() { - return config; - } - - @Override - public String getDriverClass() { - return PostgresSource.DRIVER_CLASS; - } - - @AfterAll - static void cleanUp() { - PSQL_DB.close(); - } - @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); assertEquals(expected, actual); @@ -316,7 +247,7 @@ void testSpec() throws Exception { @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { @@ -351,17 +282,13 @@ protected List getTestMessages(final String streamName) { } @Override - protected void executeStatementReadIncrementallyTwice() throws SQLException { - database.execute(connection -> { - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (4,'riker', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (5, 'data', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - }); + protected void executeStatementReadIncrementallyTwice() { + testdb.with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (4,'riker', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (5, 'data', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + 
getFullyQualifiedTableName(TABLE_NAME)); } @Override @@ -454,111 +381,103 @@ protected boolean supportsPerStream() { */ @Test void testCheckIncorrectPasswordFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 28P01;")); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 28P01;")); } @Test public void testCheckIncorrectHostFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 08001;")); } @Test public void testCheckIncorrectPortFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "30000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 08001;")); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 3D000;")); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - maybeSetShorterConnectionTimeout(); - database.execute(connection -> connection.createStatement() - .execute(String.format("create user %s with password '%s';", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION))); - database.execute(connection -> connection.createStatement() - .execute(String.format("create database %s;", DATABASE))); - // deny access for database for all users from group public - database.execute(connection -> connection.createStatement() - .execute(String.format("revoke all on database %s from public;", DATABASE))); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + testdb.with("create user %s with password '%s';", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION) + .with("create database %s;", DATABASE) + // deny access for database 
for all users from group public + .with("revoke all on database %s from public;", DATABASE); ((ObjectNode) config).put("username", USERNAME_WITHOUT_PERMISSION); ((ObjectNode) config).put("password", PASSWORD_WITHOUT_PERMISSION); ((ObjectNode) config).put("database", DATABASE); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 42501;")); } @Test void testReadMultipleTablesIncrementally() throws Exception { + final var config = config(); ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); final String namespace = getDefaultNamespace(); final String streamOneName = TABLE_NAME + "one"; // Create a fresh first table - database.execute(connection -> { - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(streamOneName), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - }); + testdb.with(createTableQuery(getFullyQualifiedTableName(streamOneName), COLUMN_CLAUSE_WITH_PK, + primaryKeyClause(Collections.singletonList("id")))) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)); // Create a fresh second table final String streamTwoName = TABLE_NAME + "two"; final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); // Insert records into second table - database.execute(ctx -> { - ctx.createStatement().execute( - createTableQuery(streamTwoFullyQualifiedName, COLUMN_CLAUSE_WITH_PK, "")); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (40,'Jean Luc','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - 
ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (41, 'Groot', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (42, 'Thanos','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - }); + testdb.with(createTableQuery(streamTwoFullyQualifiedName, COLUMN_CLAUSE_WITH_PK, "")) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (40,'Jean Luc','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (41, 'Groot', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName) + .with(String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (42, 'Thanos','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName)); // Create records list that we expect to see in the state message final List streamTwoExpectedRecords = Arrays.asList( createRecord(streamTwoName, namespace, map( @@ -598,7 +517,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Perform initial sync final List messagesFromFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, null)); + .toList(source().read(config, configuredCatalog, null)); final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); @@ -664,7 +583,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // - stream one state still being the first record read via CTID. // - stream two state being the CTID state before the final emitted state before the cursor switch final List messagesFromSecondSyncWithMixedStates = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); @@ -691,20 +610,15 @@ void testReadMultipleTablesIncrementally() throws Exception { // Add some data to each table and perform a third read. 
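Reviewer note: the recurring change in these hunks is swapping database.execute(ctx -> ctx.createStatement().execute(String.format(...))) blocks for chained testdb.with(sqlFmt, args) calls. Below is a hypothetical, stripped-down illustration of that fluent pattern, not the actual PostgresTestDatabase implementation: with(...) formats a statement, records it, and returns this so per-test setup reads as one declarative chain; the real helper additionally manages a Testcontainers instance, live connections, and the cleanup registered via onClose(...).

import java.util.ArrayList;
import java.util.List;

// Hypothetical minimal illustration of the fluent test-database pattern used in this diff.
final class FluentTestDb implements AutoCloseable {

  private final List<String> executed = new ArrayList<>();

  FluentTestDb with(final String sqlFmt, final Object... args) {
    final String sql = String.format(sqlFmt, args);
    executed.add(sql); // the real helper would execute this against the container
    return this;
  }

  @Override
  public void close() {
    executed.clear(); // the real helper also runs statements registered via onClose(...)
  }

  public static void main(final String[] args) {
    try (FluentTestDb testdb = new FluentTestDb()
        .with("CREATE TABLE %s.%s (id INTEGER PRIMARY KEY);", "models_schema", "models_one")
        .with("INSERT INTO %s.%s (id) VALUES (%d);", "models_schema", "models_one", 1)) {
      // chained setup keeps per-test DDL/DML declarative and close to the assertions
    }
  }
}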
// Expect to see all records be synced via cursorBased method and not ctid - - database.execute(ctx -> { - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (4,'Hooper','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (43, 'Iron Man', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - }); + testdb.with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (4,'Hooper','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (43, 'Iron Man', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -751,7 +665,7 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", @@ -760,7 +674,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String COL_LAST_VISITED_AT, "2006-10-19T17:23:54.123456Z", COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -771,7 +685,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java index 7eee25049e55..463484952671 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java @@ 
-11,7 +11,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import io.airbyte.cdk.db.jdbc.DateTimeConverter; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.json.Jsons; import java.sql.Connection; import java.sql.PreparedStatement; @@ -34,7 +33,7 @@ class PostgresSourceOperationsTest { @BeforeEach public void init() { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf"); } @AfterEach @@ -64,7 +63,7 @@ public void numericColumnAsCursor() throws SQLException { } final List actualRecords = new ArrayList<>(); - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { final PreparedStatement preparedStatement = connection.prepareStatement( "SELECT * FROM " + tableName + " WHERE " + cursorColumn + " > ?"); postgresSourceOperations.setCursorField(preparedStatement, @@ -104,7 +103,7 @@ public void timeColumnAsCursor() throws SQLException { } final List actualRecords = new ArrayList<>(); - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { final PreparedStatement preparedStatement = connection.prepareStatement( "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); postgresSourceOperations.setCursorField(preparedStatement, @@ -137,7 +136,7 @@ public void testParseMoneyValue() { } protected void executeQuery(final String query) throws SQLException { - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { connection.createStatement().execute(query); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java index 414317dc1626..1013adad1243 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java @@ -17,7 +17,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; @@ -77,20 +76,16 @@ class PostgresSourceSSLTest { @BeforeEach void setup() throws Exception { - testdb = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL"); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), 
(2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + testdb = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL") + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with( + "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach @@ -99,16 +94,10 @@ void tearDown() { } private JsonNode getConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of("public")) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, true) - .put("ssl_mode", ImmutableMap.builder().put("mode", "require").build()) - .build()); + return testdb.testConfigBuilder() + .withSchemas("public") + .withSsl(ImmutableMap.builder().put("mode", "require").build()) + .build(); } @Test diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java index a1069c9b00b4..9710bbdbc57a 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java @@ -26,7 +26,6 @@ import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.exceptions.ConfigErrorException; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; @@ -135,27 +134,21 @@ class PostgresSourceTest { private PostgresTestDatabase testdb; @BeforeEach - void setup() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye"); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, 
PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + void setup() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye") + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with("INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', " + + "'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach - void tearDown() throws SQLException { + void tearDown() { testdb.close(); } @@ -182,17 +175,17 @@ private static Database getDatabase(final DSLContext dslContext) { } private JsonNode getConfig() { - return getConfig(testdb.userName, testdb.password); + return getConfig(testdb.getUserName(), testdb.getPassword()); } private JsonNode getConfig(final String user, final String password) { - return getConfig(testdb.dbName, user, password); + return getConfig(testdb.getDatabaseName(), user, password); } private JsonNode getConfig(final String dbName, final String user, final String password) { return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) + .put(JdbcUtils.HOST_KEY, testdb.getContainer().getHost()) + .put(JdbcUtils.PORT_KEY, testdb.getContainer().getFirstMappedPort()) .put(JdbcUtils.DATABASE_KEY, dbName) .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) .put(JdbcUtils.USERNAME_KEY, user) @@ -201,13 +194,6 @@ private JsonNode getConfig(final String dbName, final String user, final String .build()); } - private JsonNode getConfig(PostgresTestDatabase db) { - return Jsons.jsonNode(db.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.SSL_KEY, false) - .build()); - } - @Test public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { final AirbyteCatalog airbyteCatalog = new AirbyteCatalog().withStreams(List.of( @@ -218,7 +204,7 @@ public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { Field.of("\"test_column\"", JsonSchemaType.STRING)) 
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of("id"))))); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("CREATE TABLE \"\"\"test_dq_table\"\"\"(id INTEGER PRIMARY KEY, \"\"\"test_column\"\"\" varchar);"); ctx.fetch("INSERT INTO \"\"\"test_dq_table\"\"\" (id, \"\"\"test_column\"\"\") VALUES (1,'test1'), (2, 'test2');"); return null; @@ -230,28 +216,17 @@ public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { null)); setEmittedAtToNull(actualMessages); assertEquals(DOUBLE_QUOTED_MESSAGES, actualMessages); - testdb.database.query(ctx -> ctx.execute("DROP TABLE \"\"\"test_dq_table\"\"\";")); + testdb.query(ctx -> ctx.execute("DROP TABLE \"\"\"test_dq_table\"\"\";")); } @Test public void testCanReadUtf8() throws Exception { // force the db server to start with sql_ascii encoding to verify the source can read UTF8 even when // default settings are in another encoding - try (final var asciiTestDB = PostgresTestDatabase.make("postgres:16-alpine", "withASCII")) { - asciiTestDB.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,E'\\u2013 someutfstring'), (2, E'\\u2215');"); - return null; - }); - final var config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, asciiTestDB.container.getHost()) - .put(JdbcUtils.PORT_KEY, asciiTestDB.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, asciiTestDB.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, asciiTestDB.userName) - .put(JdbcUtils.PASSWORD_KEY, asciiTestDB.password) - .put(JdbcUtils.SSL_KEY, false) - .build()); + try (final var asciiTestDB = PostgresTestDatabase.in("postgres:16-alpine", "withASCII") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,E'\\u2013 someutfstring'), (2, E'\\u2215');")) { + final var config = asciiTestDB.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().build(); final Set actualMessages = MoreIterators.toSet(source().read(config, CONFIGURED_CATALOG, null)); setEmittedAtToNull(actualMessages); assertEquals(UTF8_MESSAGES, actualMessages); @@ -260,14 +235,14 @@ public void testCanReadUtf8() throws Exception { @Test void testUserDoesntHasPrivilegesToSelectTable() throws Exception { - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.execute("DROP TABLE id_and_name CASCADE;"); ctx.execute("DROP TABLE id_and_name2 CASCADE;"); ctx.execute("DROP TABLE names CASCADE;"); ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'John'), (2, 'Alfred'), (3, 'Alex');"); ctx.fetch("CREATE USER test_user_3 password '132';"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO test_user_3;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO test_user_3;"); ctx.fetch("GRANT ALL ON SCHEMA public TO test_user_3"); ctx.fetch("REVOKE ALL PRIVILEGES ON TABLE public.id_and_name FROM test_user_3"); return null; @@ -308,7 +283,7 @@ void testDiscoverWithPk() throws Exception { @Test void testDiscoverRecursiveRolePermissions() throws Exception { - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.execute("DROP TABLE id_and_name CASCADE;"); ctx.execute("DROP TABLE id_and_name2 CASCADE;"); ctx.execute("DROP TABLE names CASCADE;"); @@ -331,7 +306,7 @@ 
void testDiscoverRecursiveRolePermissions() throws Exception { ctx.fetch("GRANT airbyte TO test_user_4;"); ctx.fetch("CREATE TABLE unseen(id INTEGER, name VARCHAR(200));"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO test_user_4;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO test_user_4;"); return null; }); final var config = getConfig(); @@ -355,7 +330,7 @@ void testDiscoverRecursiveRolePermissions() throws Exception { @Test void testDiscoverDifferentGrantAvailability() throws Exception { final JsonNode config = getConfig(); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("create table not_granted_table_name_1(column_1 integer);"); ctx.fetch("create table not_granted_table_name_2(column_1 integer);"); ctx.fetch("create table not_granted_table_name_3(column_1 integer);"); @@ -410,7 +385,7 @@ void testDiscoverDifferentGrantAvailability() throws Exception { ctx.fetch("create user new_test_user;"); ctx.fetch("ALTER USER new_test_user WITH PASSWORD 'new_pass';"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO new_test_user;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO new_test_user;"); ctx.fetch("GRANT ALL ON SCHEMA public TO test_user_4"); ctx.fetch("grant test_role to new_test_user;"); @@ -468,7 +443,7 @@ void testReadSuccess() throws Exception { @Test void testReadIncrementalSuccess() throws Exception { // We want to test ordering, so we can delete the NaN entry and add a 3. - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);"); return null; @@ -504,7 +479,7 @@ void testReadIncrementalSuccess() throws Exception { final AirbyteStateMessage lastEmittedState = stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1); final JsonNode state = Jsons.jsonNode(List.of(lastEmittedState)); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (5, 'piccolo', 100.0);"); return null; }); @@ -583,7 +558,7 @@ void testGetUsername() { @Test public void tableWithInvalidCursorShouldThrowException() throws Exception { - final ConfiguredAirbyteStream tableWithInvalidCursorType = createTableWithInvalidCursorType(testdb.database); + final ConfiguredAirbyteStream tableWithInvalidCursorType = createTableWithInvalidCursorType(testdb.getDatabase()); final ConfiguredAirbyteCatalog configuredAirbyteCatalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(tableWithInvalidCursorType)); @@ -633,7 +608,7 @@ private JsonNode buildConfigEscapingNeeded() { @Test public void tableWithNullValueCursorShouldThrowException() throws SQLException { - final ConfiguredAirbyteStream table = createTableWithNullValueCursor(testdb.database); + final ConfiguredAirbyteStream table = createTableWithNullValueCursor(testdb.getDatabase()); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); @@ -664,7 +639,7 @@ private ConfiguredAirbyteStream createTableWithNullValueCursor(final Database da @Test public void viewWithNullValueCursorShouldThrowException() throws SQLException { - final ConfiguredAirbyteStream table = createViewWithNullValueCursor(testdb.database); + final ConfiguredAirbyteStream table = createViewWithNullValueCursor(testdb.getDatabase()); final ConfiguredAirbyteCatalog catalog = new 
ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(source().read(getConfig(), catalog, null))); @@ -733,7 +708,7 @@ void testParseJdbcParameters() { public void testJdbcOptionsParameter() throws Exception { // Populate DB. final JsonNode dbConfig = getConfig(); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_bytes (id INTEGER, bytes BYTEA);"); ctx.fetch("INSERT INTO id_and_bytes (id, bytes) VALUES (1, decode('DEADBEEF', 'hex'));"); return null; @@ -771,7 +746,7 @@ public void testJdbcOptionsParameter() throws Exception { @DisplayName("Make sure initial incremental load is reading records in a certain order") void testReadIncrementalRecordOrder() throws Exception { // We want to test ordering, so we can delete the NaN entry - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); for (int i = 3; i < 1000; i++) { ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (%d, 'gohan%d', 222.1);".formatted(i, i)); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java index 1383d04e60d1..da941383d626 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java @@ -16,11 +16,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.Source; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -38,7 +35,6 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.math.BigDecimal; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -99,48 +95,31 @@ protected String getDatabaseImageName() { } @BeforeEach - protected void setup() throws SQLException { - testdb = PostgresTestDatabase.make(getDatabaseImageName()); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, 
last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + protected void setup() { + testdb = PostgresTestDatabase.in(getDatabaseImageName()) + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with( + "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach - protected void tearDown() throws SQLException { + protected void tearDown() { testdb.close(); } protected JsonNode getXminConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", getReplicationMethod()) - .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) - .build()); - } - - private JsonNode getReplicationMethod() { - return Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Xmin") - .build()); + return testdb.testConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withXminReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); } protected Source source() { @@ -254,7 +233,7 @@ void testReadSuccess() throws Exception { // We add some data and perform a third read. We should verify that (i) a delete is not captured and // (ii) the new record that is inserted into the // table is read. - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);"); return null; diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java index 731c3c423471..65562b84ada1 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java @@ -76,7 +76,7 @@ void testReadSuccess() throws Exception { // We add some data and perform a third read. We should verify that (i) a delete is not captured and // (ii) the new record that is inserted into the // table is read. 
- testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);"); return null; diff --git a/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java new file mode 100644 index 000000000000..b92c319d9eec --- /dev/null +++ b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.postgres; + +import io.airbyte.cdk.testutils.ContainerFactory; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.testcontainers.containers.Network; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.MountableFile; + +public class PostgresContainerFactory implements ContainerFactory> { + + @Override + public PostgreSQLContainer createNewContainer(DockerImageName imageName) { + return new PostgreSQLContainer<>(imageName.asCompatibleSubstituteFor("postgres")); + + } + + @Override + public Class getContainerClass() { + return PostgreSQLContainer.class; + } + + /** + * Apply the postgresql.conf file that we've packaged as a resource. + */ + public void withConf(PostgreSQLContainer container) { + container + .withCopyFileToContainer( + MountableFile.forClasspathResource("postgresql.conf"), + "/etc/postgresql/postgresql.conf") + .withCommand("postgres -c config_file=/etc/postgresql/postgresql.conf"); + } + + /** + * Create a new network and bind it to the container. + */ + public void withNetwork(PostgreSQLContainer container) { + container.withNetwork(Network.newNetwork()); + } + + /** + * Configure postgres with wal_level=logical. + */ + public void withWalLevelLogical(PostgreSQLContainer container) { + container.withCommand("postgres -c wal_level=logical"); + } + + /** + * Generate SSL certificates and tell postgres to enable SSL and use them. 
+ */ + public void withCert(PostgreSQLContainer container) { + container.start(); + String[] commands = { + "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\"", + "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\"", + "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\"", + "openssl ecparam -name prime256v1 -genkey -noout -out ca.key", + "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\"", + "openssl ecparam -name prime256v1 -genkey -noout -out server.key", + "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\"", + "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256", + "cp server.key /etc/ssl/private/", + "cp server.crt /etc/ssl/private/", + "cp ca.crt /etc/ssl/private/", + "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*", + "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt", + "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> /var/lib/postgresql/data/postgresql.conf", + "mkdir root/.postgresql", + "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf", + "openssl ecparam -name prime256v1 -genkey -noout -out client.key", + "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\"", + "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256", + "cp client.crt ~/.postgresql/postgresql.crt", + "cp client.key ~/.postgresql/postgresql.key", + "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key", + "cp ca.crt root/.postgresql/ca.crt", + "chown postgres:postgres ~/.postgresql/ca.crt", + "psql -U test -c \"SELECT pg_reload_conf();\"", + }; + for (String cmd : commands) { + try { + container.execInContainer("su", "-c", cmd); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + + /** + * Tell postgres to enable SSL. + */ + public void withSSL(PostgreSQLContainer container) { + container.withCommand("postgres " + + "-c ssl=on " + + "-c ssl_cert_file=/var/lib/postgresql/server.crt " + + "-c ssl_key_file=/var/lib/postgresql/server.key"); + } + + /** + * Configure postgres with client_encoding=sql_ascii. + */ + public void withASCII(PostgreSQLContainer container) { + container.withCommand("postgres -c client_encoding=sql_ascii"); + } + +} diff --git a/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java new file mode 100644 index 000000000000..69c7f37eaaa8 --- /dev/null +++ b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.postgres; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.testutils.TestDatabase; +import io.airbyte.commons.json.Jsons; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.PostgreSQLContainer; + +public class PostgresTestDatabase extends + TestDatabase, PostgresTestDatabase, PostgresTestDatabase.PostgresConfigBuilder> { + + static public PostgresTestDatabase in(String imageName, String... methods) { + final var container = new PostgresContainerFactory().shared(imageName, methods); + return new PostgresTestDatabase(container).initialized(); + } + + public PostgresTestDatabase(PostgreSQLContainer container) { + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + return Stream.of(psqlCmd(Stream.of( + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER %s PASSWORD '%s'", getUserName(), getPassword()), + String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", getDatabaseName(), getUserName()), + String.format("ALTER USER %s WITH SUPERUSER", getUserName())))); + } + + /** + * Close resources held by this instance. This deliberately avoids dropping the database, which is + * really expensive in Postgres. This is because a DROP DATABASE in Postgres triggers a CHECKPOINT. + * Call {@link #dropDatabaseAndUser} to explicitly drop the database and the user. + */ + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + /** + * Drop the database owned by this instance. 
+ */ + public void dropDatabaseAndUser() { + execInContainer(psqlCmd(Stream.of( + String.format("DROP DATABASE %s", getDatabaseName()), + String.format("DROP OWNED BY %s", getUserName()), + String.format("DROP USER %s", getUserName())))); + } + + public Stream psqlCmd(Stream sql) { + return Stream.concat( + Stream.of("psql", + "-d", getContainer().getDatabaseName(), + "-U", getContainer().getUsername(), + "-v", "ON_ERROR_STOP=1", + "-a"), + sql.flatMap(stmt -> Stream.of("-c", stmt))); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.POSTGRES; + } + + private Certificates cachedCerts; + + public synchronized Certificates getCertificates() { + if (cachedCerts == null) { + final String caCert, clientKey, clientCert; + try { + caCert = getContainer().execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); + clientKey = getContainer().execInContainer("su", "-c", "cat client.key").getStdout().trim(); + clientCert = getContainer().execInContainer("su", "-c", "cat client.crt").getStdout().trim(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + cachedCerts = new Certificates(caCert, clientCert, clientKey); + } + return cachedCerts; + } + + public record Certificates(String caCertificate, String clientCertificate, String clientKey) {} + + @Override + public PostgresConfigBuilder configBuilder() { + return new PostgresConfigBuilder(this); + } + + public String getReplicationSlotName() { + return withNamespace("debezium_slot"); + } + + public String getPublicationName() { + return withNamespace("publication"); + } + + public PostgresTestDatabase withReplicationSlot() { + return this + .with("SELECT pg_create_logical_replication_slot('%s', 'pgoutput');", getReplicationSlotName()) + .onClose("SELECT pg_drop_replication_slot('%s');", getReplicationSlotName()); + } + + public PostgresTestDatabase withPublicationForAllTables() { + return this + .with("CREATE PUBLICATION %s FOR ALL TABLES;", getPublicationName()) + .onClose("DROP PUBLICATION %s CASCADE;", getPublicationName()); + } + + static public class PostgresConfigBuilder extends ConfigBuilder { + + protected PostgresConfigBuilder(PostgresTestDatabase testdb) { + super(testdb); + } + + public PostgresConfigBuilder withSchemas(String... 
schemas) { + return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas)); + } + + public PostgresConfigBuilder withStandardReplication() { + return with("replication_method", ImmutableMap.builder().put("method", "Standard").build()); + } + + public PostgresConfigBuilder withCdcReplication() { + return withCdcReplication("While reading Data"); + } + + public PostgresConfigBuilder withCdcReplication(String LsnCommitBehaviour) { + return this + .with("is_test", true) + .with("replication_method", Jsons.jsonNode(ImmutableMap.builder() + .put("method", "CDC") + .put("replication_slot", testDatabase.getReplicationSlotName()) + .put("publication", testDatabase.getPublicationName()) + .put("initial_waiting_seconds", DEFAULT_CDC_REPLICATION_INITIAL_WAIT.getSeconds()) + .put("lsn_commit_behaviour", LsnCommitBehaviour) + .build())); + } + + public PostgresConfigBuilder withXminReplication() { + return this.with("replication_method", Jsons.jsonNode(ImmutableMap.builder().put("method", "Xmin").build())); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java index e7258bc07e8b..d80a2558ef1b 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java @@ -47,10 +47,7 @@ public JsonNode toDatabaseConfig(final JsonNode redshiftConfig) { final ImmutableMap.Builder builder = ImmutableMap.builder() .put(JdbcUtils.USERNAME_KEY, redshiftConfig.get(JdbcUtils.USERNAME_KEY).asText()) .put(JdbcUtils.PASSWORD_KEY, redshiftConfig.get(JdbcUtils.PASSWORD_KEY).asText()) - .put(JdbcUtils.JDBC_URL_KEY, String.format(DatabaseDriver.REDSHIFT.getUrlFormatString(), - redshiftConfig.get(JdbcUtils.HOST_KEY).asText(), - redshiftConfig.get(JdbcUtils.PORT_KEY).asInt(), - redshiftConfig.get(JdbcUtils.DATABASE_KEY).asText())); + .put(JdbcUtils.JDBC_URL_KEY, getJdbcUrl(redshiftConfig)); if (redshiftConfig.has(JdbcUtils.SCHEMAS_KEY) && redshiftConfig.get(JdbcUtils.SCHEMAS_KEY).isArray()) { schemas = new ArrayList<>(); @@ -75,6 +72,13 @@ public JsonNode toDatabaseConfig(final JsonNode redshiftConfig) { .build()); } + public static String getJdbcUrl(final JsonNode redshiftConfig) { + return String.format(DatabaseDriver.REDSHIFT.getUrlFormatString(), + redshiftConfig.get(JdbcUtils.HOST_KEY).asText(), + redshiftConfig.get(JdbcUtils.PORT_KEY).asInt(), + redshiftConfig.get(JdbcUtils.DATABASE_KEY).asText()); + } + private void addSsl(final List additionalProperties) { additionalProperties.add("ssl=true"); additionalProperties.add("sslfactory=com.amazon.redshift.ssl.NonValidatingFactory"); diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java index 487fe6da1c29..2f3b9f169ee3 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java @@ -14,6 +14,7 @@ import java.sql.ResultSet; import 
java.sql.SQLException; import java.sql.Timestamp; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import org.slf4j.Logger; @@ -23,6 +24,19 @@ public class RedshiftSourceOperations extends JdbcSourceOperations { private static final Logger LOGGER = LoggerFactory.getLogger(RedshiftSourceOperations.class); + @Override + public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json) throws SQLException { + if ("timestamptz".equalsIgnoreCase(resultSet.getMetaData().getColumnTypeName(colIndex))) { + // Massive hack. Sometimes the JDBCType is TIMESTAMP (i.e. without timezone) + // even though it _should_ be TIMESTAMP_WITH_TIMEZONE. + // Check for this case explicitly. + final String columnName = resultSet.getMetaData().getColumnName(colIndex); + putTimestampWithTimezone(json, columnName, resultSet, colIndex); + } else { + super.copyToJsonField(resultSet, colIndex, json); + } + } + @Override protected void putTime(final ObjectNode node, final String columnName, @@ -44,6 +58,17 @@ protected void setTimestamp(final PreparedStatement preparedStatement, final int preparedStatement.setTimestamp(parameterIndex, Timestamp.valueOf(date)); } + @Override + protected void putTimestampWithTimezone(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) + throws SQLException { + try { + super.putTimestampWithTimezone(node, columnName, resultSet, index); + } catch (final Exception e) { + final Instant instant = resultSet.getTimestamp(index).toInstant(); + node.put(columnName, instant.toString()); + } + } + @Override protected void setDate(final PreparedStatement preparedStatement, final int parameterIndex, final String value) throws SQLException { final LocalDate date = LocalDate.parse(value); diff --git a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java new file mode 100644 index 000000000000..856f6e9d1e6e --- /dev/null +++ b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.io.airbyte.integration_tests.sources; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.source.jdbc.JdbcDataSourceUtils; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.redshift.RedshiftSource; +import io.airbyte.integrations.source.redshift.RedshiftSourceOperations; +import java.nio.file.Path; +import java.sql.SQLException; +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.util.List; +import javax.sql.DataSource; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class RedshiftSourceOperationsTest { + + private JdbcDatabase database; + + @BeforeEach + void setup() { + final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/config.json"))); + + final DataSource dataSource = DataSourceFactory.create( + config.get("username").asText(), + config.get("password").asText(), + DatabaseDriver.REDSHIFT.getDriverClassName(), + RedshiftSource.getJdbcUrl(config), + JdbcDataSourceUtils.getConnectionProperties(config)); + database = new DefaultJdbcDatabase(dataSource, new RedshiftSourceOperations()); + } + + @Test + void testTimestampWithTimezone() throws SQLException { + // CURRENT_TIMESTAMP is converted to a string by queryJsons. + // CAST(CURRENT_TIMESTAMP AS VARCHAR) does the timestamp -> string conversion on the server side. + // If queryJsons is implemented correctly, both timestamps should be the same. + final List result = database.queryJsons("SELECT CURRENT_TIMESTAMP, CAST(CURRENT_TIMESTAMP AS VARCHAR)"); + + final Instant clientSideParse = Instant.parse(result.get(0).get("timestamptz").asText()); + // Redshift's default timestamp format is "2023-11-17 17:50:36.746606+00", which Instant.parse() + // can't handle. Build a custom datetime formatter. 
+ // (Redshift supports server-side timestamp formatting, but it doesn't provide a way to force + // HH:MM offsets, which are required by Instant.parse) + final Instant serverSideParse = new DateTimeFormatterBuilder() + .append(DateTimeFormatter.ISO_DATE) + .appendLiteral(' ') + .append(DateTimeFormatter.ISO_LOCAL_TIME) + // "X" represents a +/-HH offset + .appendPattern("X") + .toFormatter() + .parse(result.get(0).get("varchar").asText(), Instant::from); + assertEquals(serverSideParse, clientSideParse); + } + +} diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 27c20f59f30f..4c99fb3cc67a 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.2.1 + dockerImageTag: 4.2.2 dockerRepository: airbyte/source-s3 documentationUrl: https://docs.airbyte.com/integrations/sources/s3 githubIssueLabel: source-s3 diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index 01f67fad69b2..d8bfbd5b16bc 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -157,7 +157,7 @@ def _page( break def _handle_file(self, file): - if file["Key"].endswith("zip"): + if file["Key"].endswith(".zip"): yield from self._handle_zip_file(file) else: yield self._handle_regular_file(file) diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle index 748487784fab..516c96a06808 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle @@ -4,13 +4,11 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.2.0' + cdkVersionRequired = '0.5.0' features = ['db-sources'] - useLocalCdk = true + useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() - application { mainClass = 'io.airbyte.integrations.source.scaffold-java-jdbc.ScaffoldJavaJdbcSource' } @@ -20,6 +18,9 @@ dependencies { //TODO Add jdbc driver import here. 
Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' testImplementation 'org.apache.commons:commons-lang3:3.11' + testImplementation libs.testcontainers.jdbc integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-scaffold-java-jdbc') + + testFixturesImplementation libs.testcontainers.jdbc } diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java index 925eeae95e4d..b911468604e9 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java @@ -12,24 +12,22 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConnectorSpecification; import java.util.HashMap; +import org.junit.jupiter.api.Disabled; +@Disabled public class ScaffoldJavaJdbcSourceAcceptanceTest extends SourceAcceptanceTest { - private JsonNode config; + private ScaffoldJavaJdbcTestDatabase testdb; @Override protected void setupEnvironment(final TestDestinationEnv testEnv) { - // TODO create new container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g");" - // TODO make container started. Ex: "container.start();" - // TODO init JsonNode config - // TODO crete airbyte Database object "Databases.createJdbcDatabase(...)" - // TODO insert test data to DB. Ex: "database.execute(connection-> ...)" - // TODO close Database. Ex: "database.close();" + // TODO: create new TestDatabase instance and assign `testdb` to it. + // TODO: use it to create and populate test tables in the database. } @Override protected void tearDown(final TestDestinationEnv testEnv) { - // TODO close container that was initialized in setup() method. Ex: "container.close();" + testdb.close(); } @Override @@ -44,7 +42,8 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + // TODO: (optional) call more builder methods. 
+ return testdb.integrationTestConfigBuilder().build(); } @Override diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java index 94a4db3070d3..70990256b9b8 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java @@ -5,44 +5,32 @@ package io.airbyte.integrations.source.scaffold_java_jdbc; import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import java.sql.JDBCType; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class ScaffoldJavaJdbcJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +@Disabled +class ScaffoldJavaJdbcJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { private static final Logger LOGGER = LoggerFactory.getLogger(ScaffoldJavaJdbcJdbcSourceAcceptanceTest.class); - // TODO declare a test container for DB. EX: org.testcontainers.containers.OracleContainer - - @BeforeAll - static void init() { - // Oracle returns uppercase values - // TODO init test container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g")" - // TODO start container. Ex: "container.start();" - } - - @BeforeEach - public void setup() throws Exception { - // TODO init config. Ex: "config = Jsons.jsonNode(ImmutableMap.builder().put("host", - // host).put("port", port)....build()); - super.setup(); + @Override + protected JsonNode config() { + // TODO: (optional) call more builder methods. + return testdb.testConfigBuilder().build(); } - @AfterEach - public void tearDown() { - // TODO clean used resources + @Override + protected ScaffoldJavaJdbcSource source() { + // TODO: (optional) call `setFeatureFlags` before returning the source to mock setting env vars. + return new ScaffoldJavaJdbcSource(); } @Override - public AbstractJdbcSource getSource() { - return new ScaffoldJavaJdbcSource(); + protected ScaffoldJavaJdbcTestDatabase createTestDatabase() { + // TODO: return a suitable TestDatabase instance. + return new ScaffoldJavaJdbcTestDatabase(null).initialized(); } @Override @@ -51,25 +39,4 @@ public boolean supportsSchemas() { return false; } - @Override - public JsonNode getConfig() { - return config; - } - - @Override - public String getDriverClass() { - return ScaffoldJavaJdbcSource.DRIVER_CLASS; - } - - @Override - public AbstractJdbcSource getJdbcSource() { - // TODO - return null; - } - - @AfterAll - static void cleanUp() { - // TODO close the container. 
Ex: "container.close();" - } - } diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java new file mode 100644 index 000000000000..4e0c24508217 --- /dev/null +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.scaffold_java_jdbc; + +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.testutils.TestDatabase; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.JdbcDatabaseContainer; + +public class ScaffoldJavaJdbcTestDatabase + extends TestDatabase, ScaffoldJavaJdbcTestDatabase, ScaffoldJavaJdbcTestDatabase.ScaffoldJavaJdbcConfigBuilder> { + + public ScaffoldJavaJdbcTestDatabase(JdbcDatabaseContainer container) { + // TODO: (optional) consider also implementing a ContainerFactory to share testcontainer instances. + // Effective use requires parallelizing the tests using JUnit instead of gradle. + // This is best achieved by adding a `gradle.properties` file containing + // `testExecutionConcurrency=-1`. + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + // TODO: return a stream of streams of command args to be passed to `execInContainer` calls to set + // up the test state. + // This usually involves the execution of CREATE DATABASE and CREATE USER statements as root. + return Stream.empty(); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + // TODO: (optional) return a stream of command args to be passed to a `execInContainer` call to + // clean up the test state. + return Stream.empty(); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + // TODO: return a suitable value. + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + // TODO: return a suitable value. + return SQLDialect.DEFAULT; + } + + @Override + public ScaffoldJavaJdbcConfigBuilder configBuilder() { + // TODO: flesh out the ConfigBuilder subclass and return a new instance of it here. 
+ return new ScaffoldJavaJdbcConfigBuilder(this); + } + + public static class ScaffoldJavaJdbcConfigBuilder extends TestDatabase.ConfigBuilder { + + public ScaffoldJavaJdbcConfigBuilder(ScaffoldJavaJdbcTestDatabase testDatabase) { + super(testDatabase); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml b/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml index 0dda354b4266..dfdddbb6ca31 100644 --- a/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml @@ -13,6 +13,8 @@ acceptance_tests: discovery: tests: - config_path: "secrets/config.json" + backward_compatibility_tests_config: + disable_for_version: 4.4.2 basic_read: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json index e34da831b7be..97d865ec3c49 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json @@ -163,14 +163,14 @@ { "type": "STREAM", "stream": { - "stream_state": { "expires_at": 10000000000 }, + "stream_state": { "updated": 10000000000 }, "stream_descriptor": { "name": "checkout_sessions" } } }, { "type": "STREAM", "stream": { - "stream_state": { "checkout_session_expires_at": 10000000000 }, + "stream_state": { "checkout_session_updated": 10000000000 }, "stream_descriptor": { "name": "checkout_sessions_line_items" } } }, diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json index fc3ccb073b53..281642987467 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json @@ -143,11 +143,11 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["checkout_session_expires_at"], + "default_cursor_field": ["checkout_session_updated"], "source_defined_primary_key": [["id"]] }, "primary_key": [["id"]], - "cursor_field": ["checkout_session_expires_at"], + "cursor_field": ["checkout_session_updated"], "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, @@ -459,11 +459,11 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["updated"], + "default_cursor_field": ["created"], "source_defined_primary_key": [["id"]] }, "primary_key": [["id"]], - "cursor_field": ["updated"], + "cursor_field": ["created"], "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl index a3732127059f..da9e7ed4ea18 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl @@ -1,12 +1,12 @@ -{"stream": "checkout_sessions_line_items", "data": {"id": "li_1O2XZ1EcXtiJtvvh26q22omU", "object": "item", "amount_discount": 0, 
"amount_subtotal": 3400, "amount_tax": 0, "amount_total": 3400, "currency": "usd", "description": "Test Product 1", "discounts": [], "price": {"id": "price_1MX364EcXtiJtvvh6jKcimNL", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1675345504, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NHcKselSHfKdfc", "recurring": null, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 1700, "unit_amount_decimal": "1700"}, "quantity": 2, "taxes": [], "checkout_session_id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "checkout_session_expires_at": 1697713523}, "emitted_at": 1697627220862} +{"stream": "checkout_sessions_line_items", "data": {"checkout_session_id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "checkout_session_expires_at": 1697713523, "checkout_session_created": 1697627124, "checkout_session_updated": 1697627124, "id": "li_1O2XZ1EcXtiJtvvh26q22omU", "object": "item", "amount_discount": 0, "amount_subtotal": 3400, "amount_tax": 0, "amount_total": 3400, "currency": "usd", "description": "Test Product 1", "discounts": [], "price": {"id": "price_1MX364EcXtiJtvvh6jKcimNL", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1675345504, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NHcKselSHfKdfc", "recurring": null, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 1700, "unit_amount_decimal": "1700"}, "quantity": 2, "taxes": []}, "emitted_at": 1699376426293} {"stream": "customer_balance_transactions", "data": {"id": "cbtxn_1MX2zPEcXtiJtvvhr4L2D3Q1", "object": "customer_balance_transaction", "amount": -50000.0, "created": 1675345091, "credit_note": null, "currency": "usd", "customer": "cus_NGoTFiJFVbSsvZ", "description": null, "ending_balance": 0.0, "invoice": "in_1MX2yFEcXtiJtvvhMXhUCgKx", "livemode": false, "metadata": {}, "type": "applied_to_invoice"}, "emitted_at": 1697627222916} {"stream": "customer_balance_transactions", "data": {"id": "cbtxn_1MWIPLEcXtiJtvvhLnQYjVCj", "object": "customer_balance_transaction", "amount": 50000.0, "created": 1675166031, "credit_note": null, "currency": "usd", "customer": "cus_NGoTFiJFVbSsvZ", "description": "Test credit balance", "ending_balance": 50000.0, "invoice": null, "livemode": false, "metadata": {}, "type": "adjustment"}, "emitted_at": 1697627222918} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIjEcXtiJtvvhqDfSlpM4", "object": "setup_attempt", "application": null, "created": 1649752937, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIj2eZvKYlo2CAlv2Vhqc", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": "seti_1KnfIjEcXtiJtvvhPw5znVKY", "status": "succeeded", "usage": "off_session"}, "emitted_at": 1697627241471} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIdEcXtiJtvvhpDrYVlRP", "object": "setup_attempt", "application": null, "created": 1649752931, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIc2eZvKYlo2Civ7snSPy", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": 
"seti_1KnfIcEcXtiJtvvh61qlCaDf", "status": "succeeded", "usage": "off_session"}, "emitted_at": 1697627242509} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIVEcXtiJtvvhqouWGuhD", "object": "setup_attempt", "application": null, "created": 1649752923, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIV2eZvKYlo2CaOLGBF00", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": "seti_1KnfIVEcXtiJtvvhWiIbMkpH", "status": "succeeded", "usage": "off_session"}, "emitted_at": 1697627243547} -{"stream": "accounts", "data": {"id": "acct_1NGp6SD04fX0Aizk", "object": "account", "capabilities": {"acss_debit_payments": "active", "affirm_payments": "active", "afterpay_clearpay_payments": "active", "bancontact_payments": "active", "card_payments": "active", "cartes_bancaires_payments": "pending", "cashapp_payments": "active", "eps_payments": "active", "giropay_payments": "active", "ideal_payments": "active", "klarna_payments": "active", "link_payments": "active", "p24_payments": "active", "sepa_debit_payments": "active", "sofort_payments": "active", "transfers": "active", "us_bank_account_ach_payments": "active"}, "charges_enabled": true, "country": "US", "default_currency": "usd", "details_submitted": true, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "payouts_enabled": true, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": "AIRBYTE", "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": "Airbyte", "timezone": "Asia/Tbilisi"}, "payments": {"statement_descriptor": "WWW.AIRBYTE.COM", "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267880} -{"stream": "accounts", "data": {"id": "acct_1MwD6tIyVv44cUB4", "object": "account", "business_profile": {"mcc": null, "name": null, "product_description": null, "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "business_type": null, "capabilities": {"card_payments": "inactive", "transfers": "inactive"}, "charges_enabled": false, "country": "US", "created": 1681342196, "default_currency": "usd", "details_submitted": false, "email": "jenny.rosen@example.com", "external_accounts": {"object": "list", "data": [], "has_more": false, "total_count": 0, "url": "/v1/accounts/acct_1MwD6tIyVv44cUB4/external_accounts"}, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], 
"disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"decline_on": {"avs_failure": false, "cvc_failure": false}, "statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "payouts": {"debit_negative_balances": false, "schedule": {"delay_days": 2, "interval": "daily"}, "statement_descriptor": null}, "sepa_debit_payments": {}}, "tos_acceptance": {"date": null, "ip": null, "user_agent": null}, "type": "custom"}, "emitted_at": 1697627267882} -{"stream": "accounts", "data": {"id": "acct_1Jx8unEYmRTj5on1", "object": "account", "business_profile": {"mcc": null, "name": "Airbyte", "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "capabilities": {}, "charges_enabled": false, "controller": {"type": "account"}, "country": "US", "default_currency": "usd", "details_submitted": false, "email": null, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267884} +{"stream": "accounts", "data": {"id": "acct_1NGp6SD04fX0Aizk", "object": "account", "capabilities": {"acss_debit_payments": "active", "affirm_payments": "active", "afterpay_clearpay_payments": "active", "bancontact_payments": "active", "card_payments": "active", "cartes_bancaires_payments": "pending", "cashapp_payments": "active", "eps_payments": "active", "giropay_payments": "active", "ideal_payments": "active", "klarna_payments": "active", 
"link_payments": "active", "p24_payments": "active", "sepa_debit_payments": "active", "sofort_payments": "active", "transfers": "active", "us_bank_account_ach_payments": "active"}, "charges_enabled": true, "country": "US", "default_currency": "usd", "details_submitted": true, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "payouts_enabled": true, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": "AIRBYTE", "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": "Airbyte", "timezone": "Asia/Tbilisi"}, "payments": {"statement_descriptor": "WWW.AIRBYTE.COM", "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267880} +{"stream": "accounts", "data": {"id": "acct_1MwD6tIyVv44cUB4", "object": "account", "business_profile": {"mcc": null, "name": null, "product_description": null, "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "business_type": null, "capabilities": {"card_payments": "inactive", "transfers": "inactive"}, "charges_enabled": false, "country": "US", "created": 1681342196, "default_currency": "usd", "details_submitted": false, "email": "jenny.rosen@example.com", "external_accounts": {"object": "list", "data": [], "has_more": false, "total_count": 0, "url": "/v1/accounts/acct_1MwD6tIyVv44cUB4/external_accounts"}, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"decline_on": {"avs_failure": false, "cvc_failure": false}, "statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": 
{"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "payouts": {"debit_negative_balances": false, "schedule": {"delay_days": 2, "interval": "daily"}, "statement_descriptor": null}, "sepa_debit_payments": {}}, "tos_acceptance": {"date": null, "ip": null, "user_agent": null}, "type": "custom"}, "emitted_at": 1697627267882} +{"stream": "accounts", "data": {"id": "acct_1Jx8unEYmRTj5on1", "object": "account", "business_profile": {"mcc": null, "name": "Airbyte", "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "capabilities": {}, "charges_enabled": false, "controller": {"type": "account"}, "country": "US", "default_currency": "usd", "details_submitted": false, "email": null, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267884} {"stream": "shipping_rates", "data": {"id": "shr_1NXgplEcXtiJtvvhA1ntV782", "object": "shipping_rate", "active": true, "created": 1690274589, "delivery_estimate": "{'maximum': {'unit': 'business_day', 'value': 14}, 'minimum': {'unit': 'business_day', 'value': 10}}", "display_name": "Test Ground Shipping", "fixed_amount": {"amount": 999, "currency": "usd"}, "livemode": false, "metadata": {}, "tax_behavior": "inclusive", "tax_code": "txcd_92010001", "type": "fixed_amount"}, "emitted_at": 1697627269309} {"stream": "balance_transactions", "data": {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": -9164, "available_on": 1645488000, "created": 1645406919, "currency": "usd", "description": "STRIPE PAYOUT", "exchange_rate": null, "fee": 0, "fee_details": [], "net": -9164, "reporting_category": "payout", "source": "po_1KVQhfEcXtiJtvvhZlUkl08U", "status": "available", "type": "payout"}, "emitted_at": 1697627270253} {"stream": "balance_transactions", "data": {"id": "txn_3K9FSOEcXtiJtvvh0KoS5mx7", "object": "balance_transaction", "amount": 5300, "available_on": 1640649600, "created": 1640120473, "currency": "usd", "description": null, "exchange_rate": null, "fee": 184, "fee_details": [{"amount": 184, "application": null, "currency": "usd", "description": "Stripe processing fees", "type": "stripe_fee"}], "net": 5116, 
"reporting_category": "charge", "source": "ch_3K9FSOEcXtiJtvvh0zxb7clc", "status": "available", "type": "charge"}, "emitted_at": 1697627270254} @@ -17,7 +17,7 @@ {"stream": "file_links", "data": {"id": "link_1KnfIiEcXtiJtvvhCNceSyei", "object": "file_link", "created": 1649752936, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfY1FvanBFTmt0dUdrRWJXTHBpUlVYVUtu007305bsv3"}, "emitted_at": 1697627273833} {"stream": "file_links", "data": {"id": "link_1KnfIbEcXtiJtvvhyBLUqkSt", "object": "file_link", "created": 1649752929, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfaXh1blBqMmY0MzI3SHZWbUZIeFVGU3Nl0022JjupYq"}, "emitted_at": 1697627273834} {"stream": "file_links", "data": {"id": "link_1KnfIUEcXtiJtvvh0ktKHfWz", "object": "file_link", "created": 1649752922, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfNzhlbE9MUGNYbkJzMkRLSWdEcnhvY3FH00DK5jBVaH"}, "emitted_at": 1697627273835} -{"stream": "checkout_sessions", "data": {"id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "object": "checkout.session", "after_expiration": null, "allow_promotion_codes": null, "amount_subtotal": 3400, "amount_total": 3400, "automatic_tax": {"enabled": false, "status": null}, "billing_address_collection": null, "cancel_url": null, "client_reference_id": null, "client_secret": null, "consent": null, "consent_collection": null, "created": 1697627124, "currency": "usd", "currency_conversion": null, "custom_fields": [], "custom_text": {"shipping_address": null, "submit": null, "terms_of_service_acceptance": null}, "customer": null, "customer_creation": "always", "customer_details": null, "customer_email": null, "expires_at": 1697713523, "invoice": null, "invoice_creation": {"enabled": false, "invoice_data": {"account_tax_ids": null, "custom_fields": null, "description": null, "footer": null, "metadata": {}, "rendering_options": null}}, "livemode": false, "locale": null, "metadata": {}, "mode": "payment", "payment_intent": "pi_3O2XZ1EcXtiJtvvh0zWGn33E", "payment_link": null, "payment_method_collection": "always", "payment_method_configuration_details": {"id": "pmc_1MC0oMEcXtiJtvvhmhbSUwTJ", "parent": null}, "payment_method_options": {"us_bank_account": {"financial_connections": {"permissions": ["payment_method"], "prefetch": []}, "verification_method": "automatic"}, "wechat_pay": {"app_id": null, "client": "web"}}, "payment_method_types": ["card", "alipay", "klarna", "link", "us_bank_account", "wechat_pay", "cashapp"], "payment_status": "unpaid", "phone_number_collection": {"enabled": false}, "recovered_from": null, "setup_intent": null, "shipping_address_collection": null, "shipping_cost": null, "shipping_details": null, "shipping_options": [], "status": "expired", "submit_type": null, "subscription": null, "success_url": "https://example.com/success", "total_details": {"amount_discount": 0, "amount_shipping": 0, "amount_tax": 0}, "ui_mode": "hosted", "url": null, "updated": 1697713523}, "emitted_at": 1697627275062} +{"stream": "checkout_sessions", "data": {"id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "object": 
"checkout.session", "after_expiration": null, "allow_promotion_codes": null, "amount_subtotal": 3400, "amount_total": 3400, "automatic_tax": {"enabled": false, "status": null}, "billing_address_collection": null, "cancel_url": null, "client_reference_id": null, "client_secret": null, "consent": null, "consent_collection": null, "created": 1697627124, "currency": "usd", "currency_conversion": null, "custom_fields": [], "custom_text": {"shipping_address": null, "submit": null, "terms_of_service_acceptance": null}, "customer": null, "customer_creation": "always", "customer_details": null, "customer_email": null, "expires_at": 1697713523, "invoice": null, "invoice_creation": {"enabled": false, "invoice_data": {"account_tax_ids": null, "custom_fields": null, "description": null, "footer": null, "metadata": {}, "rendering_options": null}}, "livemode": false, "locale": null, "metadata": {}, "mode": "payment", "payment_intent": "pi_3O2XZ1EcXtiJtvvh0zWGn33E", "payment_link": null, "payment_method_collection": "always", "payment_method_configuration_details": {"id": "pmc_1MC0oMEcXtiJtvvhmhbSUwTJ", "parent": null}, "payment_method_options": {"us_bank_account": {"financial_connections": {"permissions": ["payment_method"], "prefetch": []}, "verification_method": "automatic"}, "wechat_pay": {"app_id": null, "client": "web"}}, "payment_method_types": ["card", "alipay", "klarna", "link", "us_bank_account", "wechat_pay", "cashapp"], "payment_status": "unpaid", "phone_number_collection": {"enabled": false}, "recovered_from": null, "setup_intent": null, "shipping_address_collection": null, "shipping_cost": null, "shipping_details": null, "shipping_options": [], "status": "expired", "submit_type": null, "subscription": null, "success_url": "https://example.com/success", "total_details": {"amount_discount": 0, "amount_shipping": 0, "amount_tax": 0}, "ui_mode": "hosted", "url": null, "updated": 1697627124}, "emitted_at": 1697627275062} {"stream": "credit_notes", "data": {"id": "cn_1NGPwmEcXtiJtvvhNXwHpgJF", "object": "credit_note", "amount": 8400, "amount_shipping": 0, "created": 1686158100, "currency": "usd", "customer": "cus_Kou8knsO3qQOwU", "customer_balance_transaction": null, "discount_amount": "0", "discount_amounts": [], "effective_at": 1686158100, "invoice": "in_1K9GK0EcXtiJtvvhSo2LvGqT", "lines": {"object": "list", "data": [{"id": "cnli_1NGPwmEcXtiJtvvhcL7yEIBJ", "object": "credit_note_line_item", "amount": 8400, "amount_excluding_tax": 8400, "description": "a box of parsnips", "discount_amount": 0, "discount_amounts": [], "invoice_line_item": "il_1K9GKLEcXtiJtvvhhHaYMebN", "livemode": false, "quantity": 1, "tax_amounts": [], "tax_rates": [], "type": "invoice_line_item", "unit_amount": 8400, "unit_amount_decimal": 8400.0, "unit_amount_excluding_tax": 8400.0}], "has_more": false, "url": "/v1/credit_notes/cn_1NGPwmEcXtiJtvvhNXwHpgJF/lines"}, "livemode": false, "memo": null, "metadata": {}, "number": "CA35DF83-0001-CN-01", "out_of_band_amount": null, "pdf": "https://pay.stripe.com/credit_notes/acct_1JwnoiEcXtiJtvvh/test_YWNjdF8xSndub2lFY1h0aUp0dnZoLF9PMlV3dFlJelh4NHM1R0VIWnhMR3RjWUtlejFlRWtILDg4MTY4MDc20200Sa50llWu/pdf?s=ap", "reason": null, "refund": null, "shipping_cost": null, "status": "issued", "subtotal": 8400, "subtotal_excluding_tax": 8400, "tax_amounts": [], "total": 8400, "total_excluding_tax": 8400, "type": "pre_payment", "voided_at": null, "updated": 1686158100}, "emitted_at": 1697627276386} {"stream": "customers", "data": {"id": "cus_LIiHR6omh14Xdg", "object": "customer", "address": {"city": 
"san francisco", "country": "US", "line1": "san francisco", "line2": "", "postal_code": "", "state": "CA"}, "balance": 0, "created": 1646998902, "currency": "usd", "default_source": "card_1MSHU1EcXtiJtvvhytSN6V54", "delinquent": false, "description": "test", "discount": null, "email": "test@airbyte_integration_test.com", "invoice_prefix": "09A6A98F", "invoice_settings": {"custom_fields": null, "default_payment_method": null, "footer": null, "rendering_options": null}, "livemode": false, "metadata": {}, "name": "Test", "next_invoice_sequence": 1, "phone": null, "preferred_locales": [], "shipping": {"address": {"city": "", "country": "US", "line1": "", "line2": "", "postal_code": "", "state": ""}, "name": "", "phone": ""}, "tax_exempt": "none", "test_clock": null, "updated": 1646998902}, "emitted_at": 1697627278433} {"stream": "customers", "data": {"id": "cus_Kou8knsO3qQOwU", "object": "customer", "address": null, "balance": 0, "created": 1640123795, "currency": "usd", "default_source": "src_1MSID8EcXtiJtvvhxIT9lXRy", "delinquent": false, "description": null, "discount": null, "email": "edward.gao+stripe-test-customer-1@airbyte.io", "invoice_prefix": "CA35DF83", "invoice_settings": {"custom_fields": null, "default_payment_method": null, "footer": null, "rendering_options": null}, "livemode": false, "metadata": {}, "name": "edgao-test-customer-1", "next_invoice_sequence": 2, "phone": null, "preferred_locales": [], "shipping": null, "tax_exempt": "none", "test_clock": null, "updated": 1640123795}, "emitted_at": 1697627278435} @@ -46,13 +46,13 @@ {"stream": "products", "data": {"id": "prod_KouQ5ez86yREmB", "object": "product", "active": true, "attributes": [], "created": 1640124902, "default_price": "price_1K9GbqEcXtiJtvvhJ3lZe4i5", "description": null, "features": [], "images": [], "livemode": false, "metadata": {}, "name": "edgao-test-product", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10000000", "type": "service", "unit_label": null, "updated": 1696839715, "url": null}, "emitted_at": 1697627307635} {"stream": "products", "data": {"id": "prod_NHcKselSHfKdfc", "object": "product", "active": true, "attributes": [], "created": 1675345504, "default_price": "price_1MX364EcXtiJtvvhE3WgTl4O", "description": "Test Product 1 description", "features": [], "images": ["https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfdjBOT09UaHRiNVl2WmJ6clNYRUlmcFFD00cCBRNHnV"], "livemode": false, "metadata": {}, "name": "Test Product 1", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10301000", "type": "service", "unit_label": null, "updated": 1696839789, "url": null}, "emitted_at": 1697627307877} {"stream": "products", "data": {"id": "prod_NCgx1XP2IFQyKF", "object": "product", "active": true, "attributes": [], "created": 1674209524, "default_price": null, "description": null, "features": [], "images": [], "livemode": false, "metadata": {}, "name": "tu", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10000000", "type": "service", "unit_label": null, "updated": 1696839225, "url": null}, "emitted_at": 1697627307879} -{"stream": "subscriptions", "data": {"id": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "object": "subscription", "application": null, "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": 1697550676.0, "billing_thresholds": null, "cancel_at": 1705499476.0, "cancel_at_period_end": false, "canceled_at": 
1697550676.0, "cancellation_details": {"comment": null, "feedback": null, "reason": "cancellation_requested"}, "collection_method": "charge_automatically", "created": 1697550676, "currency": "usd", "current_period_end": 1700229076.0, "current_period_start": 1697550676, "customer": "cus_NGoTFiJFVbSsvZ", "days_until_due": null, "default_payment_method": null, "default_source": null, "default_tax_rates": [], "description": null, "discount": null, "ended_at": null, "items": {"object": "list", "data": [{"id": "si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}], "has_more": false, "total_count": 1.0, "url": "/v1/subscription_items?subscription=sub_1O2Dg0EcXtiJtvvhz7Q4zS0n"}, "latest_invoice": "in_1O2Dg0EcXtiJtvvhLe87VaYL", "livemode": false, "metadata": {}, "next_pending_invoice_item_invoice": null, "on_behalf_of": null, "pause_collection": null, "payment_settings": {"payment_method_options": null, "payment_method_types": null, "save_default_payment_method": null}, "pending_invoice_item_interval": null, "pending_setup_intent": null, "pending_update": null, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "quantity": 1, "schedule": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "start_date": 1697550676, "status": "active", "test_clock": null, "transfer_data": null, "trial_end": null, "trial_settings": {"end_behavior": {"missing_payment_method": "create_invoice"}}, "trial_start": null, "updated": 1697550676}, "emitted_at": 1697627310741} +{"stream": "subscriptions", "data": {"id": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "object": "subscription", "application": null, "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": 1697550676.0, "billing_thresholds": null, "cancel_at": 1705499476.0, "cancel_at_period_end": false, "canceled_at": 1697550676.0, "cancellation_details": {"comment": null, "feedback": null, "reason": "cancellation_requested"}, "collection_method": "charge_automatically", "created": 1697550676, "currency": "usd", 
"current_period_end": 1702821076.0, "current_period_start": 1700229076, "customer": "cus_NGoTFiJFVbSsvZ", "days_until_due": null, "default_payment_method": null, "default_source": null, "default_tax_rates": [], "description": null, "discount": null, "ended_at": null, "items": {"object": "list", "data": [{"id": "si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}], "has_more": false, "total_count": 1.0, "url": "/v1/subscription_items?subscription=sub_1O2Dg0EcXtiJtvvhz7Q4zS0n"}, "latest_invoice": "in_1ODSSHEcXtiJtvvhW5LllxDH", "livemode": false, "metadata": {}, "next_pending_invoice_item_invoice": null, "on_behalf_of": null, "pause_collection": null, "payment_settings": {"payment_method_options": null, "payment_method_types": null, "save_default_payment_method": null}, "pending_invoice_item_interval": null, "pending_setup_intent": null, "pending_update": null, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "quantity": 1, "schedule": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "start_date": 1697550676, "status": "active", "test_clock": null, "transfer_data": null, "trial_end": null, "trial_settings": {"end_behavior": {"missing_payment_method": "create_invoice"}}, "trial_start": null, "updated": 1697550676}, "emitted_at": 1700232971060} {"stream": "subscription_schedule", "data": {"id": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "object": "subscription_schedule", "application": null, "canceled_at": null, "completed_at": null, "created": 1697550676, "current_phase": {"end_date": 1705499476, "start_date": 1697550676}, "customer": "cus_NGoTFiJFVbSsvZ", "default_settings": {"application_fee_percent": null, "automatic_tax": {"enabled": false}, "billing_cycle_anchor": "automatic", "billing_thresholds": null, "collection_method": "charge_automatically", "default_payment_method": null, "default_source": null, "description": "Test Test", "invoice_settings": "{'days_until_due': None}", "on_behalf_of": null, "transfer_data": null}, "end_behavior": "cancel", "livemode": 
false, "metadata": {}, "phases": [{"add_invoice_items": [], "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": null, "billing_thresholds": null, "collection_method": "charge_automatically", "coupon": null, "currency": "usd", "default_payment_method": null, "default_tax_rates": [], "description": "Test Test", "end_date": 1705499476, "invoice_settings": "{'days_until_due': None}", "items": [{"billing_thresholds": null, "metadata": {}, "plan": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "price": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "quantity": 1, "tax_rates": []}], "metadata": {}, "on_behalf_of": null, "proration_behavior": "create_prorations", "start_date": 1697550676, "transfer_data": null, "trial_end": null}], "released_at": null, "released_subscription": null, "renewal_interval": null, "status": "active", "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "test_clock": null, "updated": 1697550676}, "emitted_at": 1697627312079} {"stream": "transfers", "data": {"id": "tr_1NH18zEcXtiJtvvhnd827cNO", "object": "transfer", "amount": 10000, "amount_reversed": 0, "balance_transaction": "txn_1NH190EcXtiJtvvhBO3PeR7p", "created": 1686301085, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NH18zEYmRTj5on1GkCCsqLK", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [], "has_more": false, "total_count": 0.0, "url": "/v1/transfers/tr_1NH18zEcXtiJtvvhnd827cNO/reversals"}, "reversed": false, "source_transaction": null, "source_type": "card", "transfer_group": null, "updated": 1686301085}, "emitted_at": 1697627313262} {"stream": "transfers", "data": {"id": "tr_1NGoaCEcXtiJtvvhjmHtOGOm", "object": "transfer", "amount": 100, "amount_reversed": 100, "balance_transaction": "txn_1NGoaDEcXtiJtvvhsZrNMsdJ", "created": 1686252800, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NGoaCEYmRTj5on1LAlAIG3a", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [{"id": "trr_1NGolCEcXtiJtvvhOYPck3CP", "object": "transfer_reversal", "amount": 100, "balance_transaction": "txn_1NGolCEcXtiJtvvhZRy4Kd5S", "created": 1686253482, "currency": "usd", "destination_payment_refund": "pyr_1NGolBEYmRTj5on1STal3rmp", "metadata": {}, "source_refund": null, "transfer": "tr_1NGoaCEcXtiJtvvhjmHtOGOm"}], "has_more": false, "total_count": 1.0, "url": "/v1/transfers/tr_1NGoaCEcXtiJtvvhjmHtOGOm/reversals"}, "reversed": true, "source_transaction": null, "source_type": "card", "transfer_group": "ORDER10", "updated": 1686252800}, "emitted_at": 1697627313264} -{"stream": "refunds", "data": {"id": "re_3MVuZyEcXtiJtvvh0A6rSbeJ", "object": "refund", "amount": 200000, "balance_transaction": "txn_3MVuZyEcXtiJtvvh0v0QyAMx", "charge": "ch_3MVuZyEcXtiJtvvh0tiVC7DI", "created": 1675074488, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MVuZyEcXtiJtvvh07Ehi4cx", "reason": "fraudulent", "receipt_number": "3278-5368", "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null, "updated": 1675074488}, "emitted_at": 1697627314206} -{"stream": "refunds", "data": {"id": "re_3NcwAGEcXtiJtvvh1UT4PBe6", "object": "refund", "amount": 600, "balance_transaction": "txn_3NcwAGEcXtiJtvvh1AcNi3Ma", "charge": "ch_3NcwAGEcXtiJtvvh1m0SSmfQ", "created": 1692782173, "currency": "usd", "metadata": {}, "payment_intent": "pi_3NcwAGEcXtiJtvvh1olHTPmH", "reason": null, "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", 
"transfer_reversal": null, "updated": 1692782173}, "emitted_at": 1697627314485} -{"stream": "refunds", "data": {"id": "re_3MngeoEcXtiJtvvh0c4KeMOd", "object": "refund", "amount": 540, "balance_transaction": "txn_3MngeoEcXtiJtvvh0Cz3qwU2", "charge": "ch_3MngeoEcXtiJtvvh0SBFQWe2", "created": 1683889626, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MngeoEcXtiJtvvh0B7Tcbr4", "reason": "requested_by_customer", "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null, "updated": 1683889626}, "emitted_at": 1697627314486} +{"stream": "refunds", "data": {"id": "re_3MVuZyEcXtiJtvvh0A6rSbeJ", "object": "refund", "amount": 200000, "balance_transaction": "txn_3MVuZyEcXtiJtvvh0v0QyAMx", "charge": "ch_3MVuZyEcXtiJtvvh0tiVC7DI", "created": 1675074488, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MVuZyEcXtiJtvvh07Ehi4cx", "reason": "fraudulent", "receipt_number": "3278-5368", "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314206} +{"stream": "refunds", "data": {"id": "re_3NcwAGEcXtiJtvvh1UT4PBe6", "object": "refund", "amount": 600, "balance_transaction": "txn_3NcwAGEcXtiJtvvh1AcNi3Ma", "charge": "ch_3NcwAGEcXtiJtvvh1m0SSmfQ", "created": 1692782173, "currency": "usd", "metadata": {}, "payment_intent": "pi_3NcwAGEcXtiJtvvh1olHTPmH", "reason": null, "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314485} +{"stream": "refunds", "data": {"id": "re_3MngeoEcXtiJtvvh0c4KeMOd", "object": "refund", "amount": 540, "balance_transaction": "txn_3MngeoEcXtiJtvvh0Cz3qwU2", "charge": "ch_3MngeoEcXtiJtvvh0SBFQWe2", "created": 1683889626, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MngeoEcXtiJtvvh0B7Tcbr4", "reason": "requested_by_customer", "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314486} {"stream": "payment_intents", "data": {"id": "pi_3K9FSOEcXtiJtvvh0AEIFllC", "object": "payment_intent", "amount": 5300, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 5300, "application": null, "application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9FSOEcXtiJtvvh0AEIFllC_secret_uPUtIaSltgtW0qK7mLD0uF2Mr", "confirmation_method": "automatic", "created": 1640120472, "currency": "usd", "customer": null, "description": null, "invoice": null, "last_payment_error": null, "latest_charge": "ch_3K9FSOEcXtiJtvvh0zxb7clc", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": "src_1K9FSOEcXtiJtvvhHGu1qtOx", "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "succeeded", "transfer_data": null, "transfer_group": null, "updated": 1640120472}, "emitted_at": 1697627315508} {"stream": "payment_intents", "data": {"id": "pi_3K9F5DEcXtiJtvvh16scJMp6", "object": "payment_intent", "amount": 4200, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 4200, "application": null, 
"application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9F5DEcXtiJtvvh16scJMp6_secret_YwhzCTpXtfcKYeklXnPnysRRi", "confirmation_method": "automatic", "created": 1640119035, "currency": "usd", "customer": null, "description": "edgao test", "invoice": null, "last_payment_error": null, "latest_charge": "ch_3K9F5DEcXtiJtvvh1w2MaTpj", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": "src_1K9F5CEcXtiJtvvhrsZdur8Y", "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "succeeded", "transfer_data": null, "transfer_group": null, "updated": 1640119035}, "emitted_at": 1697627315511} {"stream": "payment_intents", "data": {"id": "pi_3K9F4mEcXtiJtvvh18NKhEuo", "object": "payment_intent", "amount": 4200, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 0, "application": null, "application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9F4mEcXtiJtvvh18NKhEuo_secret_pfUt7CTkPjVdJacycm0bMpdLt", "confirmation_method": "automatic", "created": 1640119008, "currency": "usd", "customer": null, "description": "edgao test", "invoice": null, "last_payment_error": {"charge": "ch_3K9F4mEcXtiJtvvh1kUzxjwN", "code": "card_declined", "decline_code": "test_mode_live_card", "doc_url": "https://stripe.com/docs/error-codes/card-declined", "message": "Your card was declined. Your request was in test mode, but used a non test (live) card. 
For a list of valid test cards, visit: https://stripe.com/docs/testing.", "source": {"id": "src_1K9F4hEcXtiJtvvhrUEwvCyi", "object": "source", "amount": null, "card": {"address_line1_check": null, "address_zip_check": null, "brand": "Visa", "country": "US", "cvc_check": "unchecked", "dynamic_last4": null, "exp_month": 9, "exp_year": 2028, "fingerprint": "Re3p4j8issXA77iI", "funding": "credit", "last4": "8097", "name": null, "three_d_secure": "optional", "tokenization_method": null}, "client_secret": "src_client_secret_b3v8YqNMLGykB120fqv2Tjhq", "created": 1640119003, "currency": null, "flow": "none", "livemode": false, "metadata": {}, "owner": {"address": null, "email": null, "name": null, "phone": null, "verified_address": null, "verified_email": null, "verified_name": null, "verified_phone": null}, "statement_descriptor": null, "status": "consumed", "type": "card", "usage": "reusable"}, "type": "card_error"}, "latest_charge": "ch_3K9F4mEcXtiJtvvh1kUzxjwN", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": null, "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "requires_payment_method", "transfer_data": null, "transfer_group": null, "updated": 1640119008}, "emitted_at": 1697627315513} @@ -69,4 +69,4 @@ {"stream": "invoice_line_items", "data": {"id": "il_1MX2yfEcXtiJtvvhiunY2j1x", "object": "line_item", "amount": 25200, "amount_excluding_tax": 25200, "currency": "usd", "description": "edgao-test-product", "discount_amounts": [{"amount": 2520, "discount": "di_1MX2ysEcXtiJtvvh8ORqRVKm"}], "discountable": true, "discounts": ["di_1MX2ysEcXtiJtvvh8ORqRVKm"], "invoice_item": "ii_1MX2yfEcXtiJtvvhfhyOG7SP", "livemode": false, "metadata": {}, "period": {"end": 1675345045, "start": 1675345045}, "plan": null, "price": {"id": "price_1K9GbqEcXtiJtvvhJ3lZe4i5", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1640124902, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_KouQ5ez86yREmB", "recurring": null, "tax_behavior": "inclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 12600, "unit_amount_decimal": "12600"}, "proration": false, "proration_details": {"credited_items": null}, "quantity": 2, "subscription": null, "tax_amounts": [{"amount": 0, "inclusive": true, "tax_rate": "txr_1MX2yfEcXtiJtvvhVcMEMTRj", "taxability_reason": "not_collecting", "taxable_amount": 0}], "tax_rates": [], "type": "invoiceitem", "unit_amount_excluding_tax": "12600", "invoice_id": "in_1MX2yFEcXtiJtvvhMXhUCgKx"}, "emitted_at": 1697627336449} {"stream": "subscription_items", "data": {"id": "si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", 
"tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}, "emitted_at": 1697627337431} {"stream": "transfer_reversals", "data": {"id": "trr_1NGolCEcXtiJtvvhOYPck3CP", "object": "transfer_reversal", "amount": 100, "balance_transaction": "txn_1NGolCEcXtiJtvvhZRy4Kd5S", "created": 1686253482, "currency": "usd", "destination_payment_refund": "pyr_1NGolBEYmRTj5on1STal3rmp", "metadata": {}, "source_refund": null, "transfer": "tr_1NGoaCEcXtiJtvvhjmHtOGOm"}, "emitted_at": 1697627338960} -{"stream": "usage_records", "data": {"id": "sis_1O4gIOEcXtiJtvvhmsoeBHkP", "object": "usage_record_summary", "invoice": null, "livemode": false, "period": {"end": null, "start": 1697550676}, "subscription_item": "si_OptSP2o3XZUBpx", "total_usage": 1}, "emitted_at": 1697627340175} +{"stream": "usage_records", "data": {"id": "sis_1ODTdwEcXtiJtvvhZChEVsbN", "object": "usage_record_summary", "invoice": null, "livemode": false, "period": {"end": null, "start": 1700229076}, "subscription_item": "si_OptSP2o3XZUBpx", "total_usage": 1}, "emitted_at": 1700233660884} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-stripe/metadata.yaml b/airbyte-integrations/connectors/source-stripe/metadata.yaml index 0c34e7b7c27d..11fc73a40128 100644 --- a/airbyte-integrations/connectors/source-stripe/metadata.yaml +++ b/airbyte-integrations/connectors/source-stripe/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: e094cb9a-26de-4645-8761-65c0c425d1de - dockerImageTag: 4.5.3 + dockerImageTag: 5.0.1 dockerRepository: airbyte/source-stripe documentationUrl: https://docs.airbyte.com/integrations/sources/stripe githubIssueLabel: source-stripe @@ -33,6 +33,11 @@ data: schema refresh of all effected streams is required to use the new cursor format. upgradeDeadline: "2023-09-14" + 5.0.0: + message: + Version 5.0.0 introduces fixes for the `CheckoutSessions`, `CheckoutSessionsLineItems` and `Refunds` streams. The cursor field is changed for the `CheckoutSessionsLineItems` and `Refunds` streams. This will prevent data loss during incremental syncs. + Also, the `Invoices`, `Subscriptions` and `SubscriptionSchedule` stream schemas have been updated. 
+      upgradeDeadline: "2023-11-30"
   suggestedStreams:
     streams:
       - customers
diff --git a/airbyte-integrations/connectors/source-stripe/setup.py b/airbyte-integrations/connectors/source-stripe/setup.py
index 8ce3d6936bdd..55bb256393b6 100644
--- a/airbyte-integrations/connectors/source-stripe/setup.py
+++ b/airbyte-integrations/connectors/source-stripe/setup.py
@@ -5,7 +5,7 @@
 
 from setuptools import find_packages, setup
 
-MAIN_REQUIREMENTS = ["airbyte-cdk==0.52.8", "stripe==2.56.0", "pendulum==2.1.2"]
+MAIN_REQUIREMENTS = ["airbyte-cdk==0.53.6", "stripe==2.56.0", "pendulum==2.1.2"]
 
 TEST_REQUIREMENTS = ["pytest-mock~=3.6.1", "pytest~=6.1", "requests-mock", "requests_mock~=1.8", "freezegun==1.2.2"]
diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py b/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py
index 9906c21a525a..6226ffc12ea9 100644
--- a/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py
+++ b/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py
@@ -3,13 +3,16 @@
 #
 
 import logging
-from typing import Optional, Tuple
+from typing import Any, Mapping, Optional, Tuple
 
+from airbyte_cdk.models import SyncMode
 from airbyte_cdk.sources import Source
 from airbyte_cdk.sources.streams import Stream
 from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy
 from requests import HTTPError
 
+from .stream_helpers import get_first_record_for_slice, get_first_stream_slice
+
 STRIPE_ERROR_CODES = {
     "more_permissions_required": "This is most likely due to insufficient permissions on the credentials in use. "
     "Try to grant required permissions/scopes or re-authenticate",
@@ -20,6 +23,60 @@ class StripeAvailabilityStrategy(HttpAvailabilityStrategy):
+    def _check_availability_for_sync_mode(
+        self,
+        stream: Stream,
+        sync_mode: SyncMode,
+        logger: logging.Logger,
+        source: Optional["Source"],
+        stream_state: Optional[Mapping[str, Any]],
+    ) -> Tuple[bool, Optional[str]]:
+        try:
+            # Some streams need a stream slice to read records (e.g. if they have a SubstreamPartitionRouter)
+            # Streams that don't need a stream slice will return `None` as their first stream slice.
+            stream_slice = get_first_stream_slice(stream, sync_mode, stream_state)
+        except StopIteration:
+            # If stream_slices has no `next()` item (Note - this is different from stream_slices returning [None]!)
+            # This can happen when a substream's `stream_slices` method does a `for record in parent_records: yield`
+            # without accounting for the case in which the parent stream is empty.
+            reason = f"Cannot attempt to connect to stream {stream.name} - no stream slices were found, likely because the parent stream is empty."
+            return False, reason
+        except HTTPError as error:
+            is_available, reason = self.handle_http_error(stream, logger, source, error)
+            if not is_available:
+                reason = f"Unable to get slices for {stream.name} stream, because of error in parent stream. {reason}"
+            return is_available, reason
+
+        try:
+            get_first_record_for_slice(stream, sync_mode, stream_slice, stream_state)
+            return True, None
+        except StopIteration:
+            logger.info(f"Successfully connected to stream {stream.name}, but got 0 records.")
+            return True, None
+        except HTTPError as error:
+            is_available, reason = self.handle_http_error(stream, logger, source, error)
+            if not is_available:
+                reason = f"Unable to read {stream.name} stream. {reason}"
+            return is_available, reason
+
+    def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional["Source"]) -> Tuple[bool, Optional[str]]:
+        """
+        Check stream availability by attempting to read the first record of the
+        stream.
+
+        :param stream: stream
+        :param logger: source logger
+        :param source: (optional) source
+        :return: A tuple of (boolean, str). If boolean is true, then the stream
+          is available, and no str is required. Otherwise, the stream is unavailable
+          for some reason and the str should describe what went wrong and how to
+          resolve the unavailability, if possible.
+        """
+        is_available, reason = self._check_availability_for_sync_mode(stream, SyncMode.full_refresh, logger, source, None)
+        if not is_available or not stream.supports_incremental:
+            return is_available, reason
+        return self._check_availability_for_sync_mode(stream, SyncMode.incremental, logger, source, {stream.cursor_field: 0})
+
     def handle_http_error(
         self, stream: Stream, logger: logging.Logger, source: Optional["Source"], error: HTTPError
     ) -> Tuple[bool, Optional[str]]:
diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json
index 09c6e9e28f3e..b00f6569d12e 100644
--- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json
+++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json
@@ -5,6 +5,8 @@
     "id": { "type": ["null", "string"] },
     "checkout_session_id": { "type": ["null", "string"] },
     "checkout_session_expires_at": { "type": ["null", "integer"] },
+    "checkout_session_created": { "type": ["null", "integer"] },
+    "checkout_session_updated": { "type": ["null", "integer"] },
     "object": { "type": ["null", "string"] },
     "amount_subtotal": { "type": ["null", "integer"] },
     "amount_tax": { "type": ["null", "integer"] },
diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json
index 33d407425ea0..6959909cc77f 100644
--- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json
+++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json
@@ -499,10 +499,7 @@
       "type": ["null", "integer"]
     },
     "default_tax_rates": {
-      "type": ["null", "array"],
-      "items": {
-        "$ref": "tax_rates.json"
-      }
+      "$ref": "tax_rates.json"
     },
     "total_excluding_tax": {
       "type": ["null", "integer"]
diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json
index 9f187d82924c..cc14a57138fd 100644
--- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json
+++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json
@@ -134,11 +134,7 @@
       "type": ["null", "string"]
     },
     "default_tax_rates": {
-      "type": ["null", "array"],
-      "items": {
-        "type": ["null", "object"],
-        "additionalProperties": true
-      }
+      "$ref": "tax_rates.json"
     },
     "description": {
       "type": ["null", "string"]
diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json
index
5d06810fb4c0..1a720f6fd034 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json @@ -323,10 +323,7 @@ } }, "default_tax_rates": { - "type": ["null", "array"], - "items": { - "$ref": "tax_rates.json" - } + "$ref": "tax_rates.json" }, "pause_collection": { "type": ["null", "object"], diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py index e72e4dd5398b..449eb53f99db 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py @@ -2,7 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import logging import os +from datetime import timedelta from typing import Any, List, Mapping, MutableMapping, Optional, Tuple import pendulum @@ -13,17 +15,18 @@ from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.message.repository import InMemoryMessageRepository from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.call_rate import AbstractAPIBudget, HttpAPIBudget, HttpRequestMatcher, MovingWindowCallRatePolicy, Rate from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_cdk.utils.traced_exception import AirbyteTracedException from airbyte_protocol.models import SyncMode from source_stripe.streams import ( - CheckoutSessionsLineItems, CreatedCursorIncrementalStripeStream, CustomerBalanceTransactions, Events, IncrementalStripeStream, + ParentIncrementalStipeSubStream, Persons, SetupAttempts, StripeLazySubStream, @@ -33,9 +36,12 @@ UpdatedCursorIncrementalStripeStream, ) -_MAX_CONCURRENCY = 3 +logger = logging.getLogger("airbyte") + +_MAX_CONCURRENCY = 20 _CACHE_DISABLED = os.environ.get("CACHE_DISABLED") USE_CACHE = not _CACHE_DISABLED +STRIPE_TEST_ACCOUNT_PREFIX = "sk_test_" class SourceStripe(AbstractSource): @@ -114,6 +120,52 @@ def customers(**args): **args, ) + @staticmethod + def is_test_account(config: Mapping[str, Any]) -> bool: + """Check if configuration uses Stripe test account (https://stripe.com/docs/keys#obtain-api-keys) + + :param config: + :return: True if configured to use a test account, False - otherwise + """ + + return str(config["client_secret"]).startswith(STRIPE_TEST_ACCOUNT_PREFIX) + + def get_api_call_budget(self, config: Mapping[str, Any]) -> AbstractAPIBudget: + """Get API call budget which connector is allowed to use. 
+ + :param config: + :return: + """ + + max_call_rate = 25 if self.is_test_account(config) else 100 + if config.get("call_rate_limit"): + call_limit = config["call_rate_limit"] + if call_limit > max_call_rate: + logger.warning( + "call_rate_limit is larger than maximum allowed %s, fallback to default %s.", + max_call_rate, + max_call_rate, + ) + call_limit = max_call_rate + else: + call_limit = max_call_rate + + policies = [ + MovingWindowCallRatePolicy( + rates=[Rate(limit=20, interval=timedelta(seconds=1))], + matchers=[ + HttpRequestMatcher(url="https://api.stripe.com/v1/files"), + HttpRequestMatcher(url="https://api.stripe.com/v1/file_links"), + ], + ), + MovingWindowCallRatePolicy( + rates=[Rate(limit=call_limit, interval=timedelta(seconds=1))], + matchers=[], + ), + ] + + return HttpAPIBudget(policies=policies) + def streams(self, config: Mapping[str, Any]) -> List[Stream]: config = self.validate_and_fill_with_defaults(config) authenticator = TokenAuthenticator(config["client_secret"]) @@ -122,6 +174,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: "account_id": config["account_id"], "start_date": config["start_date"], "slice_range": config["slice_range"], + "api_budget": self.get_api_call_budget(config), } incremental_args = {**args, "lookback_window_days": config["lookback_window_days"]} subscriptions = IncrementalStripeStream( @@ -144,10 +197,9 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: subscription_items = StripeLazySubStream( name="subscription_items", path="subscription_items", - extra_request_params=lambda self, stream_slice, *args, **kwargs: {"subscription": stream_slice[self.parent_id]}, + extra_request_params=lambda self, stream_slice, *args, **kwargs: {"subscription": stream_slice["parent"]["id"]}, parent=subscriptions, use_cache=USE_CACHE, - parent_id="subscription_id", sub_items_attr="items", **args, ) @@ -185,8 +237,22 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ], **args, ) + checkout_sessions = UpdatedCursorIncrementalStripeStream( + name="checkout_sessions", + path="checkout/sessions", + use_cache=USE_CACHE, + legacy_cursor_field="created", + event_types=[ + "checkout.session.async_payment_failed", + "checkout.session.async_payment_succeeded", + "checkout.session.completed", + "checkout.session.expired", + ], + **args, + ) + streams = [ - CheckoutSessionsLineItems(**incremental_args), + checkout_sessions, CustomerBalanceTransactions(**args), Events(**incremental_args), UpdatedCursorIncrementalStripeStream( @@ -214,19 +280,10 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: CreatedCursorIncrementalStripeStream(name="balance_transactions", path="balance_transactions", **incremental_args), CreatedCursorIncrementalStripeStream(name="files", path="files", **incremental_args), CreatedCursorIncrementalStripeStream(name="file_links", path="file_links", **incremental_args), - UpdatedCursorIncrementalStripeStream( - name="checkout_sessions", - path="checkout/sessions", - use_cache=USE_CACHE, - legacy_cursor_field="expires_at", - event_types=[ - "checkout.session.async_payment_failed", - "checkout.session.async_payment_succeeded", - "checkout.session.completed", - "checkout.session.expired", - ], - **args, - ), + # The Refunds stream does not utilize the Events API as it created issues with data loss during the incremental syncs. + # Therefore, we're using the regular API with the `created` cursor field. A bug has been filed with Stripe. 
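The budget logic above picks a ceiling of 25 requests per second for test keys and 100 for live keys, and lets `call_rate_limit` lower, but never raise, that ceiling before the moving-window policies are built. A plain-Python sketch of just that precedence logic follows; the helper name `resolve_call_limit` is mine and not part of the connector, the constants come from the diff.

```python
import logging

logger = logging.getLogger("airbyte")

STRIPE_TEST_ACCOUNT_PREFIX = "sk_test_"


def resolve_call_limit(config: dict) -> int:
    """Return the calls-per-second budget: 25 for test keys, 100 for live keys,
    optionally lowered (never raised) by the user's call_rate_limit setting."""
    is_test = str(config["client_secret"]).startswith(STRIPE_TEST_ACCOUNT_PREFIX)
    max_call_rate = 25 if is_test else 100
    call_limit = config.get("call_rate_limit") or max_call_rate
    if call_limit > max_call_rate:
        logger.warning("call_rate_limit is larger than maximum allowed %s, falling back to %s.", max_call_rate, max_call_rate)
        call_limit = max_call_rate
    return call_limit


print(resolve_call_limit({"client_secret": "sk_test_abc"}))                          # 25
print(resolve_call_limit({"client_secret": "sk_live_abc", "call_rate_limit": 110}))  # 100
print(resolve_call_limit({"client_secret": "sk_live_abc", "call_rate_limit": 10}))   # 10
```

The resolved limit then feeds the catch-all MovingWindowCallRatePolicy, while /v1/files and /v1/file_links keep their own tighter 20-requests-per-second window via dedicated request matchers.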
+ # See more at https://github.com/airbytehq/oncall/issues/3090, https://github.com/airbytehq/oncall/issues/3428 + CreatedCursorIncrementalStripeStream(name="refunds", path="refunds", **incremental_args), UpdatedCursorIncrementalStripeStream( name="payment_methods", path="payment_methods", @@ -343,9 +400,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: **args, ), transfers, - IncrementalStripeStream( - name="refunds", path="refunds", use_cache=USE_CACHE, event_types=["refund.created", "refund.updated"], **args - ), IncrementalStripeStream( name="payment_intents", path="payment_intents", @@ -396,54 +450,63 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ), UpdatedCursorIncrementalStripeLazySubStream( name="application_fees_refunds", - path=lambda self, stream_slice, *args, **kwargs: f"application_fees/{stream_slice[self.parent_id]}/refunds", + path=lambda self, stream_slice, *args, **kwargs: f"application_fees/{stream_slice['parent']['id']}/refunds", parent=application_fees, event_types=["application_fee.refund.updated"], - parent_id="refund_id", sub_items_attr="refunds", - add_parent_id=True, **args, ), UpdatedCursorIncrementalStripeLazySubStream( name="bank_accounts", - path=lambda self, stream_slice, *args, **kwargs: f"customers/{stream_slice[self.parent_id]}/sources", + path=lambda self, stream_slice, *args, **kwargs: f"customers/{stream_slice['parent']['id']}/sources", parent=self.customers(expand_items=["data.sources"], **args), event_types=["customer.source.created", "customer.source.expiring", "customer.source.updated", "customer.source.deleted"], legacy_cursor_field=None, - parent_id="customer_id", sub_items_attr="sources", extra_request_params={"object": "bank_account"}, response_filter=lambda record: record["object"] == "bank_account", **args, ), + ParentIncrementalStipeSubStream( + name="checkout_sessions_line_items", + path=lambda self, stream_slice, *args, **kwargs: f"checkout/sessions/{stream_slice['parent']['id']}/line_items", + parent=checkout_sessions, + expand_items=["data.discounts", "data.taxes"], + cursor_field="checkout_session_updated", + slice_data_retriever=lambda record, stream_slice: { + "checkout_session_id": stream_slice["parent"]["id"], + "checkout_session_expires_at": stream_slice["parent"]["expires_at"], + "checkout_session_created": stream_slice["parent"]["created"], + "checkout_session_updated": stream_slice["parent"]["updated"], + **record, + }, + **args, + ), StripeLazySubStream( name="invoice_line_items", - path=lambda self, stream_slice, *args, **kwargs: f"invoices/{stream_slice[self.parent_id]}/lines", + path=lambda self, stream_slice, *args, **kwargs: f"invoices/{stream_slice['parent']['id']}/lines", parent=invoices, - parent_id="invoice_id", sub_items_attr="lines", - add_parent_id=True, + slice_data_retriever=lambda record, stream_slice: {"invoice_id": stream_slice["parent"]["id"], **record}, **args, ), subscription_items, StripeSubStream( name="transfer_reversals", - path=lambda self, stream_slice, *args, **kwargs: f"transfers/{stream_slice.get('parent', {}).get('id')}/reversals", + path=lambda self, stream_slice, *args, **kwargs: f"transfers/{stream_slice['parent']['id']}/reversals", parent=transfers, **args, ), StripeSubStream( name="usage_records", - path=lambda self, stream_slice, *args, **kwargs: f"subscription_items/{stream_slice.get('parent', {}).get('id')}/usage_record_summaries", + path=lambda self, stream_slice, *args, **kwargs: f"subscription_items/{stream_slice['parent']['id']}/usage_record_summaries", 
parent=subscription_items, primary_key=None, **args, ), ] - # We cap the number of workers to avoid hitting the Stripe rate limit - # The limit can be removed or increased once we have proper rate limiting - concurrency_level = min(config.get("num_workers", 2), _MAX_CONCURRENCY) + concurrency_level = min(config.get("num_workers", 10), _MAX_CONCURRENCY) streams[0].logger.info(f"Using concurrent cdk with concurrency level {concurrency_level}") return [ diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml index f65886c41298..719177412a96 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml @@ -42,8 +42,8 @@ connectionSpecification: description: >- When set, the connector will always re-export data from the past N days, where N is the value set here. This is useful if your data is frequently updated - after creation. Applies only to streams that do not support event-based incremental syncs: CheckoutSessionLineItems, - Events, SetupAttempts, ShippingRates, BalanceTransactions, Files, FileLinks. More info here order: 3 slice_range: @@ -61,10 +61,18 @@ connectionSpecification: type: integer title: Number of concurrent workers minimum: 1 - maximum: 3 - default: 2 + maximum: 20 + default: 10 examples: [1, 2, 3] description: >- - The number of worker thread to use for the sync. The bigger the value is, the faster the sync will be. - Be careful as rate limiting is not implemented. + The number of worker thread to use for the sync. + The performance upper boundary depends on call_rate_limit setting and type of account. order: 5 + call_rate_limit: + type: integer + title: Max number of API calls per second + examples: [25, 100] + description: >- + The number of API calls per second that you allow connector to make. This value can not be bigger than real + API call rate limit (https://stripe.com/docs/rate-limits). If not specified the default maximum is 25 and 100 + calls per second for test and production tokens respectively. diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py b/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py new file mode 100644 index 000000000000..dad073ae485b --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import Any, Mapping, Optional + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams.core import Stream, StreamData + + +def get_first_stream_slice(stream, sync_mode, stream_state) -> Optional[Mapping[str, Any]]: + """ + Gets the first stream_slice from a given stream's stream_slices. 
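Several of the sub-streams defined above (for example `checkout_sessions_line_items` and `invoice_line_items`) now take a `slice_data_retriever` callback in place of the removed `parent_id`/`add_parent_id` flags: the callback copies selected fields from the parent record carried in the stream slice onto every child record. A small illustration of that shape, with invented sample identifiers:

```python
# Illustrative only: a standalone version of the slice_data_retriever callback used
# for checkout_sessions_line_items above. The sample data is made up.
from typing import Any, Mapping


def enrich_line_item(record: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> Mapping[str, Any]:
    parent = stream_slice["parent"]
    return {
        "checkout_session_id": parent["id"],
        "checkout_session_expires_at": parent["expires_at"],
        "checkout_session_created": parent["created"],
        "checkout_session_updated": parent["updated"],
        **record,  # record fields win if they collide with the injected keys
    }


slice_ = {"parent": {"id": "cs_123", "expires_at": 1700000000, "created": 1699990000, "updated": 1699995000}}
print(enrich_line_item({"id": "li_1", "amount_total": 0, "currency": "usd"}, slice_))
```

The record extractors further down in streams.py apply this callback to every record they yield, which is also why `checkout_session_created` and `checkout_session_updated` had to be added to the stream's JSON schema earlier in this diff.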
+ :param stream: stream + :param sync_mode: sync_mode + :param stream_state: stream_state + :raises StopIteration: if there is no first slice to return (the stream_slices generator is empty) + :return: first stream slice from 'stream_slices' generator (`None` is a valid stream slice) + """ + # We wrap the return output of stream_slices() because some implementations return types that are iterable, + # but not iterators such as lists or tuples + slices = iter(stream.stream_slices(sync_mode=sync_mode, cursor_field=stream.cursor_field, stream_state=stream_state)) + return next(slices) + + +def get_first_record_for_slice( + stream: Stream, sync_mode: SyncMode, stream_slice: Optional[Mapping[str, Any]], stream_state: Optional[Mapping[str, Any]] +) -> StreamData: + """ + Gets the first record for a stream_slice of a stream. + :param stream: stream + :param sync_mode: sync_mode + :param stream_slice: stream_slice + :param stream_state: stream_state + :raises StopIteration: if there is no first record to return (the read_records generator is empty) + :return: StreamData containing the first record in the slice + """ + # We wrap the return output of read_records() because some implementations return types that are iterable, + # but not iterators such as lists or tuples + records_for_slice = iter(stream.read_records(sync_mode=sync_mode, stream_slice=stream_slice, stream_state=stream_state)) + return next(records_for_slice) diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py index 460059716a75..f47f34d26bc6 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py @@ -15,6 +15,7 @@ from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream +from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer from source_stripe.availability_strategy import StripeAvailabilityStrategy, StripeSubStreamAvailabilityStrategy @@ -25,41 +26,54 @@ class IRecordExtractor(ABC): @abstractmethod - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[Mapping]: + def extract_records(self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None) -> Iterable[Mapping]: pass class DefaultRecordExtractor(IRecordExtractor): - def __init__(self, response_filter: Optional[Callable] = None): - self._response_filter = response_filter or (lambda x: x) + def __init__(self, response_filter: Optional[Callable] = None, slice_data_retriever: Optional[Callable] = None): + self._response_filter = response_filter or (lambda record: record) + self._slice_data_retriever = slice_data_retriever or (lambda record, *_: record) - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: - yield from filter(self._response_filter, records) + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: + yield from filter(self._response_filter, map(lambda x: self._slice_data_retriever(x, stream_slice), records)) class EventRecordExtractor(DefaultRecordExtractor): - def __init__(self, cursor_field: str, response_filter: 
Optional[Callable] = None): - super().__init__(response_filter) + def __init__(self, cursor_field: str, response_filter: Optional[Callable] = None, slice_data_retriever: Optional[Callable] = None): + super().__init__(response_filter, slice_data_retriever) self.cursor_field = cursor_field - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: for record in records: item = record["data"]["object"] item[self.cursor_field] = record["created"] if record["type"].endswith(".deleted"): item["is_deleted"] = True if self._response_filter(item): - yield item + yield self._slice_data_retriever(item, stream_slice) class UpdatedCursorIncrementalRecordExtractor(DefaultRecordExtractor): - def __init__(self, cursor_field: str, legacy_cursor_field: Optional[str], response_filter: Optional[Callable] = None): - super().__init__(response_filter) + def __init__( + self, + cursor_field: str, + legacy_cursor_field: Optional[str], + response_filter: Optional[Callable] = None, + slice_data_retriever: Optional[Callable] = None, + ): + super().__init__(response_filter, slice_data_retriever) self.cursor_field = cursor_field self.legacy_cursor_field = legacy_cursor_field - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: - records = super().extract_records(records) + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: + records = super().extract_records(records, stream_slice) for record in records: if self.cursor_field in record: yield record @@ -126,13 +140,14 @@ def __init__( expand_items: Optional[List[str]] = None, extra_request_params: Optional[Union[Mapping[str, Any], Callable]] = None, response_filter: Optional[Callable] = None, + slice_data_retriever: Optional[Callable] = None, primary_key: Optional[str] = "id", **kwargs, ): self.account_id = account_id self.start_date = start_date self.slice_range = slice_range or self.DEFAULT_SLICE_RANGE - self._record_extractor = record_extractor or DefaultRecordExtractor(response_filter) + self._record_extractor = record_extractor or DefaultRecordExtractor(response_filter, slice_data_retriever) self._name = name self._path = path self._use_cache = use_cache @@ -174,7 +189,7 @@ def parse_response( stream_slice: Optional[Mapping[str, Any]] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Iterable[Mapping[str, Any]]: - yield from self.record_extractor.extract_records(response.json().get("data", [])) + yield from self.record_extractor.extract_records(response.json().get("data", []), stream_slice) def request_headers(self, **kwargs) -> Mapping[str, Any]: headers = {"Stripe-Version": STRIPE_API_VERSION} @@ -251,7 +266,8 @@ def stream_slices( def get_start_timestamp(self, stream_state) -> int: start_point = self.start_date - start_point = max(start_point, stream_state.get(self.cursor_field, 0)) + # we use +1 second because date range is inclusive + start_point = max(start_point, stream_state.get(self.cursor_field, 0) + 1) if start_point and self.lookback_window_days: self.logger.info(f"Applying lookback window of {self.lookback_window_days} days to stream {self.name}") @@ -471,111 +487,14 @@ def read_records( yield from self.parent_stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) -class 
CheckoutSessionsLineItems(CreatedCursorIncrementalStripeStream): - """ - API docs: https://stripe.com/docs/api/checkout/sessions/line_items - """ - - cursor_field = "checkout_session_expires_at" - - @property - def expand_items(self) -> Optional[List[str]]: - return ["data.discounts", "data.taxes"] - - @property - def checkout_session(self): - return UpdatedCursorIncrementalStripeStream( - name="checkout_sessions", - path="checkout/sessions", - use_cache=USE_CACHE, - legacy_cursor_field="expires_at", - event_types=[ - "checkout.session.async_payment_failed", - "checkout.session.async_payment_succeeded", - "checkout.session.completed", - "checkout.session.expired", - ], - authenticator=self.authenticator, - account_id=self.account_id, - start_date=self.start_date, - slice_range=self.slice_range, - ) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # https://stripe.com/docs/api/checkout/sessions/create#create_checkout_session-expires_at - # 'expires_at' - can be anywhere from 1 to 24 hours after Checkout Session creation. - # thus we should always add 1 day to lookback window to avoid possible checkout_sessions losses - self.lookback_window_days = self.lookback_window_days + 1 - - def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): - return f"checkout/sessions/{stream_slice['checkout_session_id']}/line_items" - - def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> MutableMapping[str, Any]: - # override to not refer to slice values - params = { - "limit": 100, - **self.extra_request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - } - if self.expand_items: - params["expand[]"] = self.expand_items - if next_page_token: - params.update(next_page_token) - return params - - def stream_slices( - self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None - ) -> Iterable[Optional[Mapping[str, Any]]]: - checkout_session_state = None - if stream_state: - checkout_session_state = {"expires_at": stream_state["checkout_session_expires_at"]} - for checkout_session in self.checkout_session.read_records( - sync_mode=SyncMode.full_refresh, stream_state=checkout_session_state, stream_slice={} - ): - yield { - "checkout_session_id": checkout_session["id"], - "expires_at": checkout_session["expires_at"], - } - - @property - def raise_on_http_errors(self): - return False - - def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: - if response.status_code == 404: - self.logger.warning(response.json()) - return - response.raise_for_status() - - response_json = response.json() - data = response_json.get("data", []) - if data and stream_slice: - self.logger.info(f"stream_slice: {stream_slice}") - cs_id = stream_slice.get("checkout_session_id", None) - cs_expires_at = stream_slice.get("expires_at", None) - for e in data: - e["checkout_session_id"] = cs_id - e["checkout_session_expires_at"] = cs_expires_at - yield from data - - class CustomerBalanceTransactions(StripeStream): """ API docs: https://stripe.com/docs/api/customer_balance_transactions/list """ - def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): - return f"customers/{stream_slice['id']}/balance_transactions" - - @property - def customers(self) -> IncrementalStripeStream: - return IncrementalStripeStream( + def __init__(self, *args, 
**kwargs): + super().__init__(*args, **kwargs) + self.parent = IncrementalStripeStream( name="customers", path="customers", use_cache=USE_CACHE, @@ -585,13 +504,19 @@ def customers(self) -> IncrementalStripeStream: start_date=self.start_date, ) + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): + return f"customers/{stream_slice['id']}/balance_transactions" + + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() + def stream_slices( self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: - parent_stream = self.customers - slices = parent_stream.stream_slices(sync_mode=SyncMode.full_refresh) + slices = self.parent.stream_slices(sync_mode=SyncMode.full_refresh) for _slice in slices: - for customer in parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice): + for customer in self.parent.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice): # we use `get` here because some attributes may not be returned by some API versions if customer.get("next_invoice_sequence") == 1 and customer.get("balance") == 0: # We're making this check in order to speed up a sync. if a customer's balance is 0 and there are no @@ -626,6 +551,12 @@ def __init__(self, **kwargs): def path(self, **kwargs) -> str: return "setup_attempts" + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + # we use the default http availability strategy here because parent stream may lack data in the incremental stream mode + # and this stream would be marked inaccessible which is not actually true + return HttpAvailabilityStrategy() + def stream_slices( self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: @@ -665,6 +596,10 @@ def __init__(self, *args, **kwargs): parent = StripeStream(*args, name="accounts", path="accounts", use_cache=USE_CACHE, **kwargs) super().__init__(*args, parent=parent, **kwargs) + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): return f"accounts/{stream_slice['parent']['id']}/persons" @@ -676,7 +611,9 @@ def stream_slices( class StripeSubStream(StripeStream, HttpSubStream): - pass + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() class StripeLazySubStream(StripeStream, HttpSubStream): @@ -721,17 +658,6 @@ class StripeLazySubStream(StripeStream, HttpSubStream): } """ - @property - def add_parent_id(self) -> bool: - return self._add_parent_id - - @property - def parent_id(self) -> str: - """ - :return: string with attribute name - """ - return self._parent_id - @property def sub_items_attr(self) -> str: """ @@ -743,14 +669,10 @@ def sub_items_attr(self) -> str: def __init__( self, *args, - add_parent_id: bool = False, - parent_id: Optional[str] = None, sub_items_attr: Optional[str] = None, **kwargs, ): super().__init__(*args, **kwargs) - self._add_parent_id = add_parent_id - self._parent_id = parent_id self._sub_items_attr = sub_items_attr @property @@ -767,24 +689,16 @@ def request_params(self, stream_slice: Mapping[str, Any] = None, **kwargs): return params def read_records(self, sync_mode: SyncMode, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> 
Iterable[Mapping[str, Any]]: - parent_record = stream_slice["parent"] - items_obj = parent_record.get(self.sub_items_attr, {}) + items_obj = stream_slice["parent"].get(self.sub_items_attr, {}) if not items_obj: return - items = list(self.record_extractor.extract_records(items_obj.get("data", []))) - - # get next pages items_next_pages = [] + items = list(self.record_extractor.extract_records(items_obj.get("data", []), stream_slice)) if items_obj.get("has_more") and items: - stream_slice = {self.parent_id: parent_record["id"], "starting_after": items[-1]["id"]} + stream_slice = {"starting_after": items[-1]["id"], **stream_slice} items_next_pages = super().read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice, **kwargs) - - for item in chain(items, items_next_pages): - if self.add_parent_id: - # add reference to parent object when item doesn't have it already - item[self.parent_id] = parent_record["id"] - yield item + yield from chain(items, items_next_pages) class IncrementalStripeLazySubStreamSelector(IStreamSelector): @@ -797,6 +711,11 @@ def get_parent_stream(self, stream_state: Mapping[str, Any]) -> StripeStream: class UpdatedCursorIncrementalStripeLazySubStream(StripeStream, ABC): + """ + This stream uses StripeLazySubStream under the hood to run full refresh or initial incremental syncs. + In case of subsequent incremental syncs, it uses the UpdatedCursorIncrementalStripeStream class. + """ + def __init__( self, parent: StripeStream, @@ -804,8 +723,6 @@ def __init__( cursor_field: str = "updated", legacy_cursor_field: Optional[str] = "created", event_types: Optional[List[str]] = None, - parent_id: Optional[str] = None, - add_parent_id: bool = False, sub_items_attr: Optional[str] = None, response_filter: Optional[Callable] = None, **kwargs, @@ -823,8 +740,6 @@ def __init__( self.lazy_substream = StripeLazySubStream( *args, parent=parent, - parent_id=parent_id, - add_parent_id=add_parent_id, sub_items_attr=sub_items_attr, record_extractor=UpdatedCursorIncrementalRecordExtractor( cursor_field=cursor_field, legacy_cursor_field=legacy_cursor_field, response_filter=response_filter @@ -866,3 +781,62 @@ def read_records( yield from self.parent_stream.read_records( sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state ) + + +class ParentIncrementalStipeSubStream(StripeSubStream): + """ + This stream differs from others in that it runs parent stream in exactly same sync mode it is run itself to generate stream slices. + It also uses regular /v1 API endpoints to sync data no matter what the sync mode is. This means that the event-based API can only + be utilized by the parent stream. 
+ """ + + @property + def cursor_field(self) -> str: + return self._cursor_field + + def __init__(self, cursor_field: str, *args, **kwargs): + self._cursor_field = cursor_field + super().__init__(*args, **kwargs) + + def stream_slices( + self, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None + ) -> Iterable[Optional[Mapping[str, Any]]]: + stream_state = stream_state or {} + if stream_state: + # state is shared between self and parent, but cursor fields are different + stream_state = {self.parent.cursor_field: stream_state.get(self.cursor_field, 0)} + parent_stream_slices = self.parent.stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=stream_state) + for stream_slice in parent_stream_slices: + parent_records = self.parent.read_records( + sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state + ) + for record in parent_records: + yield {"parent": record} + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + return {self.cursor_field: max(current_stream_state.get(self.cursor_field, 0), latest_record[self.cursor_field])} + + @property + def raise_on_http_errors(self) -> bool: + return False + + def parse_response(self, response: requests.Response, *args, **kwargs) -> Iterable[Mapping[str, Any]]: + if response.status_code == 200: + return super().parse_response(response, *args, **kwargs) + if response.status_code == 404: + # When running incremental sync with state, the returned parent object very likely will not contain sub-items + # as the events API does not support expandable items. Parent class will try getting sub-items from this object, + # then from its own API. In case there are no sub-items at all for this entity, API will raise 404 error. + self.logger.warning( + "Data was not found for URL: {response.request.url}. " + "If this is a path for getting child attributes like /v1/checkout/sessions//line_items when running " + "the incremental sync, you may safely ignore this warning." 
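The `ParentIncrementalStipeSubStream` above keeps a single piece of state under the child's cursor but has to hand that value to the parent under the parent's own cursor field before slicing. A rough standalone sketch of that state translation and of the `get_updated_state` bookkeeping follows; it uses plain dicts and mirrors the `checkout_sessions_line_items` configuration, where the child cursor is `checkout_session_updated` and the parent cursor is assumed to be `updated`.

```python
# Rough sketch, not the connector code: state is shared between child and parent,
# but the cursor fields differ, so the child's saved value is re-keyed for the parent.
from typing import Any, Mapping, MutableMapping

CHILD_CURSOR = "checkout_session_updated"
PARENT_CURSOR = "updated"


def parent_state_from_child_state(child_state: Mapping[str, Any]) -> Mapping[str, Any]:
    if not child_state:
        return {}
    return {PARENT_CURSOR: child_state.get(CHILD_CURSOR, 0)}


def get_updated_state(current: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
    return {CHILD_CURSOR: max(current.get(CHILD_CURSOR, 0), latest_record[CHILD_CURSOR])}


state = {"checkout_session_updated": 1685898010}
print(parent_state_from_child_state(state))                  # {'updated': 1685898010}
print(get_updated_state(state, {CHILD_CURSOR: 1699902016}))  # {'checkout_session_updated': 1699902016}
```

Tolerating 404s in `parse_response` matters for exactly this path: a parent record obtained from the events API during an incremental sync may describe a checkout session whose line items can no longer be fetched.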
+ ) + return [] + response.raise_for_status() + + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + # we use the default http availability strategy here because parent stream may lack data in the incremental stream mode + # and this stream would be marked inaccessible which is not actually true + return HttpAvailabilityStrategy() diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py index 0f747acac434..ee41b71dd049 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py @@ -3,34 +3,45 @@ # import logging +import urllib.parse +import pytest from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy from source_stripe.availability_strategy import STRIPE_ERROR_CODES, StripeSubStreamAvailabilityStrategy from source_stripe.streams import IncrementalStripeStream, StripeLazySubStream -def test_traverse_over_substreams(mocker): +@pytest.fixture() +def stream_mock(mocker): + def _mocker(): + return mocker.Mock(stream_slices=mocker.Mock(return_value=[{}]), read_records=mocker.Mock(return_value=[{}])) + return _mocker + + +def test_traverse_over_substreams(stream_mock, mocker): # Mock base HttpAvailabilityStrategy to capture all the check_availability method calls - check_availability_mock = mocker.MagicMock() - check_availability_mock.return_value = (True, None) + check_availability_mock = mocker.MagicMock(return_value=(True, None)) + cdk_check_availability_mock = mocker.MagicMock(return_value=(True, None)) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock + ) + mocker.patch( + "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", cdk_check_availability_mock ) - # Prepare tree of nested objects - root = mocker.Mock() + root = stream_mock() root.availability_strategy = HttpAvailabilityStrategy() root.parent = None - child_1 = mocker.Mock() + child_1 = stream_mock() child_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1.parent = root - child_1_1 = mocker.Mock() + child_1_1 = stream_mock() child_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1.parent = child_1 - child_1_1_1 = mocker.Mock() + child_1_1_1 = stream_mock() child_1_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1_1.parent = child_1_1 @@ -38,39 +49,38 @@ def test_traverse_over_substreams(mocker): is_available, reason = child_1_1_1.availability_strategy.check_availability(child_1_1_1, mocker.Mock(), mocker.Mock()) assert is_available and reason is None - # Check availability strategy was called once for every nested object - assert check_availability_mock.call_count == 4 + assert check_availability_mock.call_count == 3 + assert cdk_check_availability_mock.call_count == 1 # Check each availability strategy was called with proper instance argument - assert id(check_availability_mock.call_args_list[0].args[0]) == id(root) - assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1) - assert id(check_availability_mock.call_args_list[2].args[0]) == id(child_1_1) - assert 
id(check_availability_mock.call_args_list[3].args[0]) == id(child_1_1_1) + assert id(cdk_check_availability_mock.call_args_list[0].args[0]) == id(root) + assert id(check_availability_mock.call_args_list[0].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1_1) + assert id(check_availability_mock.call_args_list[2].args[0]) == id(child_1_1_1) -def test_traverse_over_substreams_failure(mocker): +def test_traverse_over_substreams_failure(stream_mock, mocker): # Mock base HttpAvailabilityStrategy to capture all the check_availability method calls - check_availability_mock = mocker.MagicMock() - check_availability_mock.side_effect = [(True, None), (False, "child_1")] + check_availability_mock = mocker.MagicMock(side_effect=[(True, None), (False, "child_1")]) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) # Prepare tree of nested objects - root = mocker.Mock() + root = stream_mock() root.availability_strategy = HttpAvailabilityStrategy() root.parent = None - child_1 = mocker.Mock() + child_1 = stream_mock() child_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1.parent = root - child_1_1 = mocker.Mock() + child_1_1 = stream_mock() child_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1.parent = child_1 - child_1_1_1 = mocker.Mock() + child_1_1_1 = stream_mock() child_1_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1_1.parent = child_1_1 @@ -83,15 +93,15 @@ def test_traverse_over_substreams_failure(mocker): assert check_availability_mock.call_count == 2 # Check each availability strategy was called with proper instance argument - assert id(check_availability_mock.call_args_list[0].args[0]) == id(root) - assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[0].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1_1) def test_substream_availability(mocker, stream_by_name): check_availability_mock = mocker.MagicMock() check_availability_mock.return_value = (True, None) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) stream = stream_by_name("invoice_line_items") is_available, reason = stream.availability_strategy.check_availability(stream, mocker.Mock(), mocker.Mock()) @@ -106,7 +116,7 @@ def test_substream_availability_no_parent(mocker, stream_by_name): check_availability_mock = mocker.MagicMock() check_availability_mock.return_value = (True, None) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) stream = stream_by_name("invoice_line_items") stream.parent = None @@ -125,3 +135,91 @@ def test_403_error_handling(stream_by_name, requests_mock): available, message = stream.check_availability(logger) assert not available assert STRIPE_ERROR_CODES[error_code] in message + + +@pytest.mark.parametrize( + "stream_name, endpoints, expected_calls", + ( + ( + 
"accounts", + { + "/v1/accounts": {"data": []} + }, + 1 + ), + ( + "refunds", + { + "/v1/refunds": {"data": []} + }, + 2 + ), + ( + "credit_notes", + { + "/v1/credit_notes": {"data": []}, "/v1/events": {"data": []} + }, + 2 + ), + ( + "charges", + { + "/v1/charges": {"data": []}, "/v1/events": {"data": []} + }, + 2 + ), + ( + "subscription_items", + { + "/v1/subscriptions": {"data": [{"id": 1}]}, + "/v1/events": {"data": []} + }, + 3 + ), + ( + "bank_accounts", + { + "/v1/customers": {"data": [{"id": 1}]}, + "/v1/events": {"data": []} + }, + 2 + ), + ( + "customer_balance_transactions", + { + "/v1/events": {"data": [{"data":{"object": {"id": 1}}, "created": 1, "type": "customer.updated"}]}, + "/v1/customers": {"data": [{"id": 1}]}, + "/v1/customers/1/balance_transactions": {"data": []} + }, + 4 + ), + ( + "transfer_reversals", + { + "/v1/transfers": {"data": [{"id": 1}]}, + "/v1/events": {"data": [{"data":{"object": {"id": 1}}, "created": 1, "type": "transfer.updated"}]}, + "/v1/transfers/1/reversals": {"data": []} + }, + 4 + ), + ( + "persons", + { + "/v1/accounts": {"data": [{"id": 1}]}, + "/v1/events": {"data": []}, + "/v1/accounts/1/persons": {"data": []} + }, + 4 + ) + ) +) +def test_availability_strategy_visits_endpoints(stream_by_name, stream_name, endpoints, expected_calls, requests_mock, mocker, config): + for endpoint, data in endpoints.items(): + requests_mock.get(endpoint, json=data) + stream = stream_by_name(stream_name, config) + is_available, reason = stream.check_availability(mocker.Mock(), mocker.Mock()) + assert (is_available, reason) == (True, None) + assert len(requests_mock.request_history) == expected_calls + + for call in requests_mock.request_history: + assert urllib.parse.urlparse(call.url).path in endpoints.keys() diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py index 61b226b5da83..476dbd38a689 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# - +import datetime import logging from contextlib import nullcontext as does_not_raise from unittest.mock import patch @@ -10,7 +10,9 @@ import source_stripe import stripe from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode +from airbyte_cdk.sources.streams.call_rate import CachedLimiterSession, LimiterSession, Rate from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade +from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.utils import AirbyteTracedException from source_stripe import SourceStripe @@ -92,3 +94,57 @@ def test_when_streams_return_full_refresh_as_concurrent(): ).streams(_a_valid_config()) assert len(list(filter(lambda stream: isinstance(stream, StreamFacade), streams))) == 1 + + +@pytest.mark.parametrize( + "input_config, default_call_limit", + ( + ({"account_id": 1, "client_secret": "secret"}, 100), + ({"account_id": 1, "client_secret": "secret", "call_rate_limit": 10}, 10), + ({"account_id": 1, "client_secret": "secret", "call_rate_limit": 110}, 100), + ({"account_id": 1, "client_secret": "sk_test_some_secret"}, 25), + ({"account_id": 1, "client_secret": "sk_test_some_secret", "call_rate_limit": 10}, 10), + ({"account_id": 1, "client_secret": "sk_test_some_secret", "call_rate_limit": 30}, 25), + ), +) +def test_call_budget_creation(mocker, input_config, default_call_limit): + """Test that call_budget was created with specific config i.e., that first policy has specific matchers.""" + + policy_mock = mocker.patch("source_stripe.source.MovingWindowCallRatePolicy") + matcher_mock = mocker.patch("source_stripe.source.HttpRequestMatcher") + source = SourceStripe(catalog=None) + + source.get_api_call_budget(input_config) + + policy_mock.assert_has_calls( + calls=[ + mocker.call(matchers=[mocker.ANY, mocker.ANY], rates=[Rate(limit=20, interval=datetime.timedelta(seconds=1))]), + mocker.call(matchers=[], rates=[Rate(limit=default_call_limit, interval=datetime.timedelta(seconds=1))]), + ], + ) + + matcher_mock.assert_has_calls( + calls=[ + mocker.call(url="https://api.stripe.com/v1/files"), + mocker.call(url="https://api.stripe.com/v1/file_links"), + ] + ) + + +def test_call_budget_passed_to_every_stream(mocker): + """Test that each stream has call_budget passed and creates a proper session""" + + prod_config = {"account_id": 1, "client_secret": "secret"} + source = SourceStripe(catalog=None) + get_api_call_budget_mock = mocker.patch.object(source, "get_api_call_budget") + + streams = source.streams(prod_config) + + assert streams + get_api_call_budget_mock.assert_called_once() + + for stream in streams: + assert isinstance(stream, HttpStream) + session = stream.request_session() + assert isinstance(session, (CachedLimiterSession, LimiterSession)) + assert session._api_budget == get_api_call_budget_mock.return_value diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py index 143331e06992..5f942b152157 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py @@ -7,7 +7,7 @@ import freezegun import pendulum import pytest -from source_stripe.streams import CheckoutSessionsLineItems, CustomerBalanceTransactions, Persons, SetupAttempts +from source_stripe.streams import CustomerBalanceTransactions, Persons, SetupAttempts def read_from_stream(stream, sync_mode, state): @@ -170,40 +170,185 @@ def 
test_lazy_substream_data_is_filtered( assert record["object"] == expected_object -@freezegun.freeze_time("2023-08-23T15:00:15Z") -def test_created_cursor_incremental_stream(requests_mock, stream_by_name, config): - config["start_date"] = str(pendulum.now().subtract(months=23)) - stream = stream_by_name("balance_transactions", {"lookback_window_days": 14, **config}) - requests_mock.get( - "/v1/balance_transactions", - [ +balance_transactions_api_objects = [ + {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "created": 1653299388, "status": "available"}, + {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "created": 1679568588, "status": "available"}, +] + + +refunds_api_objects = [ + { + "id": "re_3NYB8LAHLf1oYfwN3EZRDIfF", + "object": "refund", + "amount": 100, + "charge": "ch_3NYB8LAHLf1oYfwN3P6BxdKj", + "created": 1653299388, + "currency": "usd", + }, + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + }, +] + + +@pytest.mark.parametrize( + "requests_mock_map, expected_records, expected_slices, stream_name, sync_mode, state", + ( + ( { - "json": { - "data": [{"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "status": "available"}], - "has_more": False, - } + "/v1/balance_transactions": [ + { + "json": { + "data": [balance_transactions_api_objects[0]], + "has_more": False, + } + }, + { + "json": { + "data": [balance_transactions_api_objects[-1]], + "has_more": False, + } + }, + ], + }, + [ + { + "id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", + "object": "balance_transaction", + "amount": 435, + "created": 1653299388, + "status": "available", + }, + { + "id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", + "object": "balance_transaction", + "amount": -9164, + "created": 1679568588, + "status": "available", + }, + ], + [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}], + "balance_transactions", + "full_refresh", + {}, + ), + ( + { + "/v1/balance_transactions": [ + { + "json": { + "data": [balance_transactions_api_objects[-1]], + "has_more": False, + } + }, + ], }, + [ + { + "id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", + "object": "balance_transaction", + "amount": -9164, + "created": 1679568588, + "status": "available", + }, + ], + [{"created[gte]": 1665308989, "created[lte]": 1692802815}], + "balance_transactions", + "incremental", + {"created": 1666518588}, + ), + ( { - "json": { - "data": [ - {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "status": "available"} - ], - "has_more": False, - } + "/v1/refunds": [ + { + "json": { + "data": [refunds_api_objects[0]], + "has_more": False, + } + }, + { + "json": { + "data": [refunds_api_objects[-1]], + "has_more": False, + } + }, + ], }, - ], - ) + [ + { + "id": "re_3NYB8LAHLf1oYfwN3EZRDIfF", + "object": "refund", + "amount": 100, + "charge": "ch_3NYB8LAHLf1oYfwN3P6BxdKj", + "created": 1653299388, + "currency": "usd", + }, + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + }, + ], + [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}], + "refunds", + "full_refresh", + {}, + ), + ( + { + "/v1/refunds": [ + { + "json": { + "data": [refunds_api_objects[-1]], + 
"has_more": False, + } + }, + ], + }, + [ + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + } + ], + [{"created[gte]": 1665308989, "created[lte]": 1692802815}], + "refunds", + "incremental", + {"created": 1666518588}, + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15Z") +def test_created_cursor_incremental_stream( + requests_mock, requests_mock_map, stream_by_name, expected_records, expected_slices, stream_name, sync_mode, state, config +): + config["start_date"] = str(pendulum.now().subtract(months=23)) + stream = stream_by_name(stream_name, {"lookback_window_days": 14, **config}) + for url, response in requests_mock_map.items(): + requests_mock.get(url, response) - slices = list(stream.stream_slices("full_refresh")) - assert slices == [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}] - records = [] + slices = list(stream.stream_slices(sync_mode, stream_state=state)) + assert slices == expected_slices + records = read_from_stream(stream, sync_mode, state) + assert records == expected_records + for record in records: + assert bool(record[stream.cursor_field]) + call_history = iter(requests_mock.request_history) for slice_ in slices: - for record in stream.read_records("full_refresh", stream_slice=slice_): - records.append(record) - assert records == [ - {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "status": "available"}, - {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "status": "available"}, - ] + call = next(call_history) + assert urlencode(slice_) in call.url @pytest.mark.parametrize( @@ -213,8 +358,8 @@ def test_created_cursor_incremental_stream(requests_mock, stream_by_name, config ("2020-01-01T00:00:00Z", 14, 0, {}, "2019-12-18T00:00:00Z"), ("2020-01-01T00:00:00Z", 0, 30, {}, "2023-07-24T15:00:15Z"), ("2020-01-01T00:00:00Z", 14, 30, {}, "2023-07-24T15:00:15Z"), - ("2020-01-01T00:00:00Z", 0, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-17T00:00:00Z"), - ("2020-01-01T00:00:00Z", 14, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-03T00:00:00Z"), + ("2020-01-01T00:00:00Z", 0, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-17T00:00:01Z"), + ("2020-01-01T00:00:00Z", 14, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-03T00:00:01Z"), ("2020-01-01T00:00:00Z", 0, 30, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2023-07-24T15:00:15Z"), ("2020-01-01T00:00:00Z", 14, 30, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2023-07-24T15:00:15Z"), ), @@ -326,50 +471,6 @@ def test_updated_cursor_incremental_stream_read_w_state(requests_mock, stream_by assert records == [{"object": "credit_note", "invoice": "in_1K9GK0EcXtiJtvvhSo2LvGqT", "created": 1653341716, "updated": 1691629292}] -def test_checkout_session_line_items(requests_mock): - - session_id_missed = "cs_test_a165K4wNihuJlp2u3tknuohrvjAxyXFUB7nxZH3lwXRKJsadNEvIEWMUJ9" - session_id_exists = "cs_test_a1RjRHNyGUQOFVF3OkL8V8J0lZUASyVoCtsnZYG74VrBv3qz4245BLA1BP" - - response_sessions = { - "data": [{"id": session_id_missed, "expires_at": 100_000}, {"id": session_id_exists, "expires_at": 100_000}], - "has_more": False, - "object": "list", - "url": "/v1/checkout/sessions", - } - - 
response_sessions_line_items = { - "data": [{"id": "li_1JpAUUIEn5WyEQxnfGJT5MbL"}], - "has_more": False, - "object": "list", - "url": "/v1/checkout/sessions/{}/line_items".format(session_id_exists), - } - - response_error = { - "error": { - "code": "resource_missing", - "doc_url": "https://stripe.com/docs/error-codes/resource-missing", - "message": "No such checkout session: '{}'".format(session_id_missed), - "param": "session", - "type": "invalid_request_error", - } - } - - requests_mock.get("https://api.stripe.com/v1/checkout/sessions", json=response_sessions) - requests_mock.get( - "https://api.stripe.com/v1/checkout/sessions/{}/line_items".format(session_id_exists), json=response_sessions_line_items - ) - requests_mock.get( - "https://api.stripe.com/v1/checkout/sessions/{}/line_items".format(session_id_missed), json=response_error, status_code=404 - ) - - stream = CheckoutSessionsLineItems(start_date=100_100, account_id=None) - records = [] - for slice_ in stream.stream_slices(sync_mode="full_refresh"): - records.extend(stream.read_records(sync_mode="full_refresh", stream_slice=slice_)) - assert len(records) == 1 - - def test_customer_balance_transactions_stream_slices(requests_mock, stream_args): stream_args["start_date"] = pendulum.now().subtract(days=1).int_timestamp requests_mock.get( @@ -627,7 +728,6 @@ def test_subscription_items_extra_request_params(requests_mock, stream_by_name, "livemode": False, } ], - "has_more": False, }, ) requests_mock.get( @@ -668,3 +768,333 @@ def test_subscription_items_extra_request_params(requests_mock, stream_by_name, ] assert len(requests_mock.request_history) == 2 assert "subscription=sub_1OApco2eZvKYlo2CEDCzwLrE" in requests_mock.request_history[-1].url + + +checkout_session_api_response = { + "/v1/checkout/sessions": { + "object": "list", + "url": "/v1/checkout/sessions", + "has_more": False, + "data": [ + { + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "object": "checkout.session", + "created": 1699647441, + "expires_at": 1699647441, + "payment_intent": "pi_1Gt0KQ2eZvKYlo2CeWXUgmhy", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + }, + { + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "object": "checkout.session", + "created": 1699744164, + "expires_at": 1699644174, + "payment_intent": "pi_lo2CeWXUgmhy1Gt0KQ2eZvKY", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + }, + ], + } +} + + +checkout_session_line_items_api_response = { + "/v1/checkout/sessions/cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre/line_items": { + "object": "list", + "has_more": False, + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + "link": "/v1/checkout/sessions/cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre/line_items", + }, + 
"/v1/checkout/sessions/cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi/line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions/cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi/line_items", + "data": [ + { + "id": "li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, +} + + +checkout_session_events_response = { + "/v1/events": { + "data": [ + { + "id": "evt_1NdNFoEcXtiJtvvhBP5mxQmL", + "object": "event", + "api_version": "2020-08-27", + "created": 1699902016, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "created": 1653341716, + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + }, + { + "id": "evt_XtiJtvvhBP5mxQmL1NdNFoEc", + "object": "event", + "api_version": "2020-08-27", + "created": 1699901630, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "created": 1653341716, + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + }, + ], + "has_more": False, + }, +} + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, sync_mode, state, expected_slices", + ( + ( + checkout_session_api_response, + "checkout_sessions_line_items", + "full_refresh", + {}, + [ + { + "parent": { + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "object": "checkout.session", + "created": 1699647441, + "updated": 1699647441, + "expires_at": 1699647441, + "payment_intent": "pi_1Gt0KQ2eZvKYlo2CeWXUgmhy", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + } + }, + { + "parent": { + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "object": "checkout.session", + "created": 1699744164, + "updated": 1699744164, + "expires_at": 1699644174, + "payment_intent": "pi_lo2CeWXUgmhy1Gt0KQ2eZvKY", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + } + }, + ], + ), + ( + checkout_session_events_response, + "checkout_sessions_line_items", + "incremental", + {"checkout_session_updated": 1685898010}, + [ + { + "parent": { + "object": "checkout_session", + "checkout_session": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "created": 1653341716, + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "expires_at": 1692896410, + "updated": 1699902016, + } + }, + { + "parent": { + "object": "checkout_session", + "checkout_session": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "created": 1653341716, + "updated": 1699901630, + "id": 
"cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "expires_at": 1692896410, + } + }, + ], + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15") +def test_parent_incremental_substream_stream_slices( + requests_mock, requests_mock_map, stream_by_name, stream_name, sync_mode, state, expected_slices +): + for url, response in requests_mock_map.items(): + requests_mock.get(url, json=response) + + stream = stream_by_name(stream_name) + slices = stream.stream_slices(sync_mode, stream_state=state) + assert list(slices) == expected_slices + + +checkout_session_line_items_slice_to_record_data_map = { + "id": "checkout_session_id", + "expires_at": "checkout_session_expires_at", + "created": "checkout_session_created", + "updated": "checkout_session_updated", +} + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, sync_mode, state, mapped_fields", + ( + ( + {**checkout_session_api_response, **checkout_session_line_items_api_response}, + "checkout_sessions_line_items", + "full_refresh", + {}, + checkout_session_line_items_slice_to_record_data_map, + ), + ( + {**checkout_session_events_response, **checkout_session_line_items_api_response}, + "checkout_sessions_line_items", + "incremental", + {"checkout_session_updated": 1685898010}, + checkout_session_line_items_slice_to_record_data_map, + ), + ), +) +def test_parent_incremental_substream_records_contain_data_from_slice( + requests_mock, requests_mock_map, stream_by_name, stream_name, sync_mode, state, mapped_fields +): + for url, response in requests_mock_map.items(): + requests_mock.get(url, json=response) + + stream = stream_by_name(stream_name) + for slice_ in stream.stream_slices(sync_mode, stream_state=state): + for record in stream.read_records(sync_mode, stream_slice=slice_, stream_state=state): + for key, value in mapped_fields.items(): + assert slice_["parent"][key] == record[value] + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, state", + ( + ( + { + "/v1/events": ( + { + "data": [ + { + "id": "evt_1NdNFoEcXtiJtvvhBP5mxQmL", + "object": "event", + "api_version": "2020-08-27", + "created": 1699902016, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_1K9GK0EcXtiJtvvhSo2LvGqT", + "created": 1653341716, + "id": "cs_1K9GK0EcXtiJtvvhSo2LvGqT", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + } + ], + "has_more": False, + }, + 200, + ), + "/v1/checkout/sessions/cs_1K9GK0EcXtiJtvvhSo2LvGqT/line_items": ({}, 404), + }, + "checkout_sessions_line_items", + {"checkout_session_updated": 1686934810}, + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15") +def test_parent_incremental_substream_handles_404(requests_mock, requests_mock_map, stream_by_name, stream_name, state, caplog): + for url, (response, status) in requests_mock_map.items(): + requests_mock.get(url, json=response, status_code=status) + + stream = stream_by_name(stream_name) + records = read_from_stream(stream, "incremental", state) + assert records == [] + assert "Data was not found for URL" in caplog.text diff --git a/build.gradle b/build.gradle index 99ea87e6f74c..dd81f11462cb 100644 --- a/build.gradle +++ b/build.gradle @@ -332,6 +332,8 @@ subprojects { subproj -> // Effectively disable JUnit concurrency by running tests in only one thread by default. systemProperty 'junit.jupiter.execution.parallel.config.strategy', 'fixed' systemProperty 'junit.jupiter.execution.parallel.config.fixed.parallelism', 1 + // Order test classes by annotation. 
+ systemProperty 'junit.jupiter.testclass.order.default', 'org.junit.jupiter.api.ClassOrderer$OrderAnnotation' if (!subproj.hasProperty('testExecutionConcurrency')) { // By default, let gradle spawn as many independent workers as it wants. diff --git a/buildSrc/src/main/groovy/airbyte-java-connector.gradle b/buildSrc/src/main/groovy/airbyte-java-connector.gradle index 7c86cf297748..9d8a60ed88c8 100644 --- a/buildSrc/src/main/groovy/airbyte-java-connector.gradle +++ b/buildSrc/src/main/groovy/airbyte-java-connector.gradle @@ -5,11 +5,10 @@ Also facilitates importing and working with the Java CDK. import org.gradle.api.Plugin import org.gradle.api.Project -import org.gradle.api.tasks.testing.Test class AirbyteJavaConnectorExtension { - boolean useLocalCdk = true + boolean useLocalCdk String cdkVersionRequired List features = [] // e.g. 'db-sources', 'db-destinations' Project project @@ -18,67 +17,56 @@ class AirbyteJavaConnectorExtension { this.project = project } + void setUseLocalCdk(boolean useLocalCdk) { + this.useLocalCdk = useLocalCdk + addCdkDependencies() + } + + static final List IMPLEMENTATION = [ + 'airbyte-commons', + 'airbyte-json-validation', + 'airbyte-commons-cli', + 'airbyte-api', + 'config-models-oss', + 'init-oss', + ] + + static final List TEST_IMPLEMENTATION = [ + 'airbyte-commons', + 'airbyte-json-validation', + 'airbyte-api', + 'config-models-oss', + ] + + static final List INTEGRATION_TEST_IMPLEMENTATION = [ + 'config-models-oss', + 'init-oss', + 'acceptance-test-harness', + ] + void addCdkDependencies() { - // Create a list of CDK submodules to import - def submoduleNames = ['core'] - features.each { feature -> - submoduleNames.add(feature) - } - if (useLocalCdk) { - project.dependencies { - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-json-validation') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons-cli') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-api') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:init-oss') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-json-validation') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-api') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - integrationTestJavaImplementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - integrationTestJavaImplementation project.project(':airbyte-cdk:java:airbyte-cdk:init-oss') - integrationTestJavaImplementation project.project(':airbyte-cdk:java:airbyte-cdk:acceptance-test-harness') - submoduleNames.each { submoduleName -> - // Add the CDK module to the dependencies - def cdkModule = project.project(":airbyte-cdk:java:airbyte-cdk:${submoduleName}") - def testFixturesRef = testFixtures(project.project(":airbyte-cdk:java:airbyte-cdk:${submoduleName}")) - implementation cdkModule - testImplementation cdkModule - testImplementation testFixturesRef - integrationTestJavaImplementation cdkModule - integrationTestJavaImplementation testFixturesRef - performanceTestJavaImplementation cdkModule - performanceTestJavaImplementation testFixturesRef - } + def projectName = { ":airbyte-cdk:java:airbyte-cdk:${it}" } + def jarName = { 
"io.airbyte.cdk:airbyte-cdk-${it}:${cdkVersionRequired}" } + project.dependencies { + def dep = { useLocalCdk ? project.project(projectName(it)) : jarName(it) } + def testFixturesDep = { useLocalCdk ? testFixtures(project.project(projectName(it))) : "${jarName(it)}:test-fixtures" } + + IMPLEMENTATION.each { + implementation dep(it) + testFixturesImplementation dep(it) } - } else { - project.dependencies { - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-json-validation:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons-cli:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-api:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-init-oss:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-json-validation:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-api:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-init-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-acceptance-test-harness:${cdkVersionRequired}" - submoduleNames.each { submoduleName -> - // Add the cdkModule to the dependencies - def cdkModule = "io.airbyte.cdk:airbyte-cdk-${submoduleName}:${cdkVersionRequired}" - def testFixturesRef = "io.airbyte.cdk:airbyte-cdk-${submoduleName}:${cdkVersionRequired}:test-fixtures" - implementation cdkModule - testImplementation cdkModule - testImplementation testFixturesRef - integrationTestJavaImplementation cdkModule - integrationTestJavaImplementation testFixturesRef - performanceTestJavaImplementation cdkModule - performanceTestJavaImplementation testFixturesRef - } + TEST_IMPLEMENTATION.each {testImplementation dep(it) } + INTEGRATION_TEST_IMPLEMENTATION.each {integrationTestJavaImplementation dep(it) } + (["core"] + features).each { + implementation dep(it) + testFixturesImplementation dep(it) + testFixturesImplementation testFixturesDep(it) + testImplementation dep(it) + testImplementation testFixturesDep(it) + integrationTestJavaImplementation dep(it) + integrationTestJavaImplementation testFixturesDep(it) + performanceTestJavaImplementation dep(it) + performanceTestJavaImplementation testFixturesDep(it) } } } @@ -89,24 +77,25 @@ class AirbyteJavaConnectorPlugin implements Plugin { @Override void apply(Project project) { - // def cdkTargetVersion = project.ext.getCdkTargetVersion(project) - def extension = project.extensions.create('airbyteJavaConnector', AirbyteJavaConnectorExtension, project) + project.plugins.apply('java-test-fixtures') project.plugins.apply(AirbyteIntegrationTestJavaPlugin) project.plugins.apply(AirbytePerformanceTestJavaPlugin) + project.configurations { + testFixturesImplementation.extendsFrom implementation + testFixturesRuntimeOnly.extendsFrom runtimeOnly + } + project.dependencies { // Integration and performance tests should automatically // have access to the project's own main source sets. 
integrationTestJavaImplementation project + integrationTestJavaImplementation testFixtures(project) performanceTestJavaImplementation project + performanceTestJavaImplementation testFixtures(project) } - // TODO: figure out how to make this work. - // Currently fails with error: - // "Cannot change dependencies of dependency configuration '...' after it has been included in dependency resolution." - // project.afterEvaluate(proj -> { - // extension.addCdkDependencies(); - // }); + project.extensions.create('airbyteJavaConnector', AirbyteJavaConnectorExtension, project) } } diff --git a/docs/.gitbook/assets/explore_logs.png b/docs/.gitbook/assets/explore_logs.png deleted file mode 100644 index 98d159e8af7a..000000000000 Binary files a/docs/.gitbook/assets/explore_logs.png and /dev/null differ diff --git a/docs/archive/changelog/README.md b/docs/archive/changelog/README.md deleted file mode 100644 index cc854f303e60..000000000000 --- a/docs/archive/changelog/README.md +++ /dev/null @@ -1,645 +0,0 @@ -# Changelog - -## 1/28/2022 Summary - -* New Source: Chartmogul (contributyed by Titas Skrebė) -* New Source: Hellobaton (contributed by Daniel Luftspring) -* New Source: Flexport (contributed by Juozas) -* New Source: PersistIq (contributed by Wadii Zaim) - -* ✨ Postgres Source: Users can now select which schemas they wish to sync before discovery. This makes the discovery stage for large instances much more performant. -* ✨ Shopify Source: Now verifies permissions on the token before accessing resources. -* ✨ Snowflake Destination: Users now have access to an option to purge their staging data. -* ✨ HubSpot Source: Added some more fields for the email_events stream. -* ✨ Amazon Seller Partner Source: Added the GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL report stream. (contributed by @ron-damon) -* ✨ HubSpot Source: Added the form_submission and property_history streams. - -* 🐛 DynamoDB Destination: The parameter dynamodb_table_name is now named dynamodb_table_name_prefix to more accurately represent it. -* 🐛 Intercom Source: The handling of scroll param is now fixed when it is expired. -* 🐛 S3 + GCS Destinations: Now support arrays with unknown item type. -* 🐛 Postgres Source: Now supports handling of the Java SQL date type. -* 🐛 Salesforce Source: No longer fails during schema generation. - -## 1/13/2022 Summary - -⚠️ WARNING ⚠️ - -Snowflake Source: Normalization with Snowflake now produces permanent tables. [If you want to continue creating transient tables, you will need to create a new transient database for Airbyte.] - -* ✨ GitHub Source: PR related streams now support incremental sync. -* ✨ HubSpot Source: We now support ListMemberships in the Contacts stream. -* ✨ Azure Blob Storage Destination: Now has the option to add a BufferedOutputStream to improve performance and fix writing data with over 50GB in a stream. (contributed by @bmatticus) - -* 🐛 Normalization partitioning now works as expected with FLOAT64 and BigQuery. -* 🐛 Normalization now works properly with quoted and case sensitive columns. -* 🐛 Source MSSQL: Added support for some missing data types. -* 🐛 Snowflake Destination: Schema is now not created if it previously exists. -* 🐛 Postgres Source: Now properly reads materialized views. -* 🐛 Delighted Source: Pagination for survey_responses, bounces and unsubscribes streams now works as expected. -* 🐛 Google Search Console Source: Incremental sync now works as expected. -* 🐛 Recurly Source: Now does not load all accounts when importing account coupon redemptions. 
-* 🐛 Salesforce Source: Now properly handles 400 when streams don't support query or queryAll. - -## 1/6/2022 Summary - -* New Source: 3PL Central (contributed by Juozas) -* New Source: My Hours (contributed by Wisse Jelgersma) -* New Source: Qualaroo (contributed by gunu) -* New Source: SearchMetrics - -* 💎 Salesforce Source: Now supports filtering streams at configuration, making it easier to handle large Salesforce instances. -* 💎 Snowflake Destination: Now supports byte-buffering for staged inserts. -* 💎 Redshift Destination: Now supports byte-buffering for staged inserts. -* ✨ Postgres Source: Now supports all Postgres 14 types. -* ✨ Recurly Source: Now supports incremental sync for all streams. -* ✨ Zendesk Support Source: Added the Brands, CustomRoles, and Schedules streams. -* ✨ Zendesk Support Source: Now uses cursor-based pagination. -* ✨ Kustomer Source: Setup configuration is now more straightforward. -* ✨ Hubspot Source: Now supports incremental sync on all streams where possible. -* ✨ Facebook Marketing Source: Fixed schema for breakdowns fields. -* ✨ Facebook Marketing Source: Added asset_feed_spec to AdCreatives stream. -* ✨ Redshift Destination: Now has an option to toggle the deletion of staging data. - -* 🐛 S3 Destination: Avro and Parquet formats are now processed correctly. -* 🐛 Snowflake Destination: Fixed SQL Compliation error. -* 🐛 Kafka Source: SASL configurations no longer throw null pointer exceptions (contributed by Nitesh Kumar) -* 🐛 Salesforce Source: Now throws a 400 for non-queryable streams. -* 🐛 Amazon Ads Source: Polling for report generation is now much more resilient. (contributed by Juozas) -* 🐛 Jira Source: The filters stream now works as expected. -* 🐛 BigQuery Destination: You can now properly configure the buffer size with the part_size config field. -* 🐛 Snowflake Destination: You can now properly configure the buffer size with the part_size config field. -* 🐛 CockroachDB Source: Now correctly only discovers tables the user has permission to access. -* 🐛 Stripe Source: The date and arrival_date fields are now typed correctly. - -## 12/16/2021 Summary - -🎉 First off... There's a brand new CDK! Menno Hamburg contributed a .NET/C# implementation for our CDK, allowing you to write HTTP API sources and Generic Dotnet sources. Thank you so much Menno, this is huge! - -* New Source: OpenWeather -* New Destination: ClickHouse (contributed by @Bo) -* New Destination: RabbitMQ (contributed by @Luis Gomez) -* New Destination: Amazon SQS (contributed by @Alasdair Brown) -* New Destination: Rockset (contributed by @Steve Baldwin) - -* ✨ Facebook Marketing Source: Updated the campaign schema with more relevant fields. (contributed by @Maxime Lavoie) -* ✨ TikTok Marketing Source: Now supports the Basic Report stream. -* ✨ MySQL Source: Now supports all MySQL 8.0 data types. -* ✨ Klaviyo Source: Improved performance, added incremental sync support to the Global Exclusions stream. -* ✨ Redshift Destination: You can now specify a bucket path to stage your data in before inserting. -* ✨ Kubernetes deployments: Sidecar memory is now 25Mi, up from 6Mi to cover all usage cases. -* ✨ Kubernetes deployments: The Helm chart can now set up writing logs to S3 easily. (contributed by @Valentin Nourdin) - -* 🐛 Python CDK: Now shows the stack trace of unhandled exceptions. -* 🐛 Google Analytics Source: Fix data window input validation, fix date type conversion. -* 🐛 Google Ads Source: Data from the end_date for syncs is now included in a sync. 
-* 🐛 Marketo Source: Fixed issues around input type conversion and conformation to the schema. -* 🐛 Mailchimp Source: Fixed schema conversion error causing sync failures. -* 🐛 PayPal Transactions Source: Now reports full error message details on failure. -* 🐛 Shopify Source: Normalization now works as expected. - -## 12/9/2021 Summary - -⚠️ WARNING ⚠️ - -v0.33.0 is a minor version with breaking changes. Take the normal precautions with upgrading safely to this version. -v0.33.0 has a bug that affects GCS logs on Kubernetes. Upgrade straight to v0.33.2 if you are running a K8s deployment of Airbyte. - -* New Source: Mailgun - -🎉 Snowflake Destination: You can now stage your inserts, making them much faster. - -* ✨ Google Ads Source: Source configuration is now more clear. -* ✨ Google Analytics Source: Source configuration is now more clear. -* ✨ S3 Destination: You can now write timestamps in Avro and Parquet formats. -* ✨ BigQuery & BigQuery Denormalized Destinations: Now use byte-based buffering for batch inserts. -* ✨ Iterable Source: Now has email validation on the list_users stream. - -* 🐛 Incremental normalization now works properly with empty tables. -* 🐛 LinkedIn Ads Source: 429 response is now properly handled. -* 🐛 Intercom Source: Now handles failed pagination requests with backoffs. -* 🐛 Intercom Source: No longer drops records from the conversation stream. -* 🐛 Google Analytics Source: 400 errors no longer get ignored with custom reports. -* 🐛 Marketo Source: The createdAt and updatedAt fields are now formatted correctly. - -## 12/2/2021 Summary - -🎃 **Hacktoberfest Submissions** 🎃 ------------------------------------------ -* New Destination: Redis (contributed by @Ivica Taseski) -* New Destination: MQTT (contributed by @Mario Molina) -* New Destination: Google Firestore (contributed by @Adam Dobrawy) -* New Destination: Kinesis (contributed by @Ivica Taseski) -* New Source: Zenloop (contributed by @Alexander Batoulis) -* New Source: Outreach (contributed by @Luis Gomez) - -* ✨ Zendesk Source: The chats stream now supports incremental sync and added testing for all streams. -* 🐛 Monday Source: Pagination now works as expected and the schema has been fixed. -* 🐛 Postgres Source: Views are now properly listed during schema discovery. -* 🐛 Postgres Source: Using the money type with an amount greater than 1000 works properly now. -* 🐛 Google Search Console Search: We now set a default end_data value. -* 🐛 Mixpanel Source: Normalization now works as expected and streams are now displayed properly in the UI. -* 🐛 MongoDB Source: The DATE_TIME type now uses milliseconds. - -## 11/25/2021 Summary -Hey Airbyte Community! Let's go over all the changes from v.32.5 and prior! - -🎃 **Hacktoberfest Submissions** 🎃 -* New Source: Airtable (contributed by Tuan Nguyen). -* New Source: Notion (contributed by Bo Lu). -* New Source: Pardot (contributed by Tuan Nguyen). - -* New Source: Youtube analytics. - -* ✨ Source Exchange Rates: add ignore_weekends option. -* ✨ Source Facebook: add the videos stream. -* ✨ Source Freshdesk: removed the limitation in streams pagination. -* ✨ Source Jira: add option to render fields in HTML format. -* ✨ Source MongoDB v2: improve read performance. -* ✨ Source Pipedrive: specify schema for "persons" stream. -* ✨ Source PostgreSQL: exclude tables on which user doesn't have select privileges. -* ✨ Source SurveyMonkey: improve connection check. - -* 🐛 Source Salesforce: improve resiliency of async bulk jobs. 
-* 🐛 Source Zendesk Support: fix missing ticket_id in ticket_comments stream. -* 🐛 Normalization: optimize incremental normalization runtime with Snowflake. - -As usual, thank you so much to our wonderful contributors this week that have made Airbyte into what it is today: Madison Swain-Bowden, Tuan Nguyen, Bo Lu, Adam Dobrawy, Christopher Wu, Luis Gomez, Ivica Taseski, Mario Molina, Ping Yee, Koji Matsumoto, Sujit Sagar, Shadab, Juozas V.([Labanoras Tech](http://labanoras.io)) and Serhii Chvaliuk! - -## 11/17/2021 Summary - -Hey Airbyte Community! Let's go over all the changes from v.32.1 and prior! But first, there's an important announcement I need to make about upgrading Airbyte to v.32.1. - -⚠️ WARNING ⚠️ -Upgrading to v.32.0 is equivalent to a major version bump. If your current version is v.32.0, you must upgrade to v.32.0 first before upgrading to any later version - -Keep in mind that this upgrade requires your all of your connector Specs to be retrievable, or Airbyte will fail on startup. You can force delete your connector Specs by setting the `VERSION_0_32_0_FORCE_UPGRADE` environment variable to `true`. Steps to specifically check out v.32.0 and details around this breaking change can be found [here](https://docs.airbyte.com/operator-guides/upgrading-airbyte/#mandatory-intermediate-upgrade). - -*Now back to our regularly scheduled programming.* - -🎃 Hacktoberfest Submissions 🎃 - -* New Destination: ScyllaDB (contributed by Ivica Taseski) -* New Source: Azure Table Storage (contributed by geekwhocodes) -* New Source: Linnworks (contributed by Juozas V.([Labanoras Tech](http://labanoras.io))) - -* ✨ Source MySQL: Now has basic performance tests. -* ✨ Source Salesforce: We now automatically transform and handle incorrect data for the anyType and calculated types. - -* 🐛 IBM Db2 Source: Now handles conversion from DECFLOAT to BigDecimal correctly. -* 🐛 MSSQL Source: Now handles VARBINARY correctly. -* 🐛 CockroachDB Source: Improved parsing of various data types. - -As usual, thank you so much to our wonderful contributors this week that have made Airbyte into what it is today: Achmad Syarif Hidayatullah, Tuan Nguyen, Ivica Taseski, Hai To, Juozas, gunu, Shadab, Per-Victor Persson, and Harsha Teja Kanna! - -## 11/11/2021 Summary - -Time to go over changes from v.30.39! And... let's get another update on Hacktoberfest. - -🎃 Hacktoberfest Submissions 🎃 - -* New Destination: Cassandra (contributed by Ivica Taseski) -* New Destination: Pulsar (contributed by Mario Molina) -* New Source: Confluence (contributed by Tuan Nguyen) -* New Source: Monday (contributed by Tuan Nguyen) -* New Source: Commerce Tools (contributed by James Wilson) -* New Source: Pinterest Marketing (contributed by us!) - -* ✨ Shopify Source: Now supports the FulfillmentOrders and Fulfillments streams. -* ✨ Greenhouse Source: Now supports the Demographics stream. -* ✨ Recharge Source: Broken requests should now be re-requested with improved backoff. -* ✨ Stripe Source: Now supports the checkout_sessions, checkout_sessions_line_item, and promotion_codes streams. -* ✨ Db2 Source: Now supports SSL. - -* 🐛 We've made some updates to incremental normalization to fix some outstanding issues. [Details](https://github.com/airbytehq/airbyte/pull/7669) -* 🐛 Airbyte Server no longer crashes due to too many open files. -* 🐛 MSSQL Source: Data type conversion with smalldatetime and smallmoney works correctly now. 
-* 🐛 Salesforce Source: anyType fields can now be retrieved properly with the BULK API -* 🐛 BigQuery-Denormalized Destination: Fixed JSON parsing with $ref fields. - -As usual, thank you to our awesome contributors that have done awesome work during the last week: Tuan Nguyen, Harsha Teja Kanna, Aaditya S, James Wilson, Vladimir Remar, Yuhui Shi, Mario Molina, Ivica Taseski, Collin Scangarella, and haoranyu! - -## 11/03/2021 Summary - -It's patch notes time. Let's go over the changes from 0.30.24 and before. But before we do, let's get a quick update on how Hacktober is going! - -🎃 Hacktoberfest Submissions 🎃 - -* New Destination: Elasticsearch (contributed by Jeremy Branham) -* New Source: Salesloft (contributed by Pras) -* New Source: OneSignal (contributed by Bo) -* New Source: Strava (contributed by terencecho) -* New Source: Lemlist (contributed by Igli Koxha) -* New Source: Amazon SQS (contributed by Alasdair Brown) -* New Source: Freshservices (contributed by Tuan Nguyen) -* New Source: Freshsales (contributed by Tuan Nguyen) -* New Source: Appsflyer (contributed by Achmad Syarif Hidayatullah) -* New Source: Paystack (contributed by Foluso Ogunlana) -* New Source: Sentry (contributed by koji matsumoto) -* New Source: Retently (contributed by Subhash Gopalakrishnan) -* New Source: Delighted! (contributed by Rodrigo Parra) - -with 18 more currently in review... - -🎉 **Incremental Normalization is here!** 🎉 - -💎 Basic normalization no longer runs on already normalized data, making it way faster and cheaper. :gem: - -🎉 **Airbyte Compiles on M1 Macs!** - -Airbyte developers with M1 chips in their MacBooks can now compile the project and run the server. This is a major step towards being able to fully run Airbyte on M1. (contributed by Harsha Teja Kanna) - -* ✨ BigQuery Destination: You can now run transformations in batches, preventing queries from hitting BigQuery limits. (contributed by Andrés Bravo) -* ✨ S3 Source: Memory and Performance optimizations, also some fancy new PyArrow CSV configuration options. -* ✨ Zuora Source: Now supports Unlimited as an option for the Data Query Live API. -* ✨ Clickhouse Source: Now supports SSL and connection via SSH tunneling. - -* 🐛 Oracle Source: Now handles the LONG RAW data type correctly. -* 🐛 Snowflake Source: Fixed parsing of extreme values for FLOAT and NUMBER data types. -* 🐛 Hubspot Source: No longer fails due to lengthy URI/URLs. -* 🐛 Zendesk Source: The chats stream now pulls data past the first page. -* 🐛 Jira Source: Normalization now works as expected. - -As usual, thank you to our awesome contributors that have done awesome work during this productive spooky season: Tuan Nguyen, Achmad Syarif Hidayatullah, Christopher Wu, Andrés Bravo, Harsha Teja Kanna, Collin Scangarella, haoranyu, koji matsumoto, Subhash Gopalakrishnan, Jeremy Branham, Rodrigo Parra, Foluso Ogunlana, EdBizarro, Gergely Lendvai, Rodeoclash, terencecho, Igli Koxha, Alasdair Brown, bbugh, Pras, Bo, Xiangxuan Liu, Hai To, s-mawjee, Mario Molina, SamyPesse, Yuhui Shi, Maciej Nędza, Matt Hoag, and denis-sokolov! - -## 10/20/2021 Summary - -It's patch notes time! Let's go over changes from 0.30.16! But before we do... I want to remind everyone that Airbyte Hacktoberfest is currently taking place! For every connector that is merged into our codebase, you'll get $500, so make sure to submit before the hackathon ends on November 19th. 
- -* 🎉 New Source: WooCommerce (contributed by James Wilson) -* 🎉 K8s deployments: Worker image pull policy is now configurable (contributed by Mario Molina) - -* ✨ MSSQL destination: Now supports basic normalization -* 🐛 LinkedIn Ads source: Analytics streams now work as expected. - -We've had a lot of contributors over the last few weeks, so I'd like to thank all of them for their efforts: James Wilson, Mario Molina, Maciej Nędza, Pras, Tuan Nguyen, Andrés Bravo, Christopher Wu, gunu, Harsha Teja Kanna, Jonathan Stacks, darian, Christian Gagnon, Nicolas Moreau, Matt Hoag, Achmad Syarif Hidayatullah, s-mawjee, SamyPesse, heade, zurferr, denis-solokov, and aristidednd! - -## 09/29/2021 Summary - -It's patch notes time, let's go over the changes from our new minor version, v0.30.0. As usual, bug fixes are in the thread. - -* New source: LinkedIn Ads -* New source: Kafka -* New source: Lever Hiring - -* 🎉 New License: Nothing changes for users of Airbyte/contributors. You just can't sell your own Airbyte Cloud! - -* 💎 New API endpoint: You can now call connections/search in the web backend API to search sources and destinations. (contributed by Mario Molina) -* 💎 K8s: Added support for ImagePullSecrets for connector images. -* 💎 MSSQL, Oracle, MySQL sources & destinations: Now support connection via SSH (Bastion server) - -* ✨ MySQL destination: Now supports connection via TLS/SSL -* ✨ BigQuery (denormalized) destination: Supports reading BigQuery types such as date by reading the format field (contributed by Nicolas Moreau) -* ✨ Hubspot source: Added contacts associations to the deals stream. -* ✨ GitHub source: Now supports pulling commits from user-specified branches. -* ✨ Google Search Console source: Now accepts admin email as input when using a service account key. -* ✨ Greenhouse source: Now identifies API streams it has access to if permissions are limited. -* ✨ Marketo source: Now Airbyte native. -* ✨ S3 source: Now supports any source that conforms to the S3 protocol (Non-AWS S3). -* ✨ Shopify source: Now reports pre_tax_price on the line_items stream if you have Shopify Plus. -* ✨ Stripe source: Now actually uses the mandatory start_date config field for incremental syncs. - -* 🏗 Python CDK: Now supports passing custom headers to the requests in OAuth2, enabling token refresh calls. -* 🏗 Python CDK: Parent streams can now be configured to cache data for their child streams. -* 🏗 Python CDK: Now has a Transformer class that can cast record fields to the data type expected by the schema. - -* 🐛 Amplitude source: Fixed schema for date-time objects. -* 🐛 Asana source: Schema fixed for the sections, stories, tasks, and users streams. -* 🐛 GitHub source: Added error handling for streams not applicable to a repo. (contributed by Christopher Wu) -* 🐛 Google Search Console source: Verifies access to sites when performing the connection check. -* 🐛 Hubspot source: Now conforms to the V3 API, with streams such as owners reflecting the new fields. -* 🐛 Intercom source: Fixed data type for the updated_at field. (contributed by Christian Gagnon) -* 🐛 Iterable source: Normalization now works as expected. -* 🐛 Pipedrive source: Schema now reflects the correct types for date/time fields. -* 🐛 Stripe source: Incorrect timestamp formats removed for coupons and subscriptions streams. -* 🐛 Salesforce source: You can now sync more than 10,000 records with the Bulk API. -* 🐛 Snowflake destination: Now accepts any date-time format with normalization. 
-* 🐛 Snowflake destination: Inserts are now split into batches to accommodate for large data loads. - -Thank you to our awesome contributors. Y'all are amazing: Mario Molina, Pras, Vladimir Remar, Christopher Wu, gunu, Juliano Benvenuto Piovezan, Brian M, Justinas Lukasevicius, Jonathan Stacks, Christian Gagnon, Nicolas Moreau, aristidednd, camro, minimax75, peter-mcconnell, and sashkalife! - -## 09/16/2021 Summary - -Now let's get to the 0.29.19 changelog. As with last time, bug fixes are in the thread! - -* New Destination: Databricks 🎉 -* New Source: Google Search Console -* New Source: Close.com - -* 🏗 Python CDK: Now supports auth workflows involving query params. -* 🏗 Java CDK: You can now run the connector gradle build script on Macs with M1 chips! (contributed by @Harsha Teja Kanna) - -* 💎 Google Ads source: You can now specify user-specified queries in GAQL. -* ✨ GitHub source: All streams with a parent stream use cached parent stream data when possible. -* ✨ Shopify source: Substantial performance improvements to the incremental sync mode. -* ✨ Stripe source: Now supports the PaymentIntents stream. -* ✨ Pipedrive source: Now supports the Organizations stream. -* ✨ Sendgrid source: Now supports the SingleSendStats stream. -* ✨ Bing Ads source: Now supports the Report stream. -* ✨ GitHub source: Now supports the Reactions stream. -* ✨ MongoDB source: Now Airbyte native! -* 🐛 Facebook Marketing source: Numeric values are no longer wrapped into strings. -* 🐛 Facebook Marketing source: Fetching conversion data now works as expected. (contributed by @Manav) -* 🐛 Keen destination: Timestamps are now parsed correctly. -* 🐛 S3 destination: Parquet schema parsing errors are fixed. -* 🐛 Snowflake destination: No longer syncs unnecessary tables with S3. -* 🐛 SurveyMonkey source: Cached responses are now decoded correctly. -* 🐛 Okta source: Incremental sync now works as expected. - -Also, a quick shout out to Jinni Gu and their team who made the DynamoDB destination that we announced last week! - -As usual, thank you to all of our contributors: Harsha Teja Kanna, Manav, Maciej Nędza, mauro, Brian M, Iakov Salikov, Eliziario (Marcos Santos), coeurdestenebres, and mohammadbolt. - -## 09/09/2021 Summary - -We're going over the changes from 0.29.17 and before... and there's a lot of big improvements here, so don't miss them! - -**New Source**: Facebook Pages **New Destination**: MongoDB **New Destination**: DynamoDB - -* 🎉 You can now send notifications via webhook for successes and failures on Airbyte syncs. \(This is a massive contribution by @Pras, thank you\) 🎉 -* 🎉 Scheduling jobs and worker jobs are now separated, allowing for workers to be scaled horizontally. -* 🎉 When developing a connector, you can now preview what your spec looks like in real time with this process. -* 🎉 Oracle destination: Now has basic normalization. -* 🎉 Add XLSB \(binary excel\) support to the Files source \(contributed by Muutech\). -* 🎉 You can now properly cancel K8s deployments. -* ✨ S3 source: Support for Parquet format. -* ✨ Github source: Branches, repositories, organization users, tags, and pull request stats streams added \(contributed by @Christopher Wu\). -* ✨ BigQuery destination: Added GCS upload option. -* ✨ Salesforce source: Now Airbyte native. -* ✨ Redshift destination: Optimized for performance. -* 🏗 CDK: 🎉 We’ve released a tool to generate JSON Schemas from OpenAPI specs. This should make specifying schemas for API connectors a breeze! 
🎉 -* 🏗 CDK: Source Acceptance Tests now verify that connectors correctly format strings which are declared as using date-time and date formats. -* 🏗 CDK: Add private options to help in testing: \_limit and \_page\_size are now accepted by any CDK connector to minimze your output size for quick iteration while testing. -* 🐛 Fixed a bug that made it possible for connector definitions to be duplicated, violating uniqueness. -* 🐛 Pipedrive source: Output schemas no longer remove timestamp from fields. -* 🐛 Github source: Empty repos and negative backoff values are now handled correctly. -* 🐛 Harvest source: Normalization now works as expected. -* 🐛 All CDC sources: Removed sleep logic which caused exceptions when loading data from high-volume sources. -* 🐛 Slack source: Increased number of retries to tolerate flaky retry wait times on the API side. -* 🐛 Slack source: Sync operations no longer hang indefinitely. -* 🐛 Jira source: Now uses updated time as the cursor field for incremental sync instead of the created time. -* 🐛 Intercom source: Fixed inconsistency between schema and output data. -* 🐛 HubSpot source: Streams with the items property now have their schemas fixed. -* 🐛 HubSpot source: Empty strings are no longer handled as dates, fixing the deals, companies, and contacts streams. -* 🐛 Typeform source: Allows for multiple choices in responses now. -* 🐛 Shopify source: The type for the amount field is now fixed in the schema. -* 🐛 Postgres destination: \u0000\(NULL\) value processing is now fixed. - -As usual... thank you to our wonderful contributors this week: Pras, Christopher Wu, Brian M, yahu98, Michele Zuccala, jinnig, and luizgribeiro! - -## 09/01/2021 Summary - -Got the changes from 0.29.13... with some other surprises! - -* 🔥 There's a new way to create Airbyte sources! The team at Faros AI has created a Javascript/Typescript CDK which can be found here and in our docs here. This is absolutely awesome and give a huge thanks to Chalenge Masekera, Christopher Wu, eskrm, and Matthew Tovbin! -* ✨ New Destination: Azure Blob Storage ✨ - -**New Source**: Bamboo HR \(contributed by @Oren Haliva\) **New Source**: BigCommerce \(contributed by @James Wilson\) **New Source**: Trello **New Source**: Google Analytics V4 **New Source**: Amazon Ads - -* 💎 Alpine Docker images are the new standard for Python connectors, so image sizes have dropped by around 100 MB! -* ✨ You can now apply tolerations for Airbyte Pods on K8s deployments \(contributed by @Pras\). -* 🐛 Shopify source: Rate limit throttling fixed. -* 📚 We now have a doc on how to deploy Airbyte at scale. Check it out here! -* 🏗 Airbyte CDK: You can now ignore HTTP status errors and override retry parameters. - -As usual, thank you to our awesome contributors: Oren Haliva, Pras, James Wilson, and Muutech. - -## 08/26/2021 Summary - -New Source: Short.io \(contributed by @Apostol Tegko\) - -* 💎 GitHub source: Added support for rotating through multiple API tokens! -* ✨ Syncs are now scheduled with a 3 day timeout \(contributed by @Vladimir Remar\). -* ✨ Google Ads source: Added UserLocationReport stream \(contributed by @Max Krog\). -* ✨ Cart.com source: Added the order\_items stream. -* 🐛 Postgres source: Fixed out-of-memory issue with CDC interacting with large JSON blobs. -* 🐛 Intercom source: Pagination now works as expected. - -As always, thank you to our awesome community contributors this week: Apostol Tegko, Vladimir Remar, Max Krog, Pras, Marco Fontana, Troy Harvey, and damianlegawiec! 
- -## 08/20/2021 Summary - -Hey Airbyte community, we got some patch notes for y'all. Here's all the changes we've pushed since the last update. - -* **New Source**: S3/Abstract Files -* **New Source**: Zuora -* **New Source**: Kustomer -* **New Source**: Apify -* **New Source**: Chargebee -* **New Source**: Bing Ads - -New Destination: Keen - -* ✨ Shopify source: The `status` property is now in the `Products` stream. -* ✨ Amazon Seller Partner source: Added support for `GET_MERCHANT_LISTINGS_ALL_DATA` and `GET_FBA_INVENTORY_AGED_DATA` stream endpoints. -* ✨ GitHub source: Existing streams now don't minify the user property. -* ✨ HubSpot source: Updated user-defined custom field schema generation. -* ✨ Zendesk source: Migrated from Singer to the Airbyte CDK. -* ✨ Amazon Seller Partner source: Migrated to the Airbyte CDK. -* 🐛 Shopify source: Fixed the `products` schema to be in accordance with the API. -* 🐛 S3 source: Fixed bug where syncs could hang indefinitely. - -And as always... we'd love to shout out the awesome contributors that have helped push Airbyte forward. As a reminder, you can now see your contributions publicly reflected on our [contributors page](https://airbyte.com/contributors). - -Thank you to Rodrigo Parra, Brian Krausz, Max Krog, Apostol Tegko, Matej Hamas, Vladimir Remar, Marco Fontana, Nicholas Bull, @mildbyte, @subhaklp, and Maciej Nędza! - -## 07/30/2021 Summary - -For this week's update, we got... a few new connectors this week in 0.29.0. We found that a lot of sources can pull data directly from the underlying db instance, which we naturally already supported. - -* New Source: PrestaShop ✨ -* New Source: Snapchat Marketing ✨ -* New Source: Drupal -* New Source: Magento -* New Source: Microsoft Dynamics AX -* New Source: Microsoft Dynamics Customer Engagement -* New Source: Microsoft Dynamics GP -* New Source: Microsoft Dynamics NAV -* New Source: Oracle PeopleSoft -* New Source: Oracle Siebel CRM -* New Source: SAP Business One -* New Source: Spree Commerce -* New Source: Sugar CRM -* New Source: Wordpress -* New Source: Zencart -* 🐛 Shopify source: Fixed the products schema to be in accordance with the API -* 🐛 BigQuery source: No longer fails with nested array data types. - -View the full release highlights here: [Platform](platform.md), [Connectors](connectors.md) - -And as always, thank you to our wonderful contributors: Madison Swain-Bowden, Brian Krausz, Apostol Tegko, Matej Hamas, Vladimir Remar, Oren Haliva, satishblotout, jacqueskpoty, wallies - -## 07/23/2021 Summary - -What's going on? We just released 0.28.0 and here's the main highlights. - -* New Destination: Google Cloud Storage ✨ -* New Destination: Kafka ✨ \(contributed by @Mario Molina\) -* New Source: Pipedrive -* New Source: US Census \(contributed by @Daniel Mateus Pires \(Earnest Research\)\) -* ✨ Google Ads source: Now supports Campaigns, Ads, AdGroups, and Accounts streams. -* ✨ Stripe source: All subscription types \(including expired and canceled ones\) are now returned. -* 🐛 Facebook source: Improved rate limit management -* 🐛 Square source: The send\_request method is no longer broken due to CDK changes -* 🐛 MySQL destination: Does not fail on columns with JSON data now. 
- -View the full release highlights here: [Platform](platform.md), [Connectors](connectors.md) - -And as always, thank you to our wonderful contributors: Mario Molina, Daniel Mateus Pires \(Earnest Research\), gunu, Ankur Adhikari, Vladimir Remar, Madison Swain-Bowden, Maksym Pavlenok, Sam Crowder, mildbyte, avida, and gaart - -## 07/16/2021 Summary - -As for our changes this week... - -* New Source: Zendesk Sunshine -* New Source: Dixa -* New Source: Typeform -* 💎 MySQL destination: Now supports normalization! -* 💎 MSSQL source: Now supports CDC \(Change Data Capture\) -* ✨ Snowflake destination: Data coming from Airbyte is now identifiable -* 🐛 GitHub source: Now uses the correct cursor field for the IssueEvents stream -* 🐛 Square source: The send\_request method is no longer broken due to CDK changes - -View the full release highlights here: [Platform](platform.md), [Connectors](connectors.md) - -As usual, thank you to our awesome community contributors this week: Oliver Meyer, Varun, Brian Krausz, shadabshaukat, Serhii Lazebnyi, Juliano Benvenuto Piovezan, mildbyte, and Sam Crowder! - -## 07/09/2021 Summary - -* New Source: PayPal Transaction -* New Source: Square -* New Source: SurveyMonkey -* New Source: CockroachDB -* New Source: Airbyte-Native GitHub -* New Source: Airbyte-Native GitLab -* New Source: Airbyte-Native Twilio -* ✨ S3 destination: Now supports anyOf, oneOf and allOf schema fields. -* ✨ Instagram source: Migrated to the CDK and has improved error handling. -* ✨ Shopify source: Add support for draft orders. -* ✨ K8s Deployments: Now support logging to GCS. -* 🐛 GitHub source: Fixed issue with locked breaking normalization of the pull\_request stream. -* 🐛 Okta source: Fix endless loop when syncing data from logs stream. -* 🐛 PostgreSQL source: Fixed decimal handling with CDC. -* 🐛 Fixed random silent source failures. -* 📚 New document on how the CDK handles schemas. -* 🏗️ Python CDK: Now allows setting of network adapter args on outgoing HTTP requests. - -View the full release highlights here: [Platform](platform.md), [Connectors](connectors.md) - -As usual, thank you to our awesome community contributors this week: gunu, P.VAD, Rodrigo Parra, Mario Molina, Antonio Grass, sabifranjo, Jaime Farres, shadabshaukat, Rodrigo Menezes, dkelwa, Jonathan Duval, and Augustin Lafanechère. - -## 07/01/2021 Summary - -* New Destination: Google PubSub -* New Source: AWS CloudTrail - -_The risks and issues with upgrading Airbyte are now gone..._ - -* 🎉 Airbyte automatically upgrades versions safely at server startup 🎉 -* 💎 Logs on K8s are now stored in Minio by default, no S3 bucket required -* ✨ Looker Source: Supports the Run Look output stream -* ✨ Slack Source: is now Airbyte native! -* 🐛 Freshdesk Source: No longer fails after 300 pages -* 📚 New tutorial on building Java destinations - -Starting from next week, our weekly office hours will now become demo days! Drop by to get sneak peeks and new feature demos. - -* We added the \#careers channel, so if you're hiring, post your job reqs there! -* We added a \#understanding-airbyte channel to mirror [this](../../understanding-airbyte/) section on our docs site. Ask any questions about our architecture or protocol there. -* We added a \#contributing-to-airbyte channel. A lot of people ask us about how to contribute to the project, so ask away there! 
- -View the full release highlights here: [Platform](platform.md), [Connectors](connectors.md) - -As usual, thank you to our awesome community contributors this week: Harshith Mullapudi, Michael Irvine, and [sabifranjo](https://github.com/sabifranjo). - -## 06/24/2021 Summary - -* New Source: [IBM Db2](../../integrations/sources/db2.md) -* 💎 We now support Avro and JSONL output for our S3 destination! 💎 -* 💎 Brand new BigQuery destination flavor that now supports denormalized STRUCT types. -* ✨ Looker source now supports self-hosted instances. -* ✨ Facebook Marketing source is now migrated to the CDK, massively improving async job performance and error handling. - -View the full connector release notes [here](connectors.md). - -As usual, thank you to some of our awesome community contributors this week: Harshith Mullapudi, Tyler DeLange, Daniel Mateus Pires, EdBizarro, Tyler Schroeder, and Konrad Schlatte! - -## 06/18/2021 Summary - -* New Source: [Snowflake](../../integrations/sources/snowflake.md) -* 💎 We now support custom dbt transformations! 💎 -* ✨ We now support configuring your destination namespace at the table level when setting up a connection! -* ✨ The S3 destination now supports Minio S3 and Parquet output! - -View the full release notes here: [Platform](platform.md), [Connectors](connectors.md) - -As usual, thank you to some of our awesome community contributors this week: Tyler DeLange, Mario Molina, Rodrigo Parra, Prashanth Patali, Christopher Wu, Itai Admi, Fred Reimer, and Konrad Schlatte! - -## 06/10/2021 Summary - -* New Destination: [S3!!](../../integrations/destinations/s3.md) -* New Sources: [Harvest](../../integrations/sources/harvest.md), [Amplitude](../../integrations/sources/amplitude.md), [Posthog](../../integrations/sources/posthog.md) -* 🐛 Ensure that logs from threads created by replication workers are added to the log file. -* 🐛 Handle TINYINT\(1\) and BOOLEAN correctly and fix target file comparison for MySQL CDC. -* Jira source: now supports all available entities in Jira Cloud. -* 📚 Added a troubleshooting section, a gradle cheatsheet, a reminder on what the reset button does, and a refresh on our docs best practices. - -#### Connector Development: - -* Containerized connector code generator -* Added JDBC source connector bootstrap template. -* Added Java destination generator. - -View the full release notes highlights here: [Platform](platform.md), [Connectors](connectors.md) - -As usual, thank you to some of our awesome community contributors this week \(I've noticed that we've had more contributors to our docs, which we really appreciate\). Ping, Harshith Mullapudi, Michael Irvine, Matheus di Paula, jacqueskpoty and P.VAD. - -## Overview - -Airbyte is comprised of 2 parts: - -* Platform \(The scheduler, workers, api, web app, and the Airbyte protocol\). Here is the [changelog for Platform](platform.md). -* Connectors that run in Docker containers. Here is the [changelog for the connectors](connectors.md). - -## Airbyte Platform Releases - -### Production v. Dev Releases - -The "production" version of Airbyte is the version of the app specified in `.env`. With each production release, we update the version in the `.env` file. This version will always be available for download on DockerHub. It is the version of the app that runs when a user runs `docker compose up`. - -The "development" version of Airbyte is the head of master branch. It is the version of the app that runs when a user runs `./gradlew build && -VERSION=dev docker compose up`. 
- -### Production Release Schedule - -#### Scheduled Releases - -Airbyte currently releases a new minor version of the application on a weekly basis. Generally this weekly release happens on Monday or Tuesday. - -#### Hotfixes - -Airbyte releases a new version whenever it discovers and fixes a bug that blocks any mission critical functionality. - -**Mission Critical** - -e.g. Non-ASCII characters break the Salesforce source. - -**Non-Mission Critical** - -e.g. Buttons in the UI are offset. - -#### Unscheduled Releases - -We will often release more frequently than the weekly cadence if we complete a feature that we know that a user is waiting on. - -### Development Release Schedule - -As soon as a feature is on master, it is part of the development version of Airbyte. We merge features as soon as they are ready to go \(have been code reviewed and tested\). We attempt to keep the development version of the app working all the time. We are iterating quickly, however, and there may be intermittent periods where the development version is broken. - -If there is ever a feature that is only on the development version, and you need it on the production version, please let us know. We are very happy to do ad-hoc production releases if it unblocks a specific need for one of our users. - -## Airbyte Connector Releases - -Each connector is tracked with its own version. These versions are separate from the versions of Airbyte Platform. We generally will bump the version of a connector anytime we make a change to it. We rely on a large suite of tests to make sure that these changes do not cause regressions in our connectors. - -When we updated the version of a connector, we usually update the connector's version in Airbyte Platform as well. Keep in mind that you might not see the updated version of that connector in the production version of Airbyte Platform until after a production release of Airbyte Platform. - diff --git a/docs/archive/changelog/connectors.md b/docs/archive/changelog/connectors.md deleted file mode 100644 index a1f8b8126e07..000000000000 --- a/docs/archive/changelog/connectors.md +++ /dev/null @@ -1,776 +0,0 @@ ---- -description: Do not miss the new connectors we support! ---- - -# Connectors - -**You can request new connectors directly** [**here**](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=area%2Fintegration%2C+new-integration&template=new-integration-request.md&title=)**.** - -Note: Airbyte is not built on top of Singer but is compatible with Singer's protocol. Airbyte's ambitions go beyond what Singer enables us to do, so we are building our own protocol that maintains compatibility with Singer's protocol. - -Check out our [connector roadmap](https://github.com/airbytehq/airbyte/projects/3) to see what we're currently working on. 
- -## 1/28/2022 - -New sources: - -- [**Chartmogul**](https://docs.airbyte.com/integrations/sources/chartmogul) -- [**Hellobaton**](https://docs.airbyte.com/integrations/sources/hellobaton) -- [**Flexport**](https://docs.airbyte.com/integrations/sources/flexport) -- [**PersistIq**](https://docs.airbyte.com/integrations/sources/persistiq) - -## 1/6/2022 - -New sources: - -- [**3PL Central**](https://docs.airbyte.com/integrations/sources/tplcentral) -- [**My Hours**](https://docs.airbyte.com/integrations/sources/my-hours) -- [**Qualaroo**](https://docs.airbyte.com/integrations/sources/qualaroo) -- [**SearchMetrics**](https://docs.airbyte.com/integrations/sources/search-metrics) - -## 12/16/2021 - -New source: - -- [**OpenWeather**](https://docs.airbyte.com/integrations/sources/openweather) - -New destinations: - -- [**ClickHouse**](https://docs.airbyte.com/integrations/destinations/clickhouse) -- [**RabbitMQ**](https://docs.airbyte.com/integrations/destinations/rabbitmq) -- [**Amazon SQS**](https://docs.airbyte.com/integrations/destinations/amazon-sqs) -- [**Rockset**](https://docs.airbyte.com/integrations/destinations/rockset) - -## 12/9/2021 - -New source: - -- [**Mailgun**](https://docs.airbyte.com/integrations/sources/mailgun) - -## 12/2/2021 - -New destinations: - -- [**Redis**](https://docs.airbyte.com/integrations/destinations/redis) -- [**MQTT**](https://docs.airbyte.com/integrations/destinations/mqtt) -- [**Google Firestore**](https://docs.airbyte.com/integrations/destinations/firestore) -- [**Kinesis**](https://docs.airbyte.com/integrations/destinations/kinesis) - -## 11/25/2021 - -New sources: - -- [**Airtable**](https://docs.airbyte.com/integrations/sources/airtable) -- [**Notion**](https://docs.airbyte.com/integrations/sources/notion) -- [**Pardot**](https://docs.airbyte.com/integrations/sources/pardot) -- [**Notion**](https://docs.airbyte.com/integrations/sources/linnworks) -- [**YouTube Analytics**](https://docs.airbyte.com/integrations/sources/youtube-analytics) - -New features: - -- **Exchange Rates** Source: add `ignore_weekends` option. -- **Facebook** Source: add the videos stream. -- **Freshdesk** Source: removed the limitation in streams pagination. -- **Jira** Source: add option to render fields in HTML format. -- **MongoDB v2** Source: improve read performance. -- **Pipedrive** Source: specify schema for "persons" stream. -- **PostgreSQL** Source: exclude tables on which user doesn't have select privileges. -- **SurveyMonkey** Source: improve connection check. - -## 11/17/2021 - -New destination: - -- [**ScyllaDB**](https://docs.airbyte.com/integrations/destinations/scylla) - -New sources: - -- [**Azure Table Storage**](https://docs.airbyte.com/integrations/sources/azure-table) -- [**Linnworks**](https://docs.airbyte.com/integrations/sources/linnworks) - -New features: - -- **MySQL** Source: Now has basic performance tests. -- **Salesforce** Source: We now automatically transform and handle incorrect data for the anyType and calculated types. 
- -## 11/11/2021 - -New destinations: - -- [**Cassandra**](https://docs.airbyte.com/integrations/destinations/cassandra) -- [**Pulsar**](https://docs.airbyte.com/integrations/destinations/pulsar) - -New sources: - -- [**Confluence**](https://docs.airbyte.com/integrations/sources/confluence) -- [**Monday**](https://docs.airbyte.com/integrations/sources/monday) -- [**Commerce Tools**](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-commercetools) -- [**Pinterest**](https://docs.airbyte.com/integrations/sources/pinterest) - -New features: - -- **Shopify** Source: Now supports the FulfillmentOrders and Fulfillments streams. -- **Greenhouse** Source: Now supports the Demographics stream. -- **Recharge** Source: Broken requests should now be re-requested with improved backoff. -- **Stripe** Source: Now supports the checkout_sessions, checkout_sessions_line_item, and promotion_codes streams. -- **Db2** Source: Now supports SSL. - -## 11/3/2021 - -New destination: - -- [**Elasticsearch**](https://docs.airbyte.com/integrations/destinations/elasticsearch) - -New sources: - -- [**Salesloft**](https://docs.airbyte.com/integrations/sources/salesloft) -- [**OneSignal**](https://docs.airbyte.com/integrations/sources/onesignal) -- [**Strava**](https://docs.airbyte.com/integrations/sources/strava) -- [**Lemlist**](https://docs.airbyte.com/integrations/sources/lemlist) -- [**Amazon SQS**](https://docs.airbyte.com/integrations/sources/amazon-sqs) -- [**Freshservices**](https://docs.airbyte.com/integrations/sources/freshservice/) -- [**Freshsales**](https://docs.airbyte.com/integrations/sources/freshsales) -- [**Appsflyer**](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-appsflyer) -- [**Paystack**](https://docs.airbyte.com/integrations/sources/paystack) -- [**Sentry**](https://docs.airbyte.com/integrations/sources/sentry) -- [**Retently**](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-retently) -- [**Delighted!**](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-delighted) - -New features: - -- **BigQuery** Destination: You can now run transformations in batches, preventing queries from hitting BigQuery limits. (contributed by @Andrés Bravo) -- **S3** Source: Memory and Performance optimizations, also some fancy new PyArrow CSV configuration options. -- **Zuora** Source: Now supports Unlimited as an option for the Data Query Live API. -- **Clickhouse** Source: Now supports SSL and connection via SSH tunneling. - -## 10/20/2021 - -New source: - -- [**WooCommerce**](https://docs.airbyte.com/integrations/sources/woocommerce) - -New feature: - -- **MSSQL** destination: Now supports basic normalization - -## 9/29/2021 - -New sources: - -- [**LinkedIn Ads**](https://docs.airbyte.com/integrations/sources/linkedin-ads) -- [**Kafka**](https://docs.airbyte.com/integrations/sources/kafka) -- [**Lever Hiring**](https://docs.airbyte.com/integrations/sources/lever-hiring) - -New features: - -- **MySQL** destination: Now supports connection via TLS/SSL -- **BigQuery** (denormalized) destination: Supports reading BigQuery types such as date by reading the format field (contributed by @Nicolas Moreau) -- **Hubspot** source: Added contacts associations to the deals stream. -- **GitHub** source: Now supports pulling commits from user-specified branches. -- **Google Search Console** source: Now accepts admin email as input when using a service account key. 
-- **Greenhouse** source: Now identifies API streams it has access to if permissions are limited. -- **Marketo** source: Now Airbyte native. -- **S3** source: Now supports any source that conforms to the S3 protocol (Non-AWS S3). -- **Shopify** source: Now reports pre_tax_price on the line_items stream if you have Shopify Plus. -- **Stripe** source: Now actually uses the mandatory start_date config field for incremental syncs. - -## 9/16/2021 - -New destinations: - -- [**Databricks**](https://docs.airbyte.com/integrations/destinations/databricks) - -New sources: - -- [**Close.com**](https://docs.airbyte.com/integrations/sources/close-com) -- [**Google Search Console**](https://docs.airbyte.com/integrations/sources/google-search-console) - -New features: - -- **Google Ads** source: You can now specify user-specified queries in GAQL. -- **GitHub** source: All streams with a parent stream use cached parent stream data when possible. -- **Shopify** source: Substantial performance improvements to the incremental sync mode. -- **Stripe** source: Now supports the PaymentIntents stream. -- **Pipedrive** source: Now supports the Organizations stream. -- **Sendgrid** source: Now supports the SingleSendStats stream. -- **Bing Ads** source: Now supports the Report stream. -- **GitHub** source: Now supports the Reactions stream. -- **MongoDB** source: Now Airbyte native! - -## 9/9/2021 - -New source: - -- [**Facebook Pages**](https://docs.airbyte.com/integrations/sources/facebook-pages) - -New destinations: - -- [**MongoDB**](https://docs.airbyte.com/integrations/destinations/mongodb) -- [**DynamoDB**](https://docs.airbyte.com/integrations/destinations/dynamodb) - -New features: - -- **S3** source: Support for Parquet format. -- **Github** source: Branches, repositories, organization users, tags, and pull request stats streams added \(contributed by @Christopher Wu\). -- **BigQuery** destination: Added GCS upload option. -- **Salesforce** source: Now Airbyte native. -- **Redshift** destination: Optimized for performance. - -Bug fixes: - -- **Pipedrive** source: Output schemas no longer remove timestamp from fields. -- **Github** source: Empty repos and negative backoff values are now handled correctly. -- **Harvest** source: Normalization now works as expected. -- **All CDC sources**: Removed sleep logic which caused exceptions when loading data from high-volume sources. -- **Slack** source: Increased number of retries to tolerate flaky retry wait times on the API side. -- **Slack** source: Sync operations no longer hang indefinitely. -- **Jira** source: Now uses updated time as the cursor field for incremental sync instead of the created time. -- **Intercom** source: Fixed inconsistency between schema and output data. -- **HubSpot** source: Streams with the items property now have their schemas fixed. -- **HubSpot** source: Empty strings are no longer handled as dates, fixing the deals, companies, and contacts streams. -- **Typeform** source: Allows for multiple choices in responses now. -- **Shopify** source: The type for the amount field is now fixed in the schema. -- **Postgres** destination: \u0000\(NULL\) value processing is now fixed. 
-
-## 9/1/2021
-
-New sources:
-
-- [**Bamboo HR**](https://docs.airbyte.com/integrations/sources/bamboo-hr)
-- [**BigCommerce**](https://docs.airbyte.com/integrations/sources/bigcommerce)
-- [**Trello**](https://docs.airbyte.com/integrations/sources/trello)
-- [**Google Analytics V4**](https://docs.airbyte.com/integrations/sources/google-analytics-v4)
-- [**Amazon Ads**](https://docs.airbyte.com/integrations/sources/amazon-ads)
-
-Bug fixes:
-
-- **Shopify** source: Rate limit throttling fixed.
-
-## 8/26/2021
-
-New source:
-
-- [**Short.io**](https://docs.airbyte.com/integrations/sources/shortio)
-
-New features:
-
-- **GitHub** source: Add support for rotating through multiple API tokens.
-- **Google Ads** source: Added `UserLocationReport` stream.
-- **Cart.com** source: Added the `order_items` stream.
-
-Bug fixes:
-
-- **Postgres** source: Fix out-of-memory issue with CDC interacting with large JSON blobs.
-- **Intercom** source: Pagination now works as expected.
-
-## 8/18/2021
-
-New source:
-
-- [**Bing Ads**](https://docs.airbyte.com/integrations/sources/bing-ads)
-
-New destination:
-
-- [**Keen**](https://docs.airbyte.com/integrations/destinations/keen)
-
-New features:
-
-- **Chargebee** source: Adds support for the `items`, `item prices` and `attached items` endpoints.
-
-Bug fixes:
-
-- **QuickBooks** source: Now uses the number data type for decimal fields.
-- **HubSpot** source: Fixed `empty string` inside of the `number` and `float` datatypes.
-- **GitHub** source: Validation fixed on non-required fields.
-- **BigQuery** destination: Now supports processing of arrays of records properly.
-- **Oracle** destination: Fixed destination check for users without DBA role.
-
-## 8/9/2021
-
-New sources:
-
-- [**S3/Abstract Files**](https://docs.airbyte.com/integrations/sources/s3)
-- [**Zuora**](https://docs.airbyte.com/integrations/sources/zuora)
-- [**Kustomer**](https://docs.airbyte.com/integrations/sources/kustomer-singer/)
-- [**Apify**](https://docs.airbyte.com/integrations/sources/apify-dataset)
-- [**Chargebee**](https://docs.airbyte.com/integrations/sources/chargebee)
-
-New features:
-
-- **Shopify** source: The `status` property is now in the `Products` stream.
-- **Amazon Seller Partner** source: Added support for `GET_MERCHANT_LISTINGS_ALL_DATA` and `GET_FBA_INVENTORY_AGED_DATA` stream endpoints.
-- **GitHub** source: Existing streams now don't minify the `user` property.
-- **HubSpot** source: Updated user-defined custom field schema generation.
-- **Zendesk** source: Migrated from Singer to the Airbyte CDK.
-- **Amazon Seller Partner** source: Migrated to the Airbyte CDK.
-
-Bug fixes:
-
-- **HubSpot** source: Casting exceptions are now logged correctly.
-- **S3** source: Fixed bug where syncs could hang indefinitely.
-- **Shopify** source: Fixed the `products` schema to be in accordance with the API.
-- **PayPal Transactions** source: Fixed the start date minimum to be 3 years rather than 45 days.
-- **Google Ads** source: Added the `login-customer-id` setting.
-- **Intercom** source: Rate limit corrected from 1000 requests/minute to 1000 requests/hour.
-- **S3** source: Fixed bug in spec to properly display the `format` field in the UI.
-
-New CDK features:
-
-- Now allows for setting request data in non-JSON formats.
- -## 7/30/2021 - -New sources: - -- [**PrestaShop**](https://docs.airbyte.com/integrations/sources/prestashop) -- [**Snapchat Marketing**](https://docs.airbyte.com/integrations/sources/snapchat-marketing) -- [**Drupal**](https://docs.airbyte.com/integrations/sources/drupal) -- [**Magento**](https://docs.airbyte.com/integrations/sources/magento) -- [**Microsoft Dynamics AX**](https://docs.airbyte.com/integrations/sources/microsoft-dynamics-ax) -- [**Microsoft Dynamics Customer Engagement**](https://docs.airbyte.com/integrations/sources/microsoft-dynamics-customer-engagement) -- [**Microsoft Dynamics GP**](https://docs.airbyte.com/integrations/sources/microsoft-dynamics-gp) -- [**Microsoft Dynamics NAV**](https://docs.airbyte.com/integrations/sources/microsoft-dynamics-nav) -- [**Oracle PeopleSoft**](https://docs.airbyte.com/integrations/sources/oracle-peoplesoft) -- [**Oracle Siebel CRM**](https://docs.airbyte.com/integrations/sources/oracle-siebel-crm) -- [**SAP Business One**](https://docs.airbyte.com/integrations/sources/sap-business-one) -- [**Spree Commerce**](https://docs.airbyte.com/integrations/sources/spree-commerce) -- [**Sugar CRM**](https://docs.airbyte.com/integrations/sources/sugar-crm) -- [**WooCommerce**](https://docs.airbyte.com/integrations/sources/woocommerce) -- [**Wordpress**](https://docs.airbyte.com/integrations/sources/wordpress) -- [**Zencart**](https://docs.airbyte.com/integrations/sources/zencart) - -Bug fixes: - -- **Shopify** source: Fixed the `products` schema to be in accordance with the API. -- **BigQuery** source: No longer fails with `Array of Records` data types. -- **BigQuery** destination: Improved logging, Job IDs are now filled with location and Project IDs. - -## 7/23/2021 - -New sources: - -- [**Pipedrive**](https://docs.airbyte.com/integrations/sources/pipedrive) -- [**US Census**](https://docs.airbyte.com/integrations/sources/us-census) -- [**BigQuery**](https://docs.airbyte.com/integrations/sources/bigquery) - -New destinations: - -- [**Google Cloud Storage**](https://docs.airbyte.com/integrations/destinations/gcs) -- [**Kafka**](https://docs.airbyte.com/integrations/destinations/kafka) - -New Features: - -- **Java Connectors**: Now have config validators for check, discover, read, and write calls -- **Stripe** source: All subscription types are returnable \(including expired and canceled ones\). -- **Mixpanel** source: Migrated to the CDK. -- **Intercom** source: Migrated to the CDK. -- **Google Ads** source: Now supports the `Campaigns`, `Ads`, `AdGroups`, and `Accounts` streams. - -Bug Fixes: - -- **Facebook** source: Improved rate limit management -- **Instagram** source: Now supports old format for state and automatically updates it to the new format. -- **Sendgrid** source: Now gracefully handles malformed responses from API. -- **Jira** source: Fixed dbt failing to normalize schema for the labels stream. -- **MySQL** destination: Does not fail anymore with columns that contain JSON data. -- **Slack** source: Now does not fail stream slicing on reading threads. - -## 7/16/2021 - -3 new sources: - -- [**Zendesk Sunshine**](https://docs.airbyte.com/integrations/sources/zendesk-sunshine) -- [**Dixa**](https://docs.airbyte.com/integrations/sources/dixa) -- [**Typeform**](https://docs.airbyte.com/integrations/sources/typeform) - -New Features: - -- **MySQL** destination: Now supports normalization! -- **MSSQL** source: Now supports CDC \(Change Data Capture\). -- **Snowflake** destination: Data coming from Airbyte is now identifiable. 
-- **GitHub** source: Now handles rate limiting. - -Bug Fixes: - -- **GitHub** source: Now uses the correct cursor field for the `IssueEvents` stream. -- **Square** source: `send_request` method is no longer broken. - -## 7/08/2021 - -7 new sources: - -- [**PayPal Transaction**](https://docs.airbyte.com/integrations/sources/paypal-transaction) -- [**Square**](https://docs.airbyte.com/integrations/sources/square) -- [**SurveyMonkey**](https://docs.airbyte.com/integrations/sources/surveymonkey) -- [**CockroachDB**](https://docs.airbyte.com/integrations/sources/cockroachdb) -- [**Airbyte-native GitLab**](https://docs.airbyte.com/integrations/sources/gitlab) -- [**Airbyte-native GitHub**](https://docs.airbyte.com/integrations/sources/github) -- [**Airbyte-native Twilio**](https://docs.airbyte.com/integrations/sources/twilio) - -New Features: - -- **S3** destination: Now supports `anyOf`, `oneOf` and `allOf` schema fields. -- **Instagram** source: Migrated to the CDK and has improved error handling. -- **Snowflake** source: Now has comprehensive data type tests. -- **Shopify** source: Change the default stream cursor field to `update_at` where possible. -- **Shopify** source: Add support for draft orders. -- **MySQL** destination: Now supports normalization. - -Connector Development: - -- **Python CDK**: Now allows setting of network adapter args on outgoing HTTP requests. -- Abstract classes for non-JDBC relational database sources. - -Bugfixes: - -- **GitHub** source: Fixed issue with `locked` breaking normalization of the pull_request stream. -- **PostgreSQL** source: Fixed decimal handling with CDC. -- **Okta** source: Fix endless loop when syncing data from logs stream. - -## 7/01/2021 - -Bugfixes: - -- **Looker** source: Now supports the Run Look stream. -- **Google Adwords**: CI is fixed and new version is published. -- **Slack** source: Now Airbyte native and supports channels, channel members, messages, users, and threads streams. -- **Freshdesk** source: Does not fail after 300 pages anymore. -- **MSSQL** source: Now has comprehensive data type tests. - -## 6/24/2021 - -1 new source: - -- [**Db2**](https://docs.airbyte.com/integrations/sources/db2) - -New features: - -- **S3** destination: supports Avro and Jsonl output! -- **BigQuery** destination: now supports loading JSON data as structured data. -- **Looker** source: Now supports self-hosted instances. -- **Facebook** source: is now migrated to the CDK. - -## 6/18/2021 - -1 new source: - -- [**Snowflake**](https://docs.airbyte.com/integrations/sources/snowflake) - -New features: - -- **Postgres** source: now has comprehensive data type tests. -- **Google Ads** source: now uses the [Google Ads Query Language](https://developers.google.com/google-ads/api/docs/query/overview)! -- **S3** destination: supports Parquet output! -- **S3** destination: supports Minio S3! -- **BigQuery** destination: credentials are now optional. - -## 6/10/2021 - -1 new destination: - -- [**S3**](https://docs.airbyte.com/integrations/destinations/s3) - -3 new sources: - -- [**Harvest**](https://docs.airbyte.com/integrations/sources/harvest) -- [**Amplitude**](https://docs.airbyte.com/integrations/sources/amplitude) -- [**Posthog**](https://docs.airbyte.com/integrations/sources/posthog) - -New features: - -- **Jira** source: now supports all available entities in Jira Cloud. -- **ExchangeRatesAPI** source: clearer messages around unsupported currencies. -- **MySQL** source: Comprehensive core extension to be more compatible with other JDBC sources. 
-- **BigQuery** destination: Add dataset location. -- **Shopify** source: Add order risks + new attributes to orders schema for native connector - -Bugfixes: - -- **MSSQL** destination: fixed handling of unicode symbols. - -Connector development updates: - -- Containerized connector code generator. -- Added JDBC source connector bootstrap template. -- Added Java destination generator. - -## 06/3/2021 - -2 new sources: - -- [**Okta**](https://docs.airbyte.com/integrations/sources/okta) -- [**Amazon Seller Partner**](https://docs.airbyte.com/integrations/sources/amazon-seller-partner) - -New features: - -- **MySQL CDC** now only polls for 5 minutes if we haven't received any records \([\#3789](https://github.com/airbytehq/airbyte/pull/3789)\) -- **Python CDK** now supports Python 3.7.X \([\#3692](https://github.com/airbytehq/airbyte/pull/3692)\) -- **File** source: now supports Azure Blob Storage \([\#3660](https://github.com/airbytehq/airbyte/pull/3660)\) - -Bugfixes: - -- **Recurly** source: now uses type `number` instead of `integer` \([\#3769](https://github.com/airbytehq/airbyte/pull/3769)\) -- **Stripe** source: fix types in schema \([\#3744](https://github.com/airbytehq/airbyte/pull/3744)\) -- **Stripe** source: output `number` instead of `int` \([\#3728](https://github.com/airbytehq/airbyte/pull/3728)\) -- **MSSQL** destination: fix issue with unicode symbols handling \([\#3671](https://github.com/airbytehq/airbyte/pull/3671)\) - -## 05/25/2021 - -4 new sources: - -- [**Asana**](https://docs.airbyte.com/integrations/sources/asana) -- [**Klaviyo**](https://docs.airbyte.com/integrations/sources/klaviyo) -- [**Recharge**](https://docs.airbyte.com/integrations/sources/recharge) -- [**Tempo**](https://docs.airbyte.com/integrations/sources/tempo) - -Progress on connectors: - -- **CDC for MySQL** is now available! 
-- **Sendgrid** source: support incremental sync, as rewritten using HTTP CDK \([\#3445](https://github.com/airbytehq/airbyte/pull/3445)\) -- **Github** source bugfix: exception when parsing null date values, use `created_at` as cursor value for issue_milestones \([\#3314](https://github.com/airbytehq/airbyte/pull/3314)\) -- **Slack** source bugfix: don't overwrite thread_ts in threads stream \([\#3483](https://github.com/airbytehq/airbyte/pull/3483)\) -- **Facebook Marketing** source: allow configuring insights lookback window \([\#3396](https://github.com/airbytehq/airbyte/pull/3396)\) -- **Freshdesk** source: fix discovery \([\#3591](https://github.com/airbytehq/airbyte/pull/3591)\) - -## 05/18/2021 - -1 new destination: [**MSSQL**](https://docs.airbyte.com/integrations/destinations/mssql) - -1 new source: [**ClickHouse**](https://docs.airbyte.com/integrations/sources/clickhouse) - -Progress on connectors: - -- **Shopify**: make this source more resilient to timeouts \([\#3409](https://github.com/airbytehq/airbyte/pull/3409)\) -- **Freshdesk** bugfix: output correct schema for various streams \([\#3376](https://github.com/airbytehq/airbyte/pull/3376)\) -- **Iterable**: update to use latest version of CDK \([\#3378](https://github.com/airbytehq/airbyte/pull/3378)\) - -## 05/11/2021 - -1 new destination: [**MySQL**](https://docs.airbyte.com/integrations/destinations/mysql) - -2 new sources: - -- [**Google Search Console**](https://docs.airbyte.com/integrations/sources/google-search-console) -- [**PokeAPI**](https://docs.airbyte.com/integrations/sources/pokeapi) \(talking about long tail and having fun ;\)\) - -Progress on connectors: - -- **Zoom**: bugfix on declaring correct types to match data coming from API \([\#3159](https://github.com/airbytehq/airbyte/pull/3159)\), thanks to [vovavovavovavova](https://github.com/vovavovavovavova) -- **Smartsheets**: bugfix on gracefully handling empty cell values \([\#3337](https://github.com/airbytehq/airbyte/pull/3337)\), thanks to [Nathan Nowack](https://github.com/zzstoatzz) -- **Stripe**: fix date property name, only add connected account header when set, and set primary key \(\#3210\), thanks to [Nathan Yergler](https://github.com/nyergler) - -## 05/04/2021 - -2 new sources: - -- [**Smartsheets**](https://docs.airbyte.com/integrations/sources/smartsheets), thanks to [Nathan Nowack](https://github.com/zzstoatzz) -- [**Zendesk Chat**](https://docs.airbyte.com/integrations/sources/zendesk-chat) - -Progress on connectors: - -- **Appstore**: bugfix private key handling in the UI \([\#3201](https://github.com/airbytehq/airbyte/pull/3201)\) -- **Facebook marketing**: Wait longer \(5 min\) for async jobs to start \([\#3116](https://github.com/airbytehq/airbyte/pull/3116)\), thanks to [Max Krog](https://github.com/MaxKrog) -- **Stripe**: support reading data from connected accounts \(\#3121\), and 2 new streams with Refunds & Bank Accounts \([\#3030](https://github.com/airbytehq/airbyte/pull/3030)\) \([\#3086](https://github.com/airbytehq/airbyte/pull/3086)\) -- **Redshift destination**: Ignore records that are too big \(instead of failing\) \([\#2988](https://github.com/airbytehq/airbyte/pull/2988)\) -- **MongoDB**: add supporting TLS and Replica Sets \([\#3111](https://github.com/airbytehq/airbyte/pull/3111)\) -- **HTTP sources**: bugfix on handling array responses gracefully \([\#3008](https://github.com/airbytehq/airbyte/pull/3008)\) - -## 04/27/2021 - -- **Zendesk Talk**: fix normalization failure 
\([\#3022](https://github.com/airbytehq/airbyte/pull/3022)\), thanks to [yevhenii-ldv](https://github.com/yevhenii-ldv)
-- **Github**: pull_requests stream now only runs incremental syncs \([\#2886](https://github.com/airbytehq/airbyte/pull/2886)\) \([\#3009](https://github.com/airbytehq/airbyte/pull/3009)\), thanks to [Zirochkaa](https://github.com/Zirochkaa)
-- Create streaming writes to a file and manage the issuance of copy commands for the destination \([\#2921](https://github.com/airbytehq/airbyte/pull/2921)\)
-- **Redshift**: make Redshift part size configurable. \([\#3053](https://github.com/airbytehq/airbyte/pull/3053)\)
-- **HubSpot**: fix argument error in log call \([\#3087](https://github.com/airbytehq/airbyte/pull/3087)\), thanks to [Nathan Yergler](https://github.com/nyergler)
-
-## 04/20/2021
-
-3 new source connectors!
-
-- [**Zendesk Talk**](https://docs.airbyte.com/integrations/sources/zendesk-talk)
-- [**Iterable**](https://docs.airbyte.com/integrations/sources/iterable)
-- [**QuickBooks**](https://docs.airbyte.com/integrations/sources/quickbooks-singer)
-
-Other progress on connectors:
-
-- **Postgres source/destination**: add SSL option, thanks to [Marcos Marx](https://github.com/marcosmarxm) \([\#2757](https://github.com/airbytehq/airbyte/pull/2757)\)
-- **Google sheets bugfix**: handle duplicate sheet headers, thanks to [Aneesh Makala](https://github.com/makalaaneesh) \([\#2905](https://github.com/airbytehq/airbyte/pull/2905)\)
-- **Source Google Adwords**: support specifying the lookback window for conversions, thanks to [Harshith Mullapudi](https://github.com/harshithmullapudi) \([\#2918](https://github.com/airbytehq/airbyte/pull/2918)\)
-- **MongoDB improvement**: speed up mongodb schema discovery, thanks to [Yury Koleda](https://github.com/FUT) \([\#2851](https://github.com/airbytehq/airbyte/pull/2851)\)
-- **MySQL bugfix**: parsing Mysql jdbc params, thanks to [Vasily Safronov](https://github.com/gingeard) \([\#2891](https://github.com/airbytehq/airbyte/pull/2891)\)
-- **CSV bugfix**: discovery takes too much memory \([\#2089](https://github.com/airbytehq/airbyte/pull/2089)\)
-- A lot of work was done on improving the standard tests for the connectors, for better standardization and maintenance!
-
-## 04/13/2021
-
-- New connector: [**Oracle DB**](https://docs.airbyte.com/integrations/sources/oracle), thanks to [Marcos Marx](https://github.com/marcosmarxm)
-
-## 04/07/2021
-
-- New connector: [**Google Workspace Admin Reports**](https://docs.airbyte.com/integrations/sources/google-workspace-admin-reports) \(audit logs\)
-- Bugfix in the base python connector library that caused errors to be silently skipped rather than failing the sync
-- **Exchangeratesapi.io** bugfix: point to the updated API URL
-- **Redshift destination** bugfix: quote keywords “DATETIME” and “TIME” when used as identifiers
-- **GitHub** bugfix: syncs failing when a personal repository doesn’t contain collaborators or team streams available
-- **Mixpanel** connector: sync at most the last 90 days of data in the annotations stream to adhere to API limits
-
-## 03/29/2021
-
-- We started measuring throughput of connectors. This will help us improve that point for all connectors.
-- **Redshift**: implemented Copy strategy to improve its throughput.
-- **Instagram**: bugfix for an issue which caused media and media_insights streams to stop syncing prematurely.
-- Support NCHAR and NVCHAR types in SQL-based database sources.
-- Add the ability to specify a custom JDBC parameters for the MySQL source connector. - -## 03/22/2021 - -- 2 new source connectors: [**Gitlab**](https://docs.airbyte.com/integrations/sources/gitlab) and [**Airbyte-native HubSpot**](https://docs.airbyte.com/integrations/sources/hubspot) -- Developing connectors now requires almost no interaction with Gradle, Airbyte’s monorepo build tool. If you’re building a Python connector, you never have to worry about developing outside your typical flow. See [the updated documentation](https://docs.airbyte.com/connector-development). - -## 03/15/2021 - -- 2 new source connectors: [**Instagram**](https://docs.airbyte.com/integrations/sources/instagram) and [**Google Directory**](https://docs.airbyte.com/integrations/sources/google-directory) -- **Facebook Marketing**: support of API v10 -- **Google Analytics**: support incremental sync -- **Jira**: bug fix to consistently pull all tickets -- **HTTP Source**: bug fix to correctly parse JSON responses consistently - -## 03/08/2021 - -- 1 new source connector: **MongoDB** -- **Google Analytics**: Support chunked syncs to avoid sampling -- **AppStore**: fix bug where the catalog was displayed incorrectly - -## 03/01/2021 - -- **New native HubSpot connector** with schema folder populated -- Facebook Marketing connector: add option to include deleted records - -## 02/22/2021 - -- Bug fixes: - - **Google Analytics:** add the ability to sync custom reports - - **Apple Appstore:** bug fix to correctly run incremental syncs - - **Exchange rates:** UI now correctly validates input date pattern - - **File Source:** Support JSONL \(newline-delimited JSON\) format - - **Freshdesk:** Enable controlling how many requests per minute the connector makes to avoid overclocking rate limits - -## 02/15/2021 - -- 1 new destination connector: [MeiliSearch](https://docs.airbyte.com/integrations/destinations/meilisearch) -- 2 new sources that support incremental append: [Freshdesk](https://docs.airbyte.com/integrations/sources/freshdesk) and [Sendgrid](https://docs.airbyte.com/integrations/sources/sendgrid) -- Other fixes: - - Thanks to [@ns-admetrics](https://github.com/ns-admetrics) for contributing an upgrade to the **Shopify** source connector which now provides the landing_site field containing UTM parameters in the Orders table. 
- - **Sendgrid** source connector supports most available endpoints available in the API - - **Facebook** Source connector now supports syncing Ad Insights data - - **Freshdesk** source connector now supports syncing satisfaction ratings and conversations - - **Microsoft Teams** source connector now gracefully handles rate limiting - - Bug fix in **Slack** source where the last few records in a sync were sporadically dropped - - Bug fix in **Google Analytics** source where the last few records in sync were sporadically dropped - - In **Redshift source**, support non alpha-numeric table names - - Bug fix in **Github Source** to fix instances where syncs didn’t always fail if there was an error while reading data from the API - -## 02/02/2021 - -- Sources that we improved reliability for \(and that became “certified”\): - - [Certified sources](https://docs.airbyte.com/integrations): Files and Shopify - - Enhanced continuous testing for Tempo and Looker sources -- Other fixes / features: - - Correctly handle boolean types in the File Source - - Add docs for [App Store](https://docs.airbyte.com/integrations/sources/appstore) source - - Fix a bug in Snowflake destination where the connector didn’t check for all needed write permissions, causing some syncs to fail - -## 01/26/2021 - -- Improved reliability with our best practices on : Google Sheets, Google Ads, Marketo, Tempo -- Support incremental for Facebook and Google Ads -- The Facebook connector now supports the FB marketing API v9 - -## 01/19/2021 - -- **Our new** [**Connector Health Grade**](../../integrations/) **page** -- **1 new source:** App Store \(thanks to [@Muriloo](https://github.com/Muriloo)\) -- Fixes on connectors: - - Bug fix writing boolean columns to Redshift - - Bug fix where getting a connector’s input configuration hung indefinitely - - Stripe connector now gracefully handles rate limiting from the Stripe API - -## 01/12/2021 - -- **1 new source:** Tempo \(thanks to [@thomasvl](https://github.com/thomasvl)\) -- **Incremental support for 3 new source connectors:** [Salesforce](../../integrations/sources/salesforce.md), [Slack](../../integrations/sources/slack.md) and [Braintree](../../integrations/sources/braintree.md) -- Fixes on connectors: - - Fix a bug in MSSQL and Redshift source connectors where custom SQL types weren't being handled correctly. - - Improvement of the Snowflake connector from [@hudsondba](https://github.com/hudsondba) \(batch size and timeout sync\) - -## 01/05/2021 - -- **Incremental support for 2 new source connectors:** [Mixpanel](../../integrations/sources/mixpanel.md) and [HubSpot](../../integrations/sources/hubspot.md) -- Fixes on connectors: - - Fixed a bug in the github connector where the connector didn’t verify the provided API token was granted the correct permissions - - Fixed a bug in the Google sheets connector where rate limits were not always respected - - Alpha version of Facebook marketing API v9. This connector is a native Airbyte connector \(current is Singer based\). 
- -## 12/30/2020 - -**New sources:** [Plaid](../../integrations/sources/plaid.md) \(contributed by [tgiardina](https://github.com/tgiardina)\), [Looker](../../integrations/sources/looker.md) - -## 12/18/2020 - -**New sources:** [Drift](../../integrations/sources/drift.md), [Microsoft Teams](../../integrations/sources/microsoft-teams.md) - -## 12/10/2020 - -**New sources:** [Intercom](../../integrations/sources/intercom.md), [Mixpanel](../../integrations/sources/mixpanel.md), [Jira Cloud](../../integrations/sources/jira.md), [Zoom](../../integrations/sources/zoom.md) - -## 12/07/2020 - -**New sources:** [Slack](../../integrations/sources/slack.md), [Braintree](../../integrations/sources/braintree.md), [Zendesk Support](../../integrations/sources/zendesk-support.md) - -## 12/04/2020 - -**New sources:** [Redshift](../../integrations/sources/redshift.md), [Greenhouse](../../integrations/sources/greenhouse.md) **New destination:** [Redshift](../../integrations/destinations/redshift.md) - -## 11/30/2020 - -**New sources:** [Freshdesk](../../integrations/sources/freshdesk.md), [Twilio](../../integrations/sources/twilio.md) - -## 11/25/2020 - -**New source:** [Recurly](../../integrations/sources/recurly.md) - -## 11/23/2020 - -**New source:** [Sendgrid](../../integrations/sources/sendgrid.md) - -## 11/18/2020 - -**New source:** [Mailchimp](../../integrations/sources/mailchimp.md) - -## 11/13/2020 - -**New source:** [MSSQL](../../integrations/sources/mssql.md) - -## 11/11/2020 - -**New source:** [Shopify](../../integrations/sources/shopify.md) - -## 11/09/2020 - -**New sources:** [Files \(CSV, JSON, HTML...\)](../../integrations/sources/file.md) - -## 11/04/2020 - -**New sources:** [Facebook Ads](connectors.md), [Google Ads](../../integrations/sources/google-ads.md), [Marketo](../../integrations/sources/marketo.md) **New destination:** [Snowflake](../../integrations/destinations/snowflake.md) - -## 10/30/2020 - -**New sources:** [Salesforce](../../integrations/sources/salesforce.md), Google Analytics, [HubSpot](../../integrations/sources/hubspot.md), [GitHub](../../integrations/sources/github.md), [Google Sheets](../../integrations/sources/google-sheets.md), [Rest APIs](connectors.md), and [MySQL](../../integrations/sources/mysql.md) - -## 10/21/2020 - -**New destinations:** we built our own connectors for [BigQuery](../../integrations/destinations/bigquery.md) and [Postgres](../../integrations/destinations/postgres.md), to ensure they are of the highest quality. - -## 09/23/2020 - -**New sources:** [Stripe](../../integrations/sources/stripe.md), [Postgres](../../integrations/sources/postgres.md) **New destinations:** [BigQuery](../../integrations/destinations/bigquery.md), [Postgres](../../integrations/destinations/postgres.md), [local CSV](../../integrations/destinations/csv.md) diff --git a/docs/archive/changelog/platform.md b/docs/archive/changelog/platform.md deleted file mode 100644 index 92bc158dce83..000000000000 --- a/docs/archive/changelog/platform.md +++ /dev/null @@ -1,509 +0,0 @@ ---- -description: Be sure to not miss out on new features and improvements! ---- - -# Platform - -This is the changelog for Airbyte Platform. For our connector changelog, please visit our [Connector Changelog](connectors.md) page. - -## [20-12-2021 - 0.32.5](https://github.com/airbytehq/airbyte/releases/tag/v0.32.5-alpha) -* Add an endpoint that specify that the feedback have been given after the first sync. 
- -## [18-12-2021 - 0.32.4](https://github.com/airbytehq/airbyte/releases/tag/v0.32.4-alpha) -* No major changes to Airbyte Core. - -## [18-12-2021 - 0.32.3](https://github.com/airbytehq/airbyte/releases/tag/v0.32.3-alpha) -* No major changes to Airbyte Core. - -## [18-12-2021 - 0.32.2](https://github.com/airbytehq/airbyte/releases/tag/v0.32.2-alpha) -* Improve error handling when additional sources/destinations cannot be read. -* Implement connector config dependency for OAuth consent URL. -* Treat oauthFlowInitParameters just as hidden instead of getting rid of them. -* Stop using gentle close with heartbeat. - -## [17-12-2021 - 0.32.1](https://github.com/airbytehq/airbyte/releases/tag/v0.32.1-alpha) -* Add to the new connection flow form with an existing source and destination dropdown. -* Implement protocol change for OAuth outputs. -* Enhance API for use by cloud to provide per-connector billing info. - -## [11-12-2021 - 0.32.0](https://github.com/airbytehq/airbyte/releases/tag/v0.32.0-alpha) -* This is a **MAJOR** version update. You need to [update to this version](../../operator-guides/upgrading-airbyte.md#mandatory-intermediate-upgrade) before updating to any version newer than `0.32.0` - -## [11-11-2021 - 0.31.0](https://github.com/airbytehq/airbyte/releases/tag/v0.31.0-alpha) -* No major changes to Airbyte Core. - -## [11-11-2021 - 0.30.39](https://github.com/airbytehq/airbyte/releases/tag/v0.30.39-alpha) -* We migrated our secret management to Google Secret Manager, allowing us to scale how many connectors we support. - -## [11-09-2021 - 0.30.37](https://github.com/airbytehq/airbyte/releases/tag/v0.30.37-alpha) -* No major changes to Airbyte Core. - -## [11-09-2021 - 0.30.36](https://github.com/airbytehq/airbyte/releases/tag/v0.30.36-alpha) -* No major changes to Airbyte Core. - -## [11-08-2021 - 0.30.35](https://github.com/airbytehq/airbyte/releases/tag/v0.30.35-alpha) -* No major changes to Airbyte Core. - -## [11-06-2021 - 0.30.34](https://github.com/airbytehq/airbyte/releases/tag/v0.30.34-alpha) -* No major changes to Airbyte Core. - -## [11-06-2021 - 0.30.33](https://github.com/airbytehq/airbyte/releases/tag/v0.30.33-alpha) -* No major changes to Airbyte Core. - -## [11-05-2021 - 0.30.32](https://github.com/airbytehq/airbyte/releases/tag/v0.30.32-alpha) -* Airbyte Server no longer crashes from having too many open files. - -## [11-04-2021 - 0.30.31](https://github.com/airbytehq/airbyte/releases/tag/v0.30.31-alpha) -* No major changes to Airbyte Core. - -## [11-01-2021 - 0.30.25](https://github.com/airbytehq/airbyte/releases/tag/v0.30.25-alpha) -* No major changes to Airbyte Core. - -## [11-01-2021 - 0.30.24](https://github.com/airbytehq/airbyte/releases/tag/v0.30.24-alpha) -* Incremental normalization is live. Basic normalization no longer runs on already normalized data, making it way faster and cheaper. - -## [11-01-2021 - 0.30.23](https://github.com/airbytehq/airbyte/releases/tag/v0.30.23-alpha) -* No major changes to Airbyte Core. - -## [10-21-2021 - 0.30.22](https://github.com/airbytehq/airbyte/releases/tag/v0.30.22-alpha) -* We now support experimental deployment of Airbyte on Macbooks with M1 chips! - -:::info - -This interim patch period mostly contained stability changes for Airbyte Cloud, so we skipped from `0.30.16` to `0.30.22`. - -::: - -## [10-07-2021 - 0.30.16](https://github.com/airbytehq/airbyte/releases/tag/v0.30.16-alpha) -* On Kubernetes deployments, you can now configure the Airbyte Worker Pod's image pull policy. 
- -:::info - -This interim patch period mostly contained stability changes for Airbyte Cloud, so we skipped from `0.30.2` to `0.30.16`. - -::: - -## [09-30-2021 - 0.30.2](https://github.com/airbytehq/airbyte/releases/tag/v0.30.2-alpha) -* Fixed a bug that would fail Airbyte upgrades for deployments with sync notifications. - -## [09-24-2021 - 0.29.22](https://github.com/airbytehq/airbyte/releases/tag/v0.29.22-alpha) -* We now have integration tests for SSH. - -## [09-19-2021 - 0.29.21](https://github.com/airbytehq/airbyte/releases/tag/v0.29.21-alpha) -* You can now [deploy Airbyte on Kubernetes with a Helm Chart](https://github.com/airbytehq/airbyte/pull/5891)! - -## [09-16-2021 - 0.29.19](https://github.com/airbytehq/airbyte/releases/tag/v0.29.19-alpha) -* Fixes a breaking bug that prevents Airbyte upgrading from older versions. - -## [09-15-2021 - 0.29.18](https://github.com/airbytehq/airbyte/releases/tag/v0.29.18-alpha) -* Building images is now optional in the CI build. - -## [09-08-2021 - 0.29.17](https://github.com/airbytehq/airbyte/releases/tag/v0.29.17-alpha) - -* You can now properly cancel deployments when deploying on K8s. - -## [09-08-2021 - 0.29.16](https://github.com/airbytehq/airbyte/releases/tag/v0.29.16-alpha) - -* You can now send notifications via webhook for successes and failures on Airbyte syncs. -* Scheduling jobs and worker jobs are now separated, allowing for workers to be scaled horizontally. - -## [09-04-2021 - 0.29.15](https://github.com/airbytehq/airbyte/releases/tag/v0.29.15-alpha) - -* Fixed a bug that made it possible for connector definitions to be duplicated, violating uniqueness. - -## [09-02-2021 - 0.29.14](https://github.com/airbytehq/airbyte/releases/tag/v0.29.14-alpha) - -* Nothing of note. - -## [08-27-2021 - 0.29.13](https://github.com/airbytehq/airbyte/releases/tag/v0.29.13-alpha) - -* The scheduler now waits for the server before it creates any databases. -* You can now apply tolerations for Airbyte Pods on K8s deployments. - -## [08-23-2021 - 0.29.12](https://github.com/airbytehq/airbyte/releases/tag/v0.29.12-alpha) - -* Syncs now have a `max_sync_timeout` that times them out after 3 days. -* Fixed Kube deploys when logging with Minio. - -## [08-20-2021 - 0.29.11](https://github.com/airbytehq/airbyte/releases/tag/v0.29.11-alpha) - -* Nothing of note. - -## [08-20-2021 - 0.29.10](https://github.com/airbytehq/airbyte/releases/tag/v0.29.10-alpha) - -* Migration of Python connector template images to Alpine Docker images to reduce size. - -## [08-20-2021 - 0.29.9](https://github.com/airbytehq/airbyte/releases/tag/v0.29.9-alpha) - -* Nothing of note. - -## [08-17-2021 - 0.29.8](https://github.com/airbytehq/airbyte/releases/tag/v0.29.8-alpha) - -* Nothing of note. - -## [08-14-2021 - 0.29.7](https://github.com/airbytehq/airbyte/releases/tag/v0.29.7-alpha) - -* Re-release: Fixed errant ENV variable in `0.29.6` - -## [08-14-2021 - 0.29.6](https://github.com/airbytehq/airbyte/releases/tag/v0.29.6-alpha) - -* Connector pods no longer fail with edge case names for the associated Docker images. - -## [08-14-2021 - 0.29.5](https://github.com/airbytehq/airbyte/releases/tag/v0.29.5-alpha) - -* Nothing of note. - -## [08-12-2021 - 0.29.4](https://github.com/airbytehq/airbyte/releases/tag/v0.29.4-alpha) - -* Introduced implementation for date-time support in normalization. - -## [08-9-2021 - 0.29.3](https://github.com/airbytehq/airbyte/releases/tag/v0.29.3-alpha) - -* Importing configuration no longer removes available but unused connectors. 
- -## [08-6-2021 - 0.29.2](https://github.com/airbytehq/airbyte/releases/tag/v0.29.2-alpha) - -* Fixed nil pointer exception in version migrations. - -## [07-29-2021 - 0.29.1](https://github.com/airbytehq/airbyte/releases/tag/v0.29.1-alpha) - -* When migrating, types represented in the config archive need to be a subset of the types declared in the schema. - -## [07-28-2021 - 0.29.0](https://github.com/airbytehq/airbyte/releases/tag/v0.29.0-alpha) - -* Deprecated `DEFAULT_WORKSPACE_ID`; default workspace no longer exists by default. - -## [07-28-2021 - 0.28.2](https://github.com/airbytehq/airbyte/releases/tag/v0.28.2-alpha) - -* Backend now handles workspaceId for WebBackend operations. - -## [07-26-2021 - 0.28.1](https://github.com/airbytehq/airbyte/releases/tag/v0.28.1-alpha) - -* K8s: Overly-sensitive logs are now silenced. - -## [07-22-2021 - 0.28.0](https://github.com/airbytehq/airbyte/releases/tag/v0.28.0-alpha) - -* Acceptance test dependencies fixed. - -## [07-22-2021 - 0.27.5](https://github.com/airbytehq/airbyte/releases/tag/v0.27.5-alpha) - -* Fixed unreliable logging on Kubernetes deployments. -* Introduced pre-commit to auto-format files on commits. - -## [07-21-2021 - 0.27.4](https://github.com/airbytehq/airbyte/releases/tag/v0.27.4-alpha) - -* Config persistence is now migrated to the internal Airbyte database. -* Source connector ports now properly close when deployed on Kubernetes. -* Missing dependencies added that allow acceptance tests to run. - -## [07-15-2021 - 0.27.3](https://github.com/airbytehq/airbyte/releases/tag/v0.27.3-alpha) - -* Fixed some minor API spec errors. - -## [07-12-2021 - 0.27.2](https://github.com/airbytehq/airbyte/releases/tag/v0.27.2-alpha) - -* GCP environment variable is now stubbed out to prevent noisy and harmless errors. - -## [07-8-2021 - 0.27.1](https://github.com/airbytehq/airbyte/releases/tag/v0.27.1-alpha) - -* New API endpoint: List workspaces -* K8s: Server doesn't start up before Temporal is ready to operate now. -* Silent source failures caused by last patch fixed to throw exceptions. - -## [07-1-2021 - 0.27.0](https://github.com/airbytehq/airbyte/releases/tag/v0.27.0-alpha) - -* Airbyte now automatically upgrades on server startup! - * Airbyte will check whether your `.env` Airbyte version is compatible with the Airbyte version in the database and upgrade accordingly. -* When running Airbyte on K8s logs will automatically be stored in a Minio bucket unless configured otherwise. -* CDC for MySQL now handles decimal types correctly. - -## [06-21-2021 - 0.26.2](https://github.com/airbytehq/airbyte/releases/tag/v0.26.2-alpha) - -* First-Class Kubernetes support! - -## [06-16-2021 - 0.26.0](https://github.com/airbytehq/airbyte/releases/tag/v0.26.0-alpha) - -* Custom dbt transformations! -* You can now configure your destination namespace at the table level when setting up a connection! -* Migrate basic normalization settings to the sync operations. - -## [06-09-2021 - 0.24.8 / 0.25.0](https://github.com/airbytehq/airbyte/releases/tag/v0.24.8-alpha) - -* Bugfix: Handle TINYINT\(1\) and BOOLEAN correctly and fix target file comparison for MySQL CDC. -* Bugfix: Updating the source/destination name in the UI now works as intended. - -## [06-04-2021 - 0.24.7](https://github.com/airbytehq/airbyte/releases/tag/v0.24.7-alpha) - -* Bugfix: Ensure that logs from threads created by replication workers are added to the log file. 
- -## [06-03-2021 - 0.24.5](https://github.com/airbytehq/airbyte/releases/tag/v0.24.5-alpha) - -* Remove hash from table names when it's not necessary for normalization outputs. - -## [06-03-2021 - 0.24.4](https://github.com/airbytehq/airbyte/releases/tag/v0.24.4-alpha) - -* PythonCDK: change minimum Python version to 3.7.0 - -## [05-28-2021 - 0.24.3](https://github.com/airbytehq/airbyte/releases/tag/v0.24.3-alpha) - -* Minor fixes to documentation -* Reliability updates in preparation for custom transformations -* Limit Docker log size to 500 MB \([\#3702](https://github.com/airbytehq/airbyte/pull/3702)\) - -## [05-26-2021 - 0.24.2](https://github.com/airbytehq/airbyte/releases/tag/v0.24.2-alpha) - -* Fix for file names being too long in Windows deployments \([\#3625](https://github.com/airbytehq/airbyte/pull/3625)\) -* Allow users to access the API and WebApp from the same port \([\#3603](https://github.com/airbytehq/airbyte/pull/3603)\) - -## [05-25-2021 - 0.24.1](https://github.com/airbytehq/airbyte/releases/tag/v0.24.1-alpha) - -* **Checkpointing for incremental syncs** that will now continue where they left off even if they fail! \([\#3290](https://github.com/airbytehq/airbyte/pull/3290)\) - -## [05-25-2021 - 0.24.0](https://github.com/airbytehq/airbyte/releases/tag/v0.24.0-alpha) - -* Avoid dbt runtime exception "maximum recursion depth exceeded" in ephemeral materialization \([\#3470](https://github.com/airbytehq/airbyte/pull/3470)\) - -## [05-18-2021 - 0.23.0](https://github.com/airbytehq/airbyte/releases/tag/v0.23.0-alpha) - -* Documentation to deploy locally on Windows is now available \([\#3425](https://github.com/airbytehq/airbyte/pull/3425)\) -* Connector icons are now displayed in the UI -* Restart core containers if they fail automatically \([\#3423](https://github.com/airbytehq/airbyte/pull/3423)\) -* Progress on supporting custom transformation using dbt. More updates on this soon! 
-
-## [05-11-2021 - 0.22.3](https://github.com/airbytehq/airbyte/releases/tag/v0.22.3-alpha)
-
-* Bump K8s deployment version to latest stable version, thanks to [Coetzee van Staden](https://github.com/coetzeevs)
-* Added tutorial to deploy Airbyte on Azure VM \([\#3171](https://github.com/airbytehq/airbyte/pull/3171)\), thanks to [geekwhocodes](https://github.com/geekwhocodes)
-* Progress on checkpointing to support rate limits better
-* Upgrade normalization to use dbt from docker images \([\#3186](https://github.com/airbytehq/airbyte/pull/3186)\)
-
-## [05-04-2021 - 0.22.2](https://github.com/airbytehq/airbyte/releases/tag/v0.22.2-alpha)
-
-* Split replication and normalization into separate temporal activities \([\#3136](https://github.com/airbytehq/airbyte/pull/3136)\)
-* Fix normalization nesting bug \([\#3110](https://github.com/airbytehq/airbyte/pull/3110)\)
-
-## [04-27-2021 - 0.22.0](https://github.com/airbytehq/airbyte/releases/tag/v0.22.0-alpha)
-
-* **Replace timeout for sources** \([\#3031](https://github.com/airbytehq/airbyte/pull/3031)\)
-* Fix UI issue where tables with the same name are selected together \([\#3032](https://github.com/airbytehq/airbyte/pull/3032)\)
-* Fix feed handling when feeds are unavailable \([\#2964](https://github.com/airbytehq/airbyte/pull/2964)\)
-* Export whitelisted tables \([\#3055](https://github.com/airbytehq/airbyte/pull/3055)\)
-* Create a contributor bootstrap script \(\#3028\) \([\#3054](https://github.com/airbytehq/airbyte/pull/3054)\), thanks to [nclsbayona](https://github.com/nclsbayona)
-
-## [04-20-2021 - 0.21.0](https://github.com/airbytehq/airbyte/releases/tag/v0.21.0-alpha)
-
-* **Namespace support**: supported source-destination pairs will now sync data into the same namespace as the source \(\#2862\)
-* Add **“Refresh Schema”** button \([\#2943](https://github.com/airbytehq/airbyte/pull/2943)\)
-* In the Settings, you can now **add a webhook to get notified when a sync fails**
-* Add destinationSyncModes to connection form
-* Add tooltips for connection status icons
-* New **Upgrade all** button in Admin to upgrade all your connectors at once -* New **Cancel** job button in Connections Status page when a sync job is running, so you can stop never-ending processes. - -## [03-22-2021 - 0.17.2](https://github.com/airbytehq/airbyte/releases/tag/v0.17.2-alpha) - -* Improved the speed of get spec, check connection, and discover schema by migrating to the Temporal workflow engine. -* Exposed cancellation for sync jobs in the API \(will be exposed in the UI in the next week!\). -* Bug fix: Fix issue where migration app was OOMing. - -## [03-15-2021 - 0.17.1](https://github.com/airbytehq/airbyte/releases/tag/v0.17.1-alpha) - -* **Creating and deleting multiple workspaces** is now supported via the API. Thanks to [@Samuel Gordalina](https://github.com/gordalina) for contributing this feature! -* Normalization now supports numeric types with precision greater than 32 bits -* Normalization now supports union data types -* Support longform text inputs in the UI for cases where you need to preserve formatting on connector inputs like .pem keys -* Expose the latest available connector versions in the API -* Airflow: published a new [tutorial](https://docs.airbyte.com/operator-guides/using-the-airflow-airbyte-operator/) for how to use the Airbyte operator. Thanks [@Marcos Marx](https://github.com/marcosmarxm) for writing the tutorial! -* Connector Contributions: All connectors now describe how to contribute to them without having to touch Airbyte’s monorepo build system -- just work on the connector in your favorite dev setup! - -## [03-08-2021 - 0.17](https://github.com/airbytehq/airbyte/releases/tag/v0.17.0-alpha) - -* **Integration with Airflow** is here. Thanks to @Marcos Marx, you can now run Airbyte jobs from Airflow directly. A tutorial is on the way and should be coming this week! -* Add a prefix for tables, so that tables with the same name don't clobber each other in the destination - -## [03-01-2021 - 0.16](https://github.com/airbytehq/airbyte/milestone/22?closed=1) - -* We made some progress to address **nested tables in our normalization.** - - Previously, basic normalization would output nested tables as-is and append a number for duplicate tables. For example, Stripe’s nested address fields go from: - - ```text - Address - address_1 - ``` - - To - - ```text - Charges_source_owner_755_address - customers_shipping_c70_address - ``` - - After the change, the parent tables are combined with the name of the nested table to show where the nested table originated. **This is a breaking change for the consumers of nested tables. Consumers will need to update to point at the new tables.** - -## [02-19-2021 - 0.15](https://github.com/airbytehq/airbyte/milestone/22?closed=1) - -* We now handle nested tables with the normalization steps. Check out the video below to see how it works. 
-
-{% embed url="https://youtu.be/I4fngMnkJzY" caption="" %}
-
-## [02-12-2021 - 0.14](https://github.com/airbytehq/airbyte/milestone/21?closed=1)
-
-* Front-end changes:
-  * Display Airbyte's version number
-  * Describe schemas using JsonSchema
-  * Better feedback on buttons
-
-## [Beta launch - 0.13](https://github.com/airbytehq/airbyte/milestone/15?closed=1) - Released 02/02/2021
-
-* Add connector build status dashboard
-* Support Schema Changes in Sources
-* Support Import / Export of Airbyte Data in the Admin section of the UI
-* Bug fixes:
-  * If Airbyte is closed during a sync, the running job is not marked as failed
-  * Airbyte should fail when deployment version doesn't match data version
-  * Upgrade Airbyte Version without losing existing configuration / data
-
-## [0.12-alpha](https://github.com/airbytehq/airbyte/milestone/14?closed=1) - Released 01/20/2021
-
-* Ability to skip onboarding
-* Miscellaneous bug fixes:
-  * A long discovery request causes a timeout in the UI
-  * Out of Memory when replicating a large table from MySQL
-
-## 0.11.2-alpha - Released 01/18/2021
-
-* Increase timeout for long running catalog discovery operations from 3 minutes to 30 minutes to avoid prematurely failing long-running operations
-
-## 0.11.1-alpha - Released 01/17/2021
-
-### Bugfixes
-
-* Writing boolean columns to Redshift destination now works correctly
-
-## [0.11.0-alpha](https://github.com/airbytehq/airbyte/milestone/12?closed=1) - Delivered 01/14/2021
-
-### New features
-
-* Allow skipping the onboarding flow in the UI
-* Add the ability to reset a connection's schema when the underlying data source schema changes
-
-### Bugfixes
-
-* Fix UI race condition which showed config for the wrong connector when rapidly choosing between different connectors
-* Fix a bug in MSSQL and Redshift source connectors where custom SQL types weren't being handled correctly. [Pull request](https://github.com/airbytehq/airbyte/pull/1576)
-* Support incremental sync for Salesforce, Slack, and Braintree sources
-* Gracefully handle invalid numeric values \(e.g. NaN or Infinity\) in MySQL, MSSQL, and Postgres DB sources
-* Fix flashing red sources/destinations fields after a successful submit
-* Fix a bug which caused getting a connector's specification to hang indefinitely if the connector docker image failed to download
-
-### New connectors
-
-* Tempo
-* Appstore
-
-## [0.10.0](https://github.com/airbytehq/airbyte/milestone/12?closed=1) - delivered on 01/04/2021
-
-* You can now **deploy Airbyte on** [**Kubernetes**](https://docs.airbyte.com/deploying-airbyte/on-kubernetes) \(alpha version\)
-* **Support incremental sync** for Mixpanel and HubSpot sources
-* **Fixes on connectors:**
-  * Fixed a bug in the GitHub connector where the connector didn’t verify the provided API token was granted the correct permissions
-  * Fixed a bug in the Google Sheets connector where rate limits were not always respected
-  * Alpha version of Facebook marketing API v9. This connector is a native Airbyte connector \(current is Singer based\).
-* **New source:** Plaid \(contributed by [@tgiardina](https://github.com/tgiardina) - thanks Thomas!\)
-
-## [0.9.0](https://github.com/airbytehq/airbyte/milestone/11?closed=1) - delivered on 12/23/2020
-
-* **New chat app from the web app** so you can directly chat with the team for any issues you run into
-* **Debugging** has been made easier in the UI, with checks, discover logs, and sync download logs
-* Support for **Kubernetes locally**. GKE will come in the next release.
-* **New source:** Looker
-
-## [0.8.0](https://github.com/airbytehq/airbyte/milestone/10?closed=1) - delivered on 12/17/2020
-
-* **Incremental - Append**
-  * We now allow sources to replicate only new or modified data. This avoids re-fetching data that you have already replicated from a source.
-  * The delta from a sync will be _appended_ to the existing data in the data warehouse.
-  * Here are [all the details of this feature](../../understanding-airbyte/connections/incremental-append.md).
-  * It has been released for 15 connectors, including Postgres, MySQL, Intercom, Zendesk, Stripe, Twilio, Marketo, Shopify, GitHub, and all the destination connectors. We will expand it to all the connectors in the next couple of weeks.
-* **Other features:**
-  * Improve interface for writing python sources \(should make writing new python sources easier and clearer\).
-  * Add support for running Standard Source Tests with files \(making them easy to run for any language a source is written in\)
-  * Add ability to reset data for a connection.
-* **Bug fixes:**
-  * Update version of test containers we use to avoid pull issues while running tests.
-  * Fix issue where jobs were not sorted by created at in connection detail view.
-* **New sources:** Intercom, Mixpanel, Jira Cloud, Zoom, Drift, Microsoft Teams
-
-## [0.7.0](https://github.com/airbytehq/airbyte/milestone/8?closed=1) - delivered on 12/07/2020
-
-* **New destination:** our own **Redshift** warehouse connector. You can also use this connector for Panoply.
-* **New sources**: 8 additional source connectors, including Recurly, Twilio, Freshdesk, Greenhouse, Redshift \(source\), Braintree, Slack, and Zendesk Support
-* Bug fixes
-
-## [0.6.0](https://github.com/airbytehq/airbyte/milestone/6?closed=1) - delivered on 11/23/2020
-
-* Support **multiple destinations**
-* **New source:** Sendgrid
-* Support **basic normalization**
-* Bug fixes
-
-## [0.5.0](https://github.com/airbytehq/airbyte/milestone/5?closed=1) - delivered on 11/18/2020
-
-* **New sources:** 10 additional source connectors, including Files \(CSV, HTML, JSON...\), Shopify, MSSQL, Mailchimp
-
-## [0.4.0](https://github.com/airbytehq/airbyte/milestone/4?closed=1) - delivered on 11/04/2020
-
-Here is what we are working on right now:
-
-* **New destination**: our own **Snowflake** warehouse connector
-* **New sources:** Facebook Ads, Google Ads.
-
-## [0.3.0](https://github.com/airbytehq/airbyte/milestone/3?closed=1) - delivered on 10/30/2020
-
-* **New sources:** Salesforce, GitHub, Google Sheets, Google Analytics, HubSpot, Rest APIs, and MySQL
-* Integration test suite for sources
-* Improve build speed
-
-## [0.2.0](https://github.com/airbytehq/airbyte/milestone/2?closed=1) - delivered on 10/21/2020
-
-* **a new Admin section** to enable users to add their own connectors, in addition to upgrading the ones they currently use
-* improve the developer experience \(DX\) for **contributing new connectors** with additional documentation and a connector protocol
-* our own **BigQuery** warehouse connector
-* our own **Postgres** warehouse connector
-* simplify the process of supporting new Singer taps, ideally make it a 1-day process
-
-## [0.1.0](https://github.com/airbytehq/airbyte/milestone/1?closed=1) - delivered on 09/23/2020
-
-This is our very first release after 2 months of work.
-
-* **New sources:** Stripe, Postgres
-* **New destinations:** BigQuery, Postgres
-* **Only one destination**: we only support one destination in that 1st release, but you will soon be able to add as many as you need.
-* **Logs & monitoring**: you can now see your detailed logs
-* **Scheduler:** you now have 10 different frequency options for your recurring syncs
-* **Deployment:** you can now deploy Airbyte via a simple Docker image, or directly on AWS and GCP
-* **New website**: this is the day we launch our website - airbyte.io. Let us know what you think
-* **New documentation:** this is the 1st day for our documentation too
-* **New blog:** we published a few articles on our startup journey, but also about our vision of making data integrations a commodity.
-
-Stay tuned, we will have new sources and destinations very soon! Don't hesitate to subscribe to our [newsletter](https://airbyte.io/#subscribe-newsletter) to receive our product updates and community news.
-
diff --git a/docs/archive/examples/README.md b/docs/archive/examples/README.md
deleted file mode 100644
index e62ee1c8eb21..000000000000
--- a/docs/archive/examples/README.md
+++ /dev/null
@@ -1,2 +0,0 @@
-# Example Use Cases
-
diff --git a/docs/archive/examples/build-a-slack-activity-dashboard.md b/docs/archive/examples/build-a-slack-activity-dashboard.md
deleted file mode 100644
index b63a2b65babb..000000000000
--- a/docs/archive/examples/build-a-slack-activity-dashboard.md
+++ /dev/null
@@ -1,424 +0,0 @@
----
-description: Using Airbyte and Apache Superset
----
-
-# Build a Slack Activity Dashboard
-
-![](../../.gitbook/assets/46.png)
-
-This article will show how to use [Airbyte](http://airbyte.com) - an open-source data integration platform - and [Apache Superset](https://superset.apache.org/) - an open-source data exploration platform - in order to build a Slack activity dashboard showing:
-
-* Total number of members of a Slack workspace
-* The evolution of the number of Slack workspace members
-* Evolution of weekly messages
-* Evolution of messages per channel
-* Members per time zone
-
-Before we get started, let’s take a high-level look at how we are going to build a Slack dashboard using Airbyte and Apache Superset.
-
-1. We will use Airbyte’s Slack connector to get the data off a Slack workspace \(we will be using Airbyte’s own Slack workspace for this tutorial\).
-2. We will save the data into a PostgreSQL database.
-3. Finally, using Apache Superset, we will implement the various metrics we care about.
-
-Got it? Now let’s get started.
-
-## 1. Replicating Data from Slack to Postgres with Airbyte
-
-### a. Deploying Airbyte
-
-There are several easy ways to deploy Airbyte, as listed [here](https://docs.airbyte.com/). For this tutorial, I will just use the [Docker Compose method](https://docs.airbyte.com/deploying-airbyte/local-deployment) from my workstation:
-
-```text
-# In your workstation terminal
-git clone https://github.com/airbytehq/airbyte.git
-cd airbyte
-docker-compose up
-```
-
-The above command will make the Airbyte app available on `localhost:8000`. Visit the URL in your favorite browser, and you should see Airbyte’s dashboard \(if this is your first time, you will be prompted to enter your email to get started\).
-
-If you haven’t set Docker up, follow the [instructions here](https://docs.docker.com/desktop/) to set it up on your machine.
-
-### b. Setting Up Airbyte’s Slack Source Connector
-
-Airbyte’s Slack connector will give us access to the data.
So, we are going to kick things off by setting this connector to be our data source in Airbyte’s web app. I am assuming you already have Airbyte and Docker set up on your local machine. We will be using Docker to create our PostgreSQL database container later on. - -Now, let’s proceed. If you already went through the onboarding, click on the “new source” button at the top right of the Sources section. If you're going through the onboarding, then follow the instructions. - -You will be requested to enter a name for the source you are about to create. You can call it “slack-source”. Then, in the Source Type combo box, look for “Slack,” and then select it. Airbyte will then present the configuration fields needed for the Slack connector. So you should be seeing something like this on the Airbyte App: - -![](../../.gitbook/assets/1.png) - -The first thing you will notice is that this connector requires a Slack token. So, we have to obtain one. If you are not a workspace admin, you will need to ask for permission. - -Let’s walk through how we would get the Slack token we need. - -Assuming you are a workspace admin, open the Slack workspace and navigate to \[Workspace Name\] > Administration > Customize \[Workspace Name\]. In our case, it will be Airbyte > Administration > Customize Airbyte \(as shown below\): - -![](../../.gitbook/assets/2.png) - -In the new page that opens up in your browser, you will then need to navigate to **Configure apps**. - -![](../../.gitbook/assets/3.png) - -In the new window that opens up, click on **Build** in the top right corner. - -![](../../.gitbook/assets/4.png) - -Click on the **Create an App** button. - -![](../../.gitbook/assets/5.png) - -In the modal form that follows, give your app a name - you can name it `airbyte_superset`, then select your workspace from the Development Slack Workspace. - -![](../../.gitbook/assets/6.png) - -Next, click on the **Create App** button. You will then be presented with a screen where we are going to set permissions for our `airbyte_superset` app, by clicking on the **Permissions** button on this page. - -![](../../.gitbook/assets/7.png) - -In the next screen, navigate to the scope section. Then, click on the **Add an OAuth Scope** button. This will allow you to add permission scopes for your app. At a minimum, your app should have the following permission scopes: - -![](../../.gitbook/assets/8.png) - -Then, we are going to add our created app to the workspace by clicking the **Install to Workspace** button. - -![](../../.gitbook/assets/9.png) - -Slack will prompt you that your app is requesting permission to access your workspace of choice. Click Allow. - -![](../../.gitbook/assets/10.png) - -After the app has been successfully installed, you will be navigated to Slack’s dashboard, where you will see the Bot User OAuth Access Token. - -This is the token you will provide back on the Airbyte page, where we dropped off to obtain this token. So make sure to copy it and keep it in a safe place. - -Now that we are done with obtaining a Slack token, let’s go back to the Airbyte page we dropped off and add the token in there. - -We will also need to provide Airbyte with `start_date`. This is the date from which we want Airbyte to start replicating data from the Slack API, and we define that in the format: `YYYY-MM-DDT00:00:00Z`. - -We will specify ours as `2020-09-01T00:00:00Z`. 
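If you don't want to type the timestamp out by hand, one quick way to print today's date in exactly that format is the standard `date` utility (an illustrative convenience, assuming a typical Linux or macOS shell):

```text
date -u +"%Y-%m-%dT00:00:00Z"
# prints something like 2021-04-01T00:00:00Z, which you can paste into the start_date field
```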
We will also tell Airbyte to exclude archived channels and not include private channels, and also to join public channels, so the latter part of the form should look like this:
-
-![](../../.gitbook/assets/11.png)
-
-Finally, click on the **Set up source** button for Airbyte to set the Slack source up.
-
-If the source was set up correctly, you will be taken to the destination section of Airbyte’s dashboard, where you will tell Airbyte where to store the replicated data.
-
-### c. Setting Up Airbyte’s Postgres Destination Connector
-
-For our use case, we will be using PostgreSQL as the destination.
-
-Click the **add destination** button in the top right corner, then click on **add a new destination**.
-
-![](../../.gitbook/assets/12.png)
-
-In the next screen, Airbyte will validate the source, and then present you with a form to give your destination a name. We’ll call this destination slack-destination. Then, we will select the Postgres destination type. Your screen should look like this now:
-
-![](../../.gitbook/assets/13.png)
-
-Great! We have a form to enter Postgres connection credentials, but we haven’t set up a Postgres database yet. Let’s do that!
-
-Since we already have Docker installed, we can spin up a Postgres container with the following command in our terminal:
-
-```text
-docker run --rm --name slack-db -e POSTGRES_PASSWORD=password -p 2000:5432 -d postgres
-```
-
-\(Note that the Docker compose file for Superset ships with a Postgres database, as you can see [here](https://github.com/apache/superset/blob/master/docker-compose.yml#L40)\).
-
-The above command will do the following:
-
-* create a Postgres container with the name slack-db,
-* set the password to password,
-* expose the container’s port 5432 as our machine’s port 2000,
-* create a database and a user, both called postgres.
-
-With this, we can go back to the Airbyte screen and supply the information needed. Your form should look like this:
-
-![](../../.gitbook/assets/14.png)
-
-Then click on the **Set up destination** button.
-
-### d. Setting Up the Replication
-
-You should now see the following screen:
-
-![](../../.gitbook/assets/15.png)
-
-Airbyte will then fetch the schema for the data coming from the Slack API for your workspace. You should leave all boxes checked and then choose the sync frequency - this is the interval at which Airbyte will sync the data coming from your workspace. Let’s set the sync interval to every 24 hours.
-
-Then click on the **Set up connection** button.
-
-Airbyte will now take you to the destination dashboard, where you will see the destination you just set up. Click on it to see more details about this destination.
-
-![](../../.gitbook/assets/16.png)
-
-You will see Airbyte running the very first sync. Depending on the size of the data Airbyte is replicating, it might take a while before syncing is complete.
-
-![](../../.gitbook/assets/17.png)
-
-When it’s done, you will see the **Running status** change to **Succeeded**, and the size of the data Airbyte replicated as well as the number of records being stored in the Postgres database.
-
-![](../../.gitbook/assets/18.png)
-
-To test if the sync worked, run the following in your terminal:
-
-```text
-docker exec slack-db psql -U postgres -c "SELECT * FROM public.users;"
-```
-
-This should output the rows in the users table.
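If you'd like to see everything the sync created, not just the users table, you can also list all tables in the `public` schema. The exact set of tables depends on the streams you selected and on the connector version, but a quick way to check is:

```text
docker exec slack-db psql -U postgres -c "\dt public.*"
```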
-
-To get the row count of the users table as well, you can also run:
-
-```text
-docker exec slack-db psql -U postgres -c "SELECT count(*) FROM public.users;"
-```
-
-Now that we have the data from the Slack workspace in our Postgres destination, we will head on to creating the Slack dashboard with Apache Superset.
-
-## 2. Setting Up Apache Superset for the Dashboards
-
-### a. Installing Apache Superset
-
-Apache Superset, or simply Superset, is a modern data exploration and visualization platform. To get started using it, we will be cloning the Superset repo. Navigate to a destination in your terminal where you want to clone the Superset repo to and run:
-
-```text
-git clone https://github.com/apache/superset.git
-```
-
-It’s recommended to check out the latest branch of Superset, so run:
-
-```text
-cd superset
-```
-
-And then run:
-
-```text
-git checkout latest
-```
-
-Superset needs you to install and build its frontend dependencies and assets, which live in the `superset-frontend` folder. So, we will move into that folder and install the frontend dependencies:
-
-```text
-cd superset-frontend
-npm install
-```
-
-Note: The above command assumes you have both Node and NPM installed on your machine.
-
-Finally, for the frontend, we will build the assets by running:
-
-```text
-npm run build
-```
-
-After that, go back up one directory into the Superset directory by running:
-
-```text
-cd ..
-```
-
-Then run:
-
-```text
-docker-compose up
-```
-
-This will download the Docker images Superset needs, build its containers, and start the services Superset requires to run locally on your machine.
-
-Once that’s done, you should be able to access Superset on your browser by visiting [`http://localhost:8088`](http://localhost:8088), and you should be presented with the Superset login screen.
-
-Enter username: **admin** and Password: **admin** to be taken to your Superset dashboard.
-
-Great! You’ve got Superset set up. Now let’s tell Superset about our Postgres database holding the Slack data from Airbyte.
-
-### b. Setting Up a Postgres Database in Superset
-
-To do this, on the top menu in your Superset dashboard, hover on the Data dropdown and click on **Databases**.
-
-![](../../.gitbook/assets/19.png)
-
-In the page that opens up, click on the **+ Database** button in the top right corner.
-
-![](../../.gitbook/assets/20.png)
-
-Then, you will be presented with a modal to add your Database Name and the connection URI.
-
-![](../../.gitbook/assets/21.png)
-
-Let’s call our Database `slack_db`, and then add the following URI as the connection URI:
-
-```text
-postgresql://postgres:password@docker.for.mac.localhost:2000/postgres
-```
-
-If you are on a Windows machine, yours will be:
-
-```text
-postgresql://postgres:password@docker.for.win.localhost:2000/postgres
-```
-
-Note: We are using `docker.for.[mac|win].localhost` in order to access the localhost of your machine, because using just localhost will point to the Docker container network and not your machine’s network.
-
-Your Superset UI should look like this:
-
-![](../../.gitbook/assets/22.png)
-
-We will need to enable some settings on this connection. Click on **SQL LAB SETTINGS** and check the following boxes:
-
-![](../../.gitbook/assets/23.png)
-
-Afterwards, click on the **ADD** button, and you will see your database on the data page of Superset.
-
-![](../../.gitbook/assets/24.png)
-
-### c. Importing our dataset
-
-Now that you’ve added the database, you will need to hover over the data menu again; now click on **Datasets**.
- -![](../../.gitbook/assets/25.png) - -Then, you will be taken to the datasets page: - -![](../../.gitbook/assets/26.png) - -We want to only see the datasets that are in our `slack_db` database, so in the Database that is currently showing All, select `slack_db` and you will see that we don’t have any datasets at the moment. - -![](../../.gitbook/assets/27.png) - -![](../../.gitbook/assets/28.png) - -You can fix this by clicking on the **+ DATASET** button and adding the following datasets. - -Note: Make sure you select the public schema under the Schema dropdown. - -![](../../.gitbook/assets/29.png) - -Now that we have set up Superset and given it our Slack data, let’s proceed to creating the visualizations we need. - -Still remember them? Here they are again: - -* Total number of members of a Slack workspace -* The evolution of the number of Slack workspace members -* Evolution of weekly messages -* Evolution of weekly threads created -* Evolution of messages per channel -* Members per time zone - -## 3. Creating Our Dashboards with Superset - -### a. Total number of members of a Slack workspace - -To get this, we will first click on the users’ dataset of our `slack_db` on the Superset dashboard. - -![](../../.gitbook/assets/30.png) - -Next, change **untitled** at the top to **Number of Members**. - -![](../../.gitbook/assets/31.png) - -Now change the **Visualization Type** to **Big Number,** remove the **Time Range** filter, and add a Subheader named “Slack Members.” So your UI should look like this: - -![](../../.gitbook/assets/32.png) - -Then, click on the **RUN QUERY** button, and you should now see the total number of members. - -Pretty cool, right? Now let’s save this chart by clicking on the **SAVE** button. - -![](../../.gitbook/assets/33.png) - -Then, in the **ADD TO DASHBOARD** section, type in “Slack Dashboard”, click on the “Create Slack Dashboard” button, and then click the **Save** button. - -Great! We have successfully created our first Chart, and we also created the Dashboard. Subsequently, we will be following this flow to add the other charts to the created Slack Dashboard. - -### b. Casting the ts column - -Before we proceed with the rest of the charts for our dashboard, if you inspect the **ts** column on either the **messages** table or the **threads** table, you will see it’s of the type `VARCHAR`. We can’t really use this for our charts, so we have to cast both the **messages** and **threads**’ **ts** column as `TIMESTAMP`. Then, we can create our charts from the results of those queries. Let’s do this. - -First, navigate to the **Data** menu, and click on the **Datasets** link. In the list of datasets, click the **Edit** button for the **messages** table. - -![](../../.gitbook/assets/34.png) - -You’re now in the Edit Dataset view. Click the **Lock** button to enable editing of the dataset. Then, navigate to the **Columns** tab, expand the **ts** dropdown, and then tick the **Is Temporal** box. - -![](../../.gitbook/assets/35.png) - -Persist the changes by clicking the Save button. - -### c. The evolution of the number of Slack workspace members - -In the exploration page, let’s first get the chart showing the evolution of the number of Slack members. To do this, make your settings on this page match the screenshot below: - -![](../../.gitbook/assets/36.png) - -Save this chart onto the Slack Dashboard. - -### d. Evolution of weekly messages posted - -Now, we will look at the evolution of weekly messages posted. 
Let’s configure the chart settings on the same page as the previous one.
-
-![](../../.gitbook/assets/37.png)
-
-Remember, your visualization will differ based on the data you have.
-
-### e. Evolution of weekly threads created
-
-Now that we are finished with the message chart, let's go over to the thread chart. You will recall that we will need to cast the **ts** column as stated earlier. So, do that and get to the exploration page, and make it match the screenshot below to achieve the required visualization:
-
-![](../../.gitbook/assets/38.png)
-
-### f. Evolution of messages per channel
-
-For this visualization, we will need a more complex SQL query. Here’s the query we used \(as you can see in the screenshot below\):
-
-```text
-SELECT CAST(m.ts AS TIMESTAMP), c.name, m.text
-FROM public.messages m
-INNER JOIN public.channels c
-ON m.channel_id = c.id
-```
-
-![](../../.gitbook/assets/39.png)
-
-Next, click on **EXPLORE** to be taken to the exploration page; make it match the screenshot below:
-
-![](../../.gitbook/assets/40.png)
-
-Save this chart to the dashboard.
-
-### g. Members per time zone
-
-Finally, we will be visualizing members per time zone. To do this, instead of casting in the SQL lab as we’ve previously done, we will explore another method to achieve casting by using Superset’s Virtual calculated column feature. This feature allows us to write SQL queries that customize the appearance and behavior of a specific column.
-
-For our use case, we will need the updated column of the users table to be a `TIMESTAMP`, in order to perform the visualization we need for Members per time zone. Let’s start by clicking the edit icon on the users table in Superset.
-
-![](../../.gitbook/assets/41.png)
-
-You will be presented with a modal like so:
-
-![](../../.gitbook/assets/42.png)
-
-Click on the **CALCULATED COLUMNS** tab:
-
-![](../../.gitbook/assets/43.png)
-
-Then, click on the **+ ADD ITEM** button, and make your settings match the screenshot below.
-
-![](../../.gitbook/assets/44.png)
-
-Then, go to the **exploration** page and make it match the settings below:
-
-![](../../.gitbook/assets/45.png)
-
-Now save this last chart, and head over to your Slack Dashboard. It should look like this:
-
-![](../../.gitbook/assets/46.png)
-
-Of course, you can edit how the dashboard looks to fit what you want on it.
-
-## Conclusion
-
-In this article, we looked at using Airbyte’s Slack connector to get the data from a Slack workspace into a Postgres database, and then used Apache Superset to craft a dashboard of visualizations. If you have any questions about Airbyte, don’t hesitate to ask them on our [Slack](https://slack.airbyte.io)! If you have questions about Superset, you can join the [Superset Community Slack](https://superset.apache.org/community/)!
-
diff --git a/docs/archive/examples/postgres-replication.md b/docs/archive/examples/postgres-replication.md
deleted file mode 100644
index 160da6d20f7a..000000000000
--- a/docs/archive/examples/postgres-replication.md
+++ /dev/null
@@ -1,116 +0,0 @@
----
-description: Start syncing data in minutes with Airbyte
----
-
-# Postgres Replication
-
-Let's see how you can spin up a local instance of Airbyte and sync data from one Postgres database to another.
-
-Here's a 6-minute video showing you how you can do it.
-
-{% embed url="https://www.youtube.com/watch?v=Rcpt5SVsMpk" caption="" %}
-
-First of all, make sure you have Docker and Docker Compose installed.
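A quick way to confirm both are available is to print their versions from a terminal (any reasonably recent version should work for this walkthrough):

```text
docker --version
docker-compose --version
```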
If this isn't the case, follow the [guide](../../deploying-airbyte/local-deployment.md) for the recommended approach to install Docker. - -Once Docker is installed successfully, run the following commands: - -```text -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -docker-compose up -``` - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000/](http://localhost:8000/). - -## 1. Set up your preferences - -You should see an onboarding page. Enter your email and continue. - -![](../../.gitbook/assets/airbyte_get-started.png) - -## 2. Set up your first connection - -We support a growing [list of source connectors](https://docs.airbyte.com/category/sources). For now, we will start out with a Postgres source and destination. - -**If you don't have a readily available Postgres database to sync, here are some quick instructions:** -Run the following commands in a new terminal window to start backgrounded source and destination databases: - -```text -docker run --rm --name airbyte-source -e POSTGRES_PASSWORD=password -p 2000:5432 -d postgres -docker run --rm --name airbyte-destination -e POSTGRES_PASSWORD=password -p 3000:5432 -d postgres -``` - -Add a table with a few rows to the source database: - -```text -docker exec -it airbyte-source psql -U postgres -c "CREATE TABLE users(id SERIAL PRIMARY KEY, col1 VARCHAR(200));" -docker exec -it airbyte-source psql -U postgres -c "INSERT INTO public.users(col1) VALUES('record1');" -docker exec -it airbyte-source psql -U postgres -c "INSERT INTO public.users(col1) VALUES('record2');" -docker exec -it airbyte-source psql -U postgres -c "INSERT INTO public.users(col1) VALUES('record3');" -``` - -You now have a Postgres database ready to be replicated! - -### **Connect the Postgres database** - -In the UI, you will see a wizard that allows you choose the data you want to send through Airbyte. - -![](../../.gitbook/assets/02_set-up-sources.png) - -Use the name `airbyte-source` for the name and `Postgres`as the type. If you used our instructions to create a Postgres database, fill in the configuration fields as follows: - -```text -Host: localhost -Port: 2000 -User: postgres -Password: password -DB Name: postgres -``` - -Click on `Set Up Source` and the wizard should move on to allow you to configure a destination. - -We support a growing list of data warehouses, lakes and databases. For now, use the name `airbyte-destination`, and configure the destination Postgres database: - -```text -Host: localhost -Port: 3000 -User: postgres -Password: password -DB Name: postgres -``` - -After adding the destination, you can choose what tables and columns you want to sync. - -![](../../.gitbook/assets/03_set-up-connection.png) - -For this demo, we recommend leaving the defaults and selecting "Every 5 Minutes" as the frequency. Click `Set Up Connection` to finish setting up the sync. - -## 3. Check the logs of your first sync - -You should now see a list of sources with the source you just added. Click on it to find more information about your connection. This is the page where you can update any settings about this source and how it syncs. There should be a `Completed` job under the history section. If you click on that run, it will show logs from that run. - -![](../../.gitbook/assets/04_source-details.png) - -One of biggest problems we've seen in tools like Fivetran is the lack of visibility when debugging. In Airbyte, allowing full log access and the ability to debug and fix connector problems is one of our highest priorities. 
We'll be working hard to make these logs accessible and understandable. - -## 4. Check if the syncing actually worked - -Now let's verify that this worked. Let's output the contents of the destination db: - -```text -docker exec airbyte-destination psql -U postgres -c "SELECT * FROM public.users;" -``` - -:::info - -Don't worry about the awkward `public_users` name for now; we are currently working on an update to allow users to configure their destination table names! - -::: - -You should see the rows from the source database inside the destination database! - -And there you have it. You've taken data from one database and replicated it to another. All of the actual configuration for this replication only took place in the UI. - -That's it! This is just the beginning of Airbyte. If you have any questions at all, please reach out to us on [Slack](https://slack.airbyte.io/). We’re still in alpha, so if you see any rough edges or want to request a connector you need, please create an issue on our [Github](https://github.com/airbytehq/airbyte) or leave a thumbs up on an existing issue. - -Thank you and we hope you enjoy using Airbyte. diff --git a/docs/archive/examples/slack-history.md b/docs/archive/examples/slack-history.md deleted file mode 100644 index 6305798bffee..000000000000 --- a/docs/archive/examples/slack-history.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -description: Using Airbyte and MeiliSearch ---- - -# Save and Search Through Your Slack History on a Free Slack Plan - -![](../../.gitbook/assets/slack-history-ui-title.png) - -The [Slack free tier](https://slack.com/pricing/paid-vs-free) saves only the last 10K messages. For social Slack instances, it may be impractical to upgrade to a paid plan to retain these messages. Similarly, for an open-source project like [Airbyte](../../understanding-airbyte/airbyte-protocol.md#catalog) where we interact with our community through a public Slack instance, the cost of paying for a seat for every Slack member is prohibitive. - -However, searching through old messages can be really helpful. Losing that history feels like some advanced form of memory loss. What was that joke about Java 8 Streams? This contributor question sounds familiar—haven't we seen it before? But you just can't remember! - -This tutorial will show you how you can, for free, use Airbyte to save these messages \(even after Slack removes access to them\). It will also provide you a convenient way to search through them. - -Specifically, we will export messages from your Slack instance into an open-source search engine called [MeiliSearch](https://github.com/meilisearch/meilisearch). We will be focusing on getting this setup running from your local workstation. We will mention at the end how you can set up a more productionized version of this pipeline. - -We want to make this process easy, so while we will link to some external documentation for further exploration, we will provide all the instructions you need here to get this up and running. - -## 1. Set Up MeiliSearch - -First, let's get MeiliSearch running on our workstation. MeiliSearch has extensive docs for [getting started](https://docs.meilisearch.com/reference/features/installation.html#download-and-launch). For this tutorial, however, we will give you all the instructions you need to set up MeiliSearch using Docker. - -```text -docker run -it --rm \ - -p 7700:7700 \ - -v $(pwd)/data.ms:/data.ms \ - getmeili/meilisearch -``` - -That's it! 
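If you want to double-check that MeiliSearch came up correctly before moving on, you can query its health endpoint (assuming the default port mapping from the command above):

```bash
curl http://localhost:7700/health
# a healthy instance answers with a small JSON status payload
```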
- -:::info - -MeiliSearch stores data in $\(pwd\)/data.ms, so if you prefer to store it somewhere else, just adjust this path. - -::: - -## 2. Replicate Your Slack Messages to MeiliSearch - -### a. Set Up Airbyte - -Make sure you have Docker and Docker Compose installed. If you haven’t set Docker up, follow the [instructions here](https://docs.docker.com/desktop/) to set it up on your machine. Then, run the following commands: - -```bash -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -docker-compose up -``` - -If you run into any problems, feel free to check out our more extensive [Getting Started FAQ](https://discuss.airbyte.io/c/faq/15) for help. - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000/](http://localhost:8000/). Once you have set your user preferences, you will be brought to a page that asks you to set up a source. In the next step, we'll go over how to do that. - -### b. Set Up Airbyte’s Slack Source Connector - -In the Airbyte UI, select Slack from the dropdown. We provide step-by-step instructions for setting up the Slack source in Airbyte [here](https://docs.airbyte.com/integrations/sources/slack#setup-guide). These will walk you through how to complete the form on this page. - -![](../../.gitbook/assets/slack-history-setup-wizard.png) - -By the end of these instructions, you should have created a Slack source in the Airbyte UI. For now, just add your Slack app to a single public channel \(you can add it to more channels later\). Only messages from that channel will be replicated. - -The Airbyte app will now prompt you to set up a destination. Next, we will walk through how to set up MeiliSearch. - -### c. Set Up Airbyte’s MeiliSearch Destination Connector - -Head back to the Airbyte UI. It should still be prompting you to set up a destination. Select "MeiliSearch" from the dropdown. For the `host` field, set: `http://localhost:7700`. The `api_key` can be left blank. - -### d. Set Up the Replication - -On the next page, you will be asked to select which streams of data you'd like to replicate. We recommend unchecking "files" and "remote files" since you won't really be able to search them easily in this search engine. - -![](../../.gitbook/assets/airbyte_connection-settings.png) - -For frequency, we recommend every 24 hours. - -## 3. Search MeiliSearch - -After the connection has been saved, Airbyte should start replicating the data immediately. When it completes you should see the following: - -![](../../.gitbook/assets/slack-history-sync.png) - -When the sync is done, you can sanity check that this is all working by making a search request to MeiliSearch. Replication can take several minutes depending on the size of your Slack instance. - -```bash -curl 'http://localhost:7700/indexes/messages/search' --data '{ "q": "" }' -``` - -For example, I have the following message in one of the messages that I replicated: "welcome to airbyte". - -```bash -curl 'http://localhost:7700/indexes/messages/search' --data '{ "q": "welcome to" }' -# => {"hits":[{"_ab_pk":"7ff9a858_6959_45e7_ad6b_16f9e0e91098","channel_id":"C01M2UUP87P","client_msg_id":"77022f01-3846-4b9d-a6d3-120a26b2c2ac","type":"message","text":"welcome to airbyte.","user":"U01AS8LGX41","ts":"2021-02-05T17:26:01.000000Z","team":"T01AB4DDR2N","blocks":[{"type":"rich_text"}],"file_ids":[],"thread_ts":"1612545961.000800"}],"offset":0,"limit":20,"nbHits":2,"exhaustiveNbHits":false,"processingTimeMs":21,"query":"test-72"} -``` - -## 4. 
Search via a UI - -Making curl requests to search your Slack History is a little clunky, so we have modified the example UI that MeiliSearch provides in [their docs](https://docs.meilisearch.com/learn/tutorials/getting_started.html#integrate-with-your-project) to search through the Slack results. - -Download \(or copy and paste\) this [html file](https://github.com/airbytehq/airbyte/blob/master/docs/examples/slack-history/index.html) to your workstation. Then, open it using a browser. You should now be able to write search terms in the search bar and get results instantly! - -![](../../.gitbook/assets/slack-history-ui.png) - -## 5. "Productionizing" Saving Slack History - -You can find instructions for how to host Airbyte on various cloud platforms [here](../../deploying-airbyte/README.md). - -Documentation on how to host MeiliSearch on cloud platforms can be found [here](https://docs.meilisearch.com/running-production/#a-quick-introduction). - -If you want to use the UI mentioned in the section above, we recommend statically hosting it on S3, GCS, or equivalent. diff --git a/docs/archive/examples/slack-history/index.html b/docs/archive/examples/slack-history/index.html deleted file mode 100644 index 0812368137cd..000000000000 --- a/docs/archive/examples/slack-history/index.html +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - - - -
    - - - - - - - diff --git a/docs/archive/examples/zoom-activity-dashboard.md b/docs/archive/examples/zoom-activity-dashboard.md deleted file mode 100644 index a141f2da418a..000000000000 --- a/docs/archive/examples/zoom-activity-dashboard.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -description: Using Airbyte and Tableau ---- - -# Visualizing the Time Spent by Your Team in Zoom Calls - -In this article, we will show you how you can understand how much your team leverages Zoom, or spends time in meetings, in a couple of minutes. We will be using [Airbyte](https://airbyte.com) \(an open-source data integration platform\) and [Tableau](https://www.tableau.com) \(a business intelligence and analytics software\) for this tutorial. - -Here is what we will cover: - -1. Replicating data from Zoom to a PostgreSQL database, using Airbyte -2. Connecting the PostgreSQL database to Tableau -3. Creating charts in Tableau with Zoom data - -We will produce the following charts in Tableau: - -* Meetings per week in a team -* Hours a team spends in meetings per week -* Listing of team members with the number of meetings per week and number of hours spent in meetings, ranked -* Webinars per week in a team -* Hours a team spends in webinars per week -* Participants for all webinars in a team per week -* Listing of team members with the number of webinars per week and number of hours spent in meetings, ranked - -Let’s get started by replicating Zoom data using Airbyte. - -## Step 1: Replicating Zoom data to PostgreSQL - -### Launching Airbyte - -In order to replicate Zoom data, we will need to use [Airbyte’s Zoom connector](https://docs.airbyte.com/integrations/sources/zoom). To do this, you need to start off Airbyte’s web app by opening up your terminal and navigating to Airbyte and running: - -`docker-compose up` - -You can find more details about this in the [Getting Started FAQ](https://discuss.airbyte.io/c/faq/15) on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions). - -This will start up Airbyte on `localhost:8000`; open that address in your browser to access the Airbyte dashboard. - -![](../../.gitbook/assets/01_airbyte-dashboard.png) - -If you haven't gone through the onboarding yet, you will be prompted to connect a source and a destination. Then just follow the instructions. If you've gone through it, then you will see the screenshot above. In the top right corner of the Airbyte dashboard, click on the **+ new source** button to add a new Airbyte source. In the screen to set up the new source, enter the source name \(we will use airbyte-zoom\) and select **Zoom** as source type. - -Choosing Zoom as **source type** will cause Airbyte to display the configuration parameters needed to set up the Zoom source. - -![](../../.gitbook/assets/02_setting-zoom-connector-name.png) - -The Zoom connector for Airbyte requires you to provide it with a Zoom JWT token. Let’s take a detour and look at how to obtain one from Zoom. - -### Obtaining a Zoom JWT Token - -To obtain a Zoom JWT Token, login to your Zoom account and go to the [Zoom Marketplace](https://marketplace.zoom.us/). If this is your first time in the marketplace, you will need to agree to the Zoom’s marketplace terms of use. - -Once you are in, you need to click on the **Develop** dropdown and then click on **Build App.** - -![](../../.gitbook/assets/03_click.png) - -Clicking on **Build App** for the first time will display a modal for you to accept the Zoom’s API license and terms of use. 
Do accept if you agree and you will be presented with the below screen. - -![](../../.gitbook/assets/zoom-marketplace-build-screen%20(3)%20(3).png) - -Select **JWT** as the app you want to build and click on the **Create** button on the card. You will be presented with a modal to enter the app name; type in `airbyte-zoom`. - -![](../../.gitbook/assets/05_app-name-modal.png) - -Next, click on the **Create** button on the modal. - -You will then be taken to the **App Information** page of the app you just created. Fill in the required information. - -![](../../.gitbook/assets/06_app-information.png) - -After filling in the needed information, click on the **Continue** button. You will be taken to the **App Credentials** page. Here, click on the **View JWT Token** dropdown. - -![](../../.gitbook/assets/07_view-jwt-token.png) - -There you can set the expiration time of the token \(we will leave the default 90 minutes\), and then you click on the **Copy** button of the **JWT Token**. - -After copying it, click on the **Continue** button. - -![](../../.gitbook/assets/08_activate-webhook.png) - -You will be taken to a screen to activate **Event Subscriptions**. Just leave it as is, as we won’t be needing Webhooks. Click on **Continue**, and your app should be marked as activated. - -### Connecting Zoom on Airbyte - -So let’s go back to the Airbyte web UI and provide it with the JWT token we copied from our Zoom app. - -Now click on the **Set up source** button. You will see the below success message when the connection is made successfully. - -![](../../.gitbook/assets/setup-successful%20(3)%20(2).png) - -And you will be taken to the page to add your destination. - -### Connecting PostgreSQL on Airbyte - -![](../../.gitbook/assets/10_destination.png) - -For our destination, we will be using a PostgreSQL database, since Tableau supports PostgreSQL as a data source. Click on the **add destination** button, and then in the drop down click on **+ add a new destination**. In the page that presents itself, add the destination name and choose the Postgres destination. - -![](../../.gitbook/assets/11_choose-postgres-destination.png) - -To supply Airbyte with the PostgreSQL configuration parameters needed to make a PostgreSQL destination, we will spin off a PostgreSQL container with Docker using the following command in our terminal. - -`docker run --rm --name airbyte-zoom-db -e POSTGRES_PASSWORD=password -v airbyte_zoom_data:/var/lib/postgresql/data -p 2000:5432 -d postgres` - -This will spin a docker container and persist the data we will be replicating in the PostgreSQL database in a Docker volume `airbyte_zoom_data`. - -Now, let’s supply the above credentials to the Airbyte UI requiring those credentials. - -![](../../.gitbook/assets/postgres_credentials%20(3)%20(3).png) - -Then click on the **Set up destination** button. - -After the connection has been made to your PostgreSQL database successfully, Airbyte will generate the schema of the data to be replicated in your database from the Zoom source. - -Leave all the fields checked. - -![](../../.gitbook/assets/schema%20(3)%20(3).png) - -Select a **Sync frequency** of **manual** and then click on **Set up connection**. - -After successfully making the connection, you will see your PostgreSQL destination. Click on the Launch button to start the data replication. - -![](../../.gitbook/assets/launch%20(3)%20(3).png) - -Then click on the **airbyte-zoom-destination** to see the Sync page. 
- -![](../../.gitbook/assets/sync-screen%20(3)%20(3).png) - -Syncing should take a few minutes or longer depending on the size of the data being replicated. Once Airbyte is done replicating the data, you will get a **succeeded** status. - -Then, you can run the following SQL command on the PostgreSQL container to confirm that the sync was done successfully. - -`docker exec airbyte-zoom-db psql -U postgres -c "SELECT * FROM public.users;"` - -Now that we have our Zoom data replicated successfully via Airbyte, let’s move on and set up Tableau to make the various visualizations and analytics we want. - -## Step 2: Connect the PostgreSQL database to Tableau - -Tableau helps people and organizations to get answers from their data. It’s a visual analytic platform that makes it easy to explore and manage data. - -To get started with Tableau, you can opt in for a [free trial period](https://www.tableau.com/products/trial) by providing your email and clicking the **DOWNLOAD FREE TRIAL** button to download the Tableau desktop app. The download should automatically detect your machine type \(Windows/Mac\). - -Go ahead and install Tableau on your machine. After the installation is complete, you will need to fill in some more details to activate your free trial. - -Once your activation is successful, you will see your Tableau dashboard. - -![](../../.gitbook/assets/tableau-dashboard%20(3)%20(3).png) - -On the sidebar menu under the **To a Server** section, click on the **More…** menu. You will see a list of datasource connectors you can connect Tableau with. - -![](../../.gitbook/assets/datasources%20(4)%20(4).png) - -Select **PostgreSQL** and you will be presented with a connection credentials modal. - -Fill in the same details of the PostgreSQL database we used as the destination in Airbyte. - -![](../../.gitbook/assets/18_fill-in-connection-details.png) - -Next, click on the **Sign In** button. If the connection was made successfully, you will see the Tableau dashboard for the database you just connected. - -_Note: If you are having trouble connecting PostgreSQL with Tableau, it might be because the driver Tableau comes with for PostgreSQL might not work for newer versions of PostgreSQL. You can download the JDBC driver for PostgreSQL_ [_here_](https://www.tableau.com/support/drivers?_ga=2.62351404.1800241672.1616922684-1838321730.1615100968) _and follow the setup instructions._ - -Now that we have replicated our Zoom data into a PostgreSQL database using Airbyte’s Zoom connector, and connected Tableau with our PostgreSQL database containing our Zoom data, let’s proceed to creating the charts we need to visualize the time spent by a team in Zoom calls. - -## Step 3: Create the charts on Tableau with the Zoom data - -### Meetings per week in a team - -To create this chart, we will need to use the count of the meetings and the **createdAt** field of the **meetings** table. Currently, we haven’t selected a table to work on in Tableau. So you will see a prompt to **Drag tables here**. - -![](../../.gitbook/assets/19_tableau-view-with-all-tables.png) - -Drag the **meetings** table from the sidebar onto the space with the prompt. - -Now that we have the meetings table, we can start building out the chart by clicking on **Sheet 1** at the bottom left of Tableau. - -![](../../.gitbook/assets/20_empty-meeting-sheet.png) - -As stated earlier, we need **Created At**, but currently it’s a String data type. Let’s change that by converting it to a data time. 
So right click on **Created At**, then select **Change Data Type** and choose **Date & Time**. And that’s it! That field is now of type **Date & Time**.
-
-![](../../.gitbook/assets/21_change-to-date-time.png)
-
-Next, drag **Created At** to **Columns**.
-
-![](../../.gitbook/assets/22_drag-created-at.png)
-
-Currently, we get the Created At in **YEAR**, but per our requirement we want it in weeks, so right click on **YEAR\(Created At\)** and choose **Week Number**.
-
-![](../../.gitbook/assets/change-to-per-week%20(3)%20(3).png)
-
-Tableau should now look like this:
-
-![](../../.gitbook/assets/24_meetings-per-week.png)
-
-Now, to finish up, we need to add the **meetings\(Count\)** measure Tableau already calculated for us to the **Rows** section. So drag **meetings\(Count\)** onto the Rows section to complete the chart.
-
-![](../../.gitbook/assets/evolution-of-meetings-per-week%20(3)%20(3).png)
-
-And now we are done with the very first chart. Let's save the sheet and create a new Dashboard that we will add this sheet to, as well as the others we will be creating.
-
-Currently the sheet shows **Sheet 1**; right click on **Sheet 1** at the bottom left and rename it to **Weekly Meetings**.
-
-To create our Dashboard, we can right click on the sheet we just renamed and choose **New Dashboard**. Rename the Dashboard to Zoom Dashboard and drag the sheet into it to have something like this:
-
-![](../../.gitbook/assets/26_zoom-dashboard.png)
-
-Now that we have this first chart out of the way, we just need to replicate most of the process we used for this one to create the other charts. Because the steps are so similar, we will mostly be showing the finished screenshots of the charts, except when we need to conform to the chart requirements.
-
-### Hours a team spends in meetings per week
-
-For this chart, we need the sum of the duration spent in weekly meetings. We already have a Duration field, which currently displays durations in minutes. We can derive a calculated field off this field, since we want the duration in hours \(we just need to divide the duration field by 60\).
-
-To do this, right click on the Duration field and select **Create**, then click on **Calculated Field**. Change the name to **Duration in Hours**, and then the calculation should be **\[Duration\]/60**. Click OK to create the field.
-
-So now we can drag the Duration in Hours and Created At fields onto your sheet like so:
-
-![](../../.gitbook/assets/27_hours-spent-in-weekly-meetings.png)
-
-Note: We are adding a filter on the Duration to filter out null values. You can do this by right clicking on the **SUM\(Duration\)** pill and clicking filter, then make sure the **include null values** checkbox is unchecked.
-
-### Participants for all meetings per week
-
-For this chart, we will need to have a calculated field called **\# of meetings attended**, which will be an aggregate of the counts of rows matching a particular user's email in the `report_meeting_participants` table, plotted against the **Created At** field of the **meetings** table. To get this done, right click on the **User Email** field. Select **Create** and click on **Calculated Field**, then enter the title of the field as **\# of meetings attended**. Next, enter the below formula:
-
-`COUNT(IF [User Email] == [User Email] THEN [Id (Report Meeting Participants)] END)`
-
-Then click on **Apply**.
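If it helps to reason about what this calculated field computes, here is a rough SQL equivalent of the same weekly participant count. This is purely illustrative: the column names below (`meeting_id`, `id`, `created_at`) are placeholders and may not match the exact schema the Zoom connector produces.

```text
-- illustrative only: adjust column names to the actual Zoom connector schema
SELECT date_trunc('week', CAST(m.created_at AS TIMESTAMP)) AS week,
       COUNT(p.id) AS participants
FROM public.report_meeting_participants p
INNER JOIN public.meetings m ON p.meeting_id = m.id
GROUP BY 1
ORDER BY 1;
```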
Finally, drag the **Created At** fields \(make sure it’s on the **Weekly** number\) and the calculated field you just created to match the below screenshot: - -![](../../.gitbook/assets/number_of_participants_per_weekly_meetings.png) - -### Listing of team members with the number of meetings per week and number of hours spent in meetings, ranked. - -To get this chart, we need to create a relationship between the **meetings table** and the `report_meeting_participants` table. You can do this by dragging the `report_meeting_participants` table in as a source alongside the **meetings** table and relate both via the **meeting id**. Then you will be able to create a new worksheet that looks like this: - -![](../../.gitbook/assets/meetings-participant-ranked%20(3)%20(3).png) - -Note: To achieve the ranking, we simply use the sort menu icon on the top menu bar. - -### Webinars per week in a team - -The rest of the charts will be needing the **webinars** and `report_webinar_participants` tables. Similar to the number of meetings per week in a team, we will be plotting the Count of webinars against the **Created At** property. - -![](../../.gitbook/assets/30_weekly-webinars.png) - -### Hours a week spends in webinars per week - -For this chart, as for the meeting’s counterpart, we will get a calculated field off the Duration field to get the **Webinar Duration in Hours**, and then plot **Created At** against the **Sum of Webinar Duration in Hours**, as shown in the screenshot below. Note: Make sure you create a new sheet for each of these graphs. - -### Participants for all webinars per week - -This calculation is the same as the number of participants for all meetings per week, but instead of using the **meetings** and `report_meeting_participants` tables, we will use the webinars and `report_webinar_participants` tables. - -Also, the formula will now be: - -`COUNT(IF [User Email] == [User Email] THEN [Id (Report Webinar Participants)] END)` - -Below is the chart: - -![](../../.gitbook/assets/32_number_of_webinar_attended_per_week.png) - -#### Listing of team members with the number of webinars per week and number of hours spent in meetings, ranked - -Below is the chart with these specs - -![](../../.gitbook/assets/33_number-of-webinars-participants.png) - -## Conclusion - -In this article, we see how we can use Airbyte to get data off the Zoom API onto a PostgreSQL database, and then use that data to create some chart visualizations in Tableau. - -You can leverage Airbyte and Tableau to produce graphs on any collaboration tool. We just used Zoom to illustrate how it can be done. Hope this is helpful! - diff --git a/docs/archive/faq/README.md b/docs/archive/faq/README.md deleted file mode 100644 index 1f6a217b74c7..000000000000 --- a/docs/archive/faq/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# FAQ - -Our FAQ is now a section on our Airbyte Forum. Check it out [here](https://github.com/airbytehq/airbyte/discussions)! - -If you don't see your question answered, feel free to open up a new topic for it. \ No newline at end of file diff --git a/docs/archive/faq/data-loading.md b/docs/archive/faq/data-loading.md deleted file mode 100644 index 4ae20d834edc..000000000000 --- a/docs/archive/faq/data-loading.md +++ /dev/null @@ -1,124 +0,0 @@ -# Data Loading - -## **Why don’t I see any data in my destination yet?** - -It can take a while for Airbyte to load data into your destination. Some sources have restrictive API limits which constrain how much -data we can sync in a given time. 
Large amounts of data in your source can also make the initial sync take longer. You can check your -sync status in your connection detail page that you can access through the destination detail page or the source one. - -## **Why my final tables are being recreated everytime?** - -Airbyte ingests data into raw tables and applies the process of normalization if you selected it in the connection page. -The normalization runs a full refresh each sync and for some destinations like Snowflake, Redshift, Bigquery this may incur more -resource consumption and more costs. You need to pay attention to the frequency that you're retrieving your data to avoid issues. -For example, if you create a connection to sync every 5 minutes with incremental sync on, it will only retrieve new records into the raw tables but will apply normalization -to *all* the data in every sync! If you have tons of data, this may not be the right sync frequency for you. - -There is a [Github issue](https://github.com/airbytehq/airbyte/issues/4286) to implement normalization using incremental, which will reduce -costs and resources in your destination. - -## **What happens if a sync fails?** - -You won't lose data when a sync fails, however, no data will be added or updated in your destination. - -Airbyte will automatically attempt to replicate data 3 times. You can see and export the logs for those attempts in the connection -detail page. You can access this page through the Source or Destination detail page. - -You can configure a Slack webhook to warn you when a sync fails. - -In the future you will be able to configure other notification method (email, Sentry) and an option to create a -GitHub issue with the logs. We’re still working on it, and the purpose would be to help the community and the Airbyte team to fix the -issue as soon as possible, especially if it is a connector issue. - -Until Airbyte has this system in place, here is what you can do: - -* File a GitHub issue: go [here](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=type%2Fbug&template=bug-report.md&title=) - and file an issue with the detailed logs copied in the issue’s description. The team will be notified about your issue and will update - it for any progress or comment on it. -* Fix the issue yourself: Airbyte is open source so you don’t need to wait for anybody to fix your issue if it is important to you. - To do so, just fork the [GitHub project](https://github.com/airbytehq/airbyte) and fix the piece of code that need fixing. If you’re okay - with contributing your fix to the community, you can submit a pull request. We will review it ASAP. -* Ask on Slack: don’t hesitate to ping the team on [Slack](https://slack.airbyte.io). - -Once all this is done, Airbyte resumes your sync from where it left off. - -We truly appreciate any contribution you make to help the community. Airbyte will become the open-source standard only if everybody participates. - -## **Can Airbyte support 2-way sync i.e. changes from A go to B and changes from B go to A?** - -Airbyte actually does not support this right now. There are some details around how we handle schema and tables names that isn't going to -work for you in the current iteration. -If you attempt to do a circular dependency between source and destination, you'll end up with the following -A.public.table_foo writes to B.public.public_table_foo to A.public.public_public_table_foo. You won't be writing into your original table, -which I think is your intention. 
-
-
-## **What happens to data in the pipeline if the destination gets disconnected? Could I lose data, or wind up with duplicate data when the pipeline is reconnected?**
-
-Airbyte is architected to prevent data loss or duplication. Airbyte will display a failure for the sync, and re-attempt it at the next scheduled sync, according to the frequency you set.
-
-## **How frequently can Airbyte sync data?**
-
-You can adjust the sync schedule to run as frequently as every hour or as infrequently as once a year using [Cron expressions](https://docs.airbyte.com/cloud/managing-airbyte-cloud/edit-stream-configuration).
-
-## **Why wouldn’t I choose to load all of my data more frequently?**
-
-While frequent data loads will give you more up-to-date data, there are a few reasons you wouldn’t want to load your data too frequently, including:
-
-* Higher API usage may cause you to hit a limit that could impact other systems that rely on that API.
-* Higher cost of loading data into your warehouse.
-* More frequent delays, resulting in increased delay notification emails. For instance, if the data source generally takes several hours to update but you wanted five-minute increments, you may receive a delay notification every sync.
-
-Generally, it is recommended to set incremental loads to every hour to help limit API calls.
-
-## **Is there a way to know the estimated time to completion for the first historic sync?**
-
-Unfortunately not yet.
-
-## **Do you support change data capture \(CDC\) or logical replication for databases?**
-
-Airbyte currently supports [CDC for Postgres and MySQL](../../understanding-airbyte/cdc.md). Airbyte is adding support for a few other databases, which you can check in the roadmap.
-
-## Using incremental sync, is it possible to add more fields when some new columns are added to a source table, or when a new table is added?
-
-For the moment, incremental sync doesn't support schema changes, so you would need to perform a full refresh whenever that happens. Here’s a related [Github issue](https://github.com/airbytehq/airbyte/issues/1601).
-
-## Is there a limit to how many tables one connection can handle?
-
-Yes. More than 6,000 tables can be a problem for the UI to load.
-
-There are two Github issues about this limitation: [Issue #3942](https://github.com/airbytehq/airbyte/issues/3942) and [Issue #3943](https://github.com/airbytehq/airbyte/issues/3943).
-
-## Help, Airbyte is hanging/taking a long time to discover my source's schema!
-
-This usually happens for database sources that contain a lot of tables. This should resolve itself in half an hour or so.
-
-If the source contains more than 6k tables, see the [above question](#is-there-a-limit-to-how-many-tables-one-connection-can-handle).
-
-There is a known issue with [Oracle databases](https://github.com/airbytehq/airbyte/issues/4944).
-
-## **I see you support a lot of connectors – what about connectors Airbyte doesn’t support yet?**
-
-You can either:
-
-* Submit a [connector request](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=area%2Fintegration%2C+new-integration&template=new-integration-request.md&title=) on our Github project, and be notified once we or the community build a connector for it.
-* Build a connector yourself by forking our [GitHub project](https://github.com/airbytehq/airbyte) and submitting a pull request. Here are the [instructions on how to build a connector](../../contributing-to-airbyte/README.md).
-* Ask on Slack: don’t hesitate to ping the team on [Slack](https://slack.airbyte.io). - -## **What kind of notifications do I get?** - -For the moment, the UI will only display one kind of notification: when a sync fails, Airbyte will display the failure at the source/destination -level in the list of sources/destinations, and in the connection detail page along with the logs. - -However, there are other types of notifications: - -* When a connector that you use is no longer up to date -* When your connections fails -* When core isn't up to date - diff --git a/docs/archive/faq/deploying-on-other-os.md b/docs/archive/faq/deploying-on-other-os.md deleted file mode 100644 index 0b493c3db200..000000000000 --- a/docs/archive/faq/deploying-on-other-os.md +++ /dev/null @@ -1,40 +0,0 @@ -# Deploying Airbyte on a Non-Standard Operating System - -## CentOS 8 - -From clean install: - -``` -firewall-cmd --zone=public --add-port=8000/tcp --permanent -firewall-cmd --zone=public --add-port=8001/tcp --permanent -firewall-cmd --zone=public --add-port=7233/tcp --permanent -systemctl restart firewalld -``` -OR... if you prefer iptables: -``` -iptables -A INPUT -p tcp -m tcp --dport 8000 -j ACCEPT -iptables -A INPUT -p tcp -m tcp --dport 8001 -j ACCEPT -iptables -A INPUT -p tcp -m tcp --dport 7233 -j ACCEPT -systemctl restart iptables -``` -Setup the docker repo: -``` -dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo` -dnf install docker-ce --nobest -systemctl enable --now docker -usermod -aG docker $USER -``` -You'll need to get docker-compose separately. -``` -dnf install wget git curl -curl -L https://github.com/docker/compose/releases/download/1.25.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose -chmod +x /usr/local/bin/docker-compose -``` -Now we can install Airbyte. In this example, we will install it under `/opt/` -``` -cd /opt -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -docker-compose up -docker-compose ps -``` \ No newline at end of file diff --git a/docs/archive/faq/differences-with/README.md b/docs/archive/faq/differences-with/README.md deleted file mode 100644 index d020cfd1db38..000000000000 --- a/docs/archive/faq/differences-with/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Differences with - diff --git a/docs/archive/faq/differences-with/fivetran-vs-airbyte.md b/docs/archive/faq/differences-with/fivetran-vs-airbyte.md deleted file mode 100644 index 9a9fe1045660..000000000000 --- a/docs/archive/faq/differences-with/fivetran-vs-airbyte.md +++ /dev/null @@ -1,27 +0,0 @@ -# Fivetran vs Airbyte - -We wrote an article, “[Open-source vs. Commercial Software: How to Solve the Data Integration Problem](https://airbyte.com/articles/data-engineering-thoughts/open-source-vs-commercial-software-how-to-better-solve-data-integration/),” in which we describe the pros and cons of Fivetran’s commercial approach and Airbyte’s open-source approach. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2021/01/Airbyte-vs-Fivetran.png) - -## **Fivetran:** - -* **Limited high-quality connectors:** after 8 years in business, Fivetran supports 150 connectors. The more connectors, the more difficult it is for Fivetran to keep the same level of maintenance across all connectors. They will always have a ROI consideration to maintaining long-tailed connectors. 
-* **Pricing indexed on usage:** Fivetran’s pricing is indexed on the number of active rows \(rows added or edited\) per month. Teams always need to keep that in mind and are not free to move data without thinking about cost, as the costs can grow fast. -* **Security and privacy compliance:** all companies are subject to privacy compliance laws, such as GDPR, CCPA, HIPAA, etc. As a matter of fact, above a certain stage \(about 100 employees\) in a company, all external products need to go through a security compliance process that can take several months. -* **No moving data between internal databases:** Fivetran sits in the cloud, so if you have to replicate data from an internal database to another, it makes no sense to have the data move through them \(Fivetran\) for privacy and cost reasons. - -## **Airbyte:** - -* **Free, as open source, so no more pricing based on usage**: learn more about our [future business model](https://handbook.airbyte.io/strategy/business-model) \(connectors will always remain open source\). -* **Supporting 60 connectors within 8 months from inception**. Our goal is to reach 200+ connectors by the end of 2021. -* **Building new connectors made trivial, in the language of your choice:** Airbyte makes it a lot easier to create your own connector, vs. building them yourself in-house \(with Airflow or other tools\). Scheduling, orchestration, and monitoring comes out of the box with Airbyte. -* **Addressing the long tail of connectors:** with the help of the community, Airbyte ambitions to support thousands of connectors. -* **Adapt existing connectors to your needs:** you can adapt any existing connector to address your own unique edge case. -* **Using data integration in a workflow:** Airbyte’s API lets engineering teams add data integration jobs into their workflow seamlessly. -* **Integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **Debugging autonomy:** if you experience any connector issue, you won’t need to wait for Fivetran’s customer support team to get back to you, if you can fix the issue fast yourself. -* **No more security and privacy compliance, as self-hosted, source-available and open-sourced \(MIT\)**. Any team can directly address their integration needs. - -Your data stays in your cloud. Have full control over your data, and the costs of your data transfers. - diff --git a/docs/archive/faq/differences-with/meltano-vs-airbyte.md b/docs/archive/faq/differences-with/meltano-vs-airbyte.md deleted file mode 100644 index f8e2ff5fba64..000000000000 --- a/docs/archive/faq/differences-with/meltano-vs-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Meltano vs Airbyte - -We wrote an article, “[The State of Open-Source Data Integration and ETL](https://airbyte.com/articles/data-engineering-thoughts/the-state-of-open-source-data-integration-and-etl/),” in which we list and compare all ETL-related open-source projects, including Meltano and Airbyte. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -## **Meltano:** - -* **Meltano is built on top of the Singer protocol, whereas Airbyte is built on top of the Airbyte protocol**. 
Having initially created Airbyte on top of Singer, we wrote about why we didn't move forward with it [here](https://airbyte.com/blog/why-you-should-not-build-your-data-pipeline-on-top-of-singer) and [here](https://airbyte.com/blog/airbyte-vs-singer-why-airbyte-is-not-built-on-top-of-singer). Summarized, the reasons were: Singer connectors didn't always adhere to the Singer protocol, had poor standardization and visibility in terms of quality, and community governance and support was abandoned by Stitch. By contrast, we aim to make Airbyte a product that ["just works"](https://airbyte.com/blog/our-truth-for-2021-airbyte-just-works) and always plan to maximize engagement within the Airbyte community. -* **CLI-first approach:** Meltano was primarily built with a command line interface in mind. In that sense, they seem to target engineers with a preference for that interface. -* **Integration with Airflow for orchestration:** You can either use Meltano alone for orchestration or with Airflow; Meltano works both ways. -* All connectors must use Python. -* Meltano works with any of Singer's 200+ available connectors. However, in our experience, quality has been hit or miss. - -## **Airbyte:** - -In contrast, Airbyte is a company fully committed to the open-source project and has a [business model](https://handbook.airbyte.io/strategy/business-model) in mind around this project. Our [team](https://airbyte.com/about-us) are data integration experts that have built more than 1,000 integrations collectively at large scale. The team now counts 20 engineers working full-time on Airbyte. - -* **Airbyte supports more than 100 connectors after only 1 year since its inception**, 20% of which were built by the community. Our ambition is to support **200+ connectors by the end of 2021.** -* Airbyte’s connectors are **usable out of the box through a UI and API,** with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Not limited by Singer’s data protocol:** In contrast to Meltano, Airbyte was not built on top of Singer, but its data protocol is compatible with Singer’s. This means Airbyte can go beyond Singer, but Meltano will remain limited. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. - -## **Other noteworthy differences:** - -* In terms of community, Meltano's Slack community got 430 new members in the last 6 months, while Airbyte got 800. -* The difference in velocity in terms of feature progress is easily measurable as both are open-source projects. Meltano closes about 30 issues per month, while Airbyte closes about 120. 
- diff --git a/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md b/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md deleted file mode 100644 index adcc9c2bf376..000000000000 --- a/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md +++ /dev/null @@ -1,25 +0,0 @@ -# Pipelinewise vs Airbyte - -## **PipelineWise:** - -PipelineWise is an open-source project by Transferwise that was built with the primary goal of serving their own needs. There is no business model attached to the project, and no apparent interest in growing the community. - -* **Supports 21 connectors,** and only adds new ones based on the needs of the mother company, Transferwise. -* **No business model attached to the project,** and no apparent interest from the company in growing the community. -* **As close to the original format as possible:** PipelineWise aims to reproduce the data from the source to an Analytics-Data-Store in as close to the original format as possible. Some minor load time transformations are supported, but complex mapping and joins have to be done in the Analytics-Data-Store to extract meaning. -* **Managed Schema Changes:** When source data changes, PipelineWise detects the change and alters the schema in your Analytics-Data-Store automatically. -* **YAML based configuration:** Data pipelines are defined as YAML files, ensuring that the entire configuration is kept under version control. -* **Lightweight:** No daemons or database setup are required. - -## **Airbyte:** - -In contrast, Airbyte is a company fully committed to the open-source project and has a [business model in mind](https://handbook.airbyte.io/) around this project. - -* Our ambition is to support **300+ connectors by the end of 2021.** We already supported about 50 connectors at the end of 2020, just 5 months after its inception. -* Airbyte’s connectors are **usable out of the box through a UI and API,** with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. - -The data protocols for both projects are compatible with Singer’s. So it is easy to migrate a Singer tap or target onto Airbyte or PipelineWise. 
- diff --git a/docs/archive/faq/differences-with/singer-vs-airbyte.md b/docs/archive/faq/differences-with/singer-vs-airbyte.md deleted file mode 100644 index 58edd43eedb0..000000000000 --- a/docs/archive/faq/differences-with/singer-vs-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Singer vs Airbyte - -If you want to understand the difference between Airbyte and Singer, you might be interested in 2 articles we wrote: - -* “[Airbyte vs. Singer: Why Airbyte is not built on top of Singer](https://airbyte.com/articles/data-engineering-thoughts/airbyte-vs-singer-why-airbyte-is-not-built-on-top-of-singer/).” -* “[The State of Open-Source Data Integration and ETL](https://airbyte.com/articles/data-engineering-thoughts/the-state-of-open-source-data-integration-and-etl/),” in which we list and compare all ETL-related open-source projects, including Singer and Airbyte. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2020/10/Landscape-of-open-source-data-integration-platforms-4.png) - -## **Singer:** - -* **Supports 96 connectors after 4 years.** -* **Increasingly outdated connectors:** Talend \(acquirer of StitchData\) seems to have stopped investing in maintaining Singer’s community and connectors. As most connectors see schema changes several times a year, more and more Singer’s taps and targets are not actively maintained and are becoming outdated. -* **Absence of standardization:** each connector is its own open-source project. So you never know the quality of a tap or target until you have actually used it. There is no guarantee whatsoever about what you’ll get. -* **Singer’s connectors are standalone binaries:** you still need to build everything around to make them work \(e.g. UI, configuration validation, state management, normalization, schema migration, monitoring, etc\). -* **No full commitment to open sourcing all connectors,** as some connectors are only offered by StitchData under a paid plan. _\*\*_ - -## **Airbyte:** - -* Our ambition is to support **300+ connectors by the end of 2021.** We already supported about 50 connectors at the end of 2020, just 5 months after its inception. -* Airbyte’s connectors are **usable out of the box through a UI and API**, with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **A full commitment to the open-source MIT project** with the promise not to hide some connectors behind paid walls. - -Note that Airbyte’s data protocol is compatible with Singer’s. So it is easy to migrate a Singer tap onto Airbyte. 
- diff --git a/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md b/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md deleted file mode 100644 index ec612ea9b2b1..000000000000 --- a/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md +++ /dev/null @@ -1,29 +0,0 @@ -# StitchData vs Airbyte - -We wrote an article, “[Open-source vs. Commercial Software: How to Solve the Data Integration Problem](https://airbyte.com/articles/data-engineering-thoughts/open-source-vs-commercial-software-how-to-better-solve-data-integration/),” in which we describe the pros and cons of StitchData’s commercial approach and Airbyte’s open-source approach. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2020/10/Open-source-vs-commercial-approach-2048x1843.png) - -## StitchData: - -* **Limited deprecating connectors:** Stitch only supports 150 connectors. Talend has stopped investing in StitchData and its connectors. And on Singer, each connector is its own open-source project. So you never know the quality of a tap or target until you have actually used it. There is no guarantee whatsoever about what you’ll get. -* **Pricing indexed on usage:** StitchData’s pricing is indexed on the connectors used and the volume of data transferred. Teams always need to keep that in mind and are not free to move data without thinking about cost. -* **Security and privacy compliance:** all companies are subject to privacy compliance laws, such as GDPR, CCPA, HIPAA, etc. As a matter of fact, above a certain stage \(about 100 employees\) in a company, all external products need to go through a security compliance process that can take several months. -* **No moving data between internal databases:** StitchData sits in the cloud, so if you have to replicate data from an internal database to another, it makes no sense to have the data move through their cloud for privacy and cost reasons. -* **StitchData’s Singer connectors are standalone binaries:** you still need to build everything around to make them work. And it’s hard to update some pre-built connectors, as they are of poor quality. - -## Airbyte: - -* **Free, as open source, so no more pricing based on usage:** learn more about our [future business model](https://handbook.airbyte.io/strategy/business-model) \(connectors will always remain open-source\). -* **Supporting 50+ connectors by the end of 2020** \(so in only 5 months of existence\). Our goal is to reach 300+ connectors by the end of 2021. -* **Building new connectors made trivial, in the language of your choice:** Airbyte makes it a lot easier to create your own connector, vs. building them yourself in-house \(with Airflow or other tools\). Scheduling, orchestration, and monitoring comes out of the box with Airbyte. -* **Maintenance-free connectors you can use in minutes.** Just authenticate your sources and warehouse, and get connectors that adapt to schema and API changes for you. -* **Addressing the long tail of connectors:** with the help of the community, Airbyte ambitions to support thousands of connectors. -* **Adapt existing connectors to your needs:** you can adapt any existing connector to address your own unique edge case. -* **Using data integration in a workflow:** Airbyte’s API lets engineering teams add data integration jobs into their workflow seamlessly. -* **Integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. 
Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **Debugging autonomy:** if you experience any connector issue, you won’t need to wait for Fivetran’s customer support team to get back to you, if you can fix the issue fast yourself. -* **Your data stays in your cloud.** Have full control over your data, and the costs of your data transfers. -* **No more security and privacy compliance, as self-hosted and open-sourced \(MIT\).** Any team can directly address their integration needs. -* **Premium support directly on our Slack for free**. Our time to resolution is about 3-4 hours in average. - diff --git a/docs/archive/faq/getting-started.md b/docs/archive/faq/getting-started.md deleted file mode 100644 index fd4ce42d47f6..000000000000 --- a/docs/archive/faq/getting-started.md +++ /dev/null @@ -1,50 +0,0 @@ -# Getting Started - -## **What do I need to get started using Airbyte?** - -You can deploy Airbyte in several ways, as [documented here](../../deploying-airbyte/README.md). Airbyte will then help you replicate data between a source and a destination. If you don’t see the connector you need, you can [build your connector yourself](../../connector-development) and benefit from Airbyte’s optional scheduling, orchestration and monitoring modules. - -## **How long does it take to set up Airbyte?** - -It depends on your source and destination. Check our setup guides to see the tasks for your source and destination. Each source and destination also has a list of prerequisites for setup. To make setup faster, get your prerequisites ready before you start to set up your connector. During the setup process, you may need to contact others \(like a database administrator or AWS account owner\) for help, which might slow you down. But if you have access to the connection information, it can take 2 minutes: see [demo video. ](https://www.youtube.com/watch?v=jWVYpUV9vEg) - -## **What data sources does Airbyte offer connectors for?** - -We already offer 100+ connectors, and will focus all our effort in ramping up the number of connectors and strengthening them. If you don’t see a source you need, you can file a [connector request here](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=area%2Fintegration%2C+new-integration&template=new-integration-request.md&title=). - -## **Where can I see my data in Airbyte?** - -You can’t see your data in Airbyte, because we don’t store it. The sync loads your data into your destination \(data warehouse, data lake, etc.\). While you can’t see your data directly in Airbyte, you can check your schema and sync status on the source detail page in Airbyte. - -## **Can I add multiple destinations?** - -Sure, you can. Just go to the "Destinations" section and click on the top right "+ new destination" button. You can have multiple destinations for the same source, and multiple sources for the same destination. - -## Am I limited to GUI interaction or is there a way to set up / run / interact with Airbyte programmatically? - -You can use the API to do anything you do today from the UI. Though, word of notice, the API is in alpha and may change. You won’t lose any functionality, but you may need to update your code to catch up to any backwards incompatible changes in the API. - -## How does Airbyte handle connecting to databases that are behind a firewall / NAT? - -We don’t. Airbyte is to be self-hosted in your own private cloud. 
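The Getting Started FAQ above notes that anything done in the UI can also be done through the API, which was still in alpha at the time. As a rough sketch only — the endpoint path and payload here are assumptions about that era's configuration API and may differ in your version — triggering a sync programmatically might look like this:

```bash
# Hypothetical example: trigger a manual sync through the API instead of the UI.
# The endpoint and payload are assumptions -- check the API reference for your
# Airbyte version before relying on them.
AIRBYTE_HOST="http://localhost:8000"                    # illustrative host
CONNECTION_ID="00000000-0000-0000-0000-000000000000"    # placeholder connection ID

curl -s -X POST "${AIRBYTE_HOST}/api/v1/connections/sync" \
  -H "Content-Type: application/json" \
  -d "{\"connectionId\": \"${CONNECTION_ID}\"}"
```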
- -## Can I set a start time for my integration? - -[Here](../../understanding-airbyte/connections#sync-schedules) is the link to the docs on scheduling syncs. - -## **Can I disable analytics in Airbyte?** - -Yes, you can control what's sent outside of Airbyte for analytics purposes. - -We added the following telemetry to Airbyte to ensure the best experience for users: - -* Measure usage of features & connectors -* Measure failure rate of connectors to address bugs quickly -* Reach out to our users about Airbyte community updates if they opt-in -* ... - -To disable telemetry, modify the `.env` file and define the two following environment variables: - -```text -TRACKING_STRATEGY=logging -``` diff --git a/docs/archive/faq/security-and-data-audits.md b/docs/archive/faq/security-and-data-audits.md deleted file mode 100644 index e56db4de7ac3..000000000000 --- a/docs/archive/faq/security-and-data-audits.md +++ /dev/null @@ -1,14 +0,0 @@ -# Security & Data Audits - -## **How secure is Airbyte?** - -Airbyte is an open-source self-hosted solution, so let’s say it is as safe as your data infrastructure. _\*\*_ - -## **Is Airbyte GDPR compliant?** - -Airbyte is a self-hosted solution, so it doesn’t bring any security or privacy risk to your infrastructure. We do intend to add data quality and privacy compliance features in the future, in order to give you more visibility on that topic. - -## **How does Airbyte charge?** - -We don’t. All connectors are all under the MIT license. If you are curious about the business model we have in mind, please check our [company handbook](https://handbook.airbyte.io/strategy/business-model). - diff --git a/docs/archive/faq/transformation-and-schemas.md b/docs/archive/faq/transformation-and-schemas.md deleted file mode 100644 index 554b11b558fd..000000000000 --- a/docs/archive/faq/transformation-and-schemas.md +++ /dev/null @@ -1,20 +0,0 @@ -# Transformation and Schemas - -## **Where's the T in Airbyte’s ETL tool?** - -Airbyte is actually an ELT tool, and you have the freedom to use it as an EL-only tool. The transformation part is done by default, but it is optional. You can choose to receive the data in raw \(JSON file for instance\) in your destination. - -We do provide normalization \(if option is still on\) so that data analysts / scientists / any users of the data can use it without much effort. - -We also intend to integrate deeply with dbt to make it easier for your team to continue relying you on them, if this was what you were doing. - -## **How does Airbyte handle replication when a data source changes its schema?** - -Airbyte continues to sync data using the configured schema until that schema is updated. Because Airbyte treats all fields as optional, if a field is renamed or deleted in the source, that field simply will no longer be replicated, but all remaining fields will. The same is true for streams as well. - -For now, the schema can only be updated manually in the UI \(by clicking "Update Schema" in the settings page for the connection\). When a schema is updated Airbyte will re-sync all data for that source using the new schema. - -## **How does Airbyte handle namespaces \(or schemas for the DB-inclined\)?** - -Airbyte respects source-defined namespaces when syncing data with a namespace-supported destination. See [this](../../understanding-airbyte/namespaces.md) for more details. 
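As a minimal sketch of applying the telemetry setting from the Getting Started FAQ above — assuming a docker-compose deployment like the CentOS example earlier in this diff, with an illustrative install path:

```bash
# Disable external analytics by routing telemetry to logs, then restart Airbyte.
cd /opt/airbyte                                   # illustrative install path
grep -q '^TRACKING_STRATEGY=' .env || echo 'TRACKING_STRATEGY=logging' >> .env
docker-compose down
docker-compose up -d
```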
- diff --git a/docs/archive/mongodb.md b/docs/archive/mongodb.md deleted file mode 100644 index d239da867673..000000000000 --- a/docs/archive/mongodb.md +++ /dev/null @@ -1,102 +0,0 @@ -# Mongo DB - -The MongoDB source supports Full Refresh and Incremental sync strategies. - -## Resulting schema - -MongoDB does not have anything like table definition, thus we have to define column types from actual attributes and their values. Discover phase have two steps: - -### Step 1. Find all unique properties - -Connector runs the map-reduce command which returns all unique document props in the collection. Map-reduce approach should be sufficient even for large clusters. - -#### Note - -To work with Atlas MongoDB, a **non-free** tier is required, as the free tier does not support the ability to perform the mapReduce operation. - -### Step 2. Determine property types - -For each property found, connector selects 10k documents from the collection where this property is not empty. If all the selected values have the same type - connector will set appropriate type to the property. In all other cases connector will fallback to `string` type. - -## Features - -| Feature | Supported | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental - Append Sync | Yes | -| Replicate Incremental Deletes | No | -| Namespaces | No | - -### Full Refresh sync - -Works as usual full refresh sync. - -### Incremental sync - -Cursor field can not be nested. Currently only top level document properties are supported. - -Cursor should **never** be blank. In case cursor is blank - the incremental sync results might be unpredictable and will totally rely on MongoDB comparison algorithm. - -Only `datetime` and `integer` cursor types are supported. Cursor type is determined based on the cursor field name: - -* `datetime` - if cursor field name contains a string from: `time`, `date`, `_at`, `timestamp`, `ts` -* `integer` - otherwise - -## Getting started - -This guide describes in details how you can configure MongoDB for integration with Airbyte. - -### Create users - -Run `mongo` shell, switch to `admin` database and create a `READ_ONLY_USER`. `READ_ONLY_USER` will be used for Airbyte integration. Please make sure that user has read-only privileges. - -```javascript -mongo -use admin; -db.createUser({user: "READ_ONLY_USER", pwd: "READ_ONLY_PASSWORD", roles: [{role: "read", db: "TARGET_DATABASE"}]} -``` - -Make sure the user have appropriate access levels. - -### Configure application - -In case your application uses MongoDB without authentication you will have to adjust code base and MongoDB config to enable MongoDB authentication. **Otherwise your application might go down once MongoDB authentication will be enabled.** - -### Enable MongoDB authentication - -Open `/etc/mongod.conf` and add/replace specific keys: - -```yaml -net: - bindIp: 0.0.0.0 - -security: - authorization: enabled -``` - -Binding to `0.0.0.0` will allow to connect to database from any IP address. - -The last line will enable MongoDB security. Now only authenticated users will be able to access the database. - -### Configure firewall - -Make sure that MongoDB is accessible from external servers. Specific commands will depend on the firewall you are using \(UFW/iptables/AWS/etc\). Please refer to appropriate documentation. - -Your `READ_ONLY_USER` should now be ready for use with Airbyte. 
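The firewall step above deliberately leaves the exact commands to your environment. For illustration only, opening MongoDB's default port (27017) to a single Airbyte host might look like this with UFW or firewalld — the source IP address is a placeholder:

```bash
# UFW: allow only the Airbyte host (placeholder IP) to reach MongoDB's default port 27017
sudo ufw allow from 203.0.113.10 to any port 27017 proto tcp

# firewalld equivalent: a rich rule restricted to the same placeholder IP
sudo firewall-cmd --permanent --zone=public \
  --add-rich-rule='rule family="ipv4" source address="203.0.113.10" port port="27017" protocol="tcp" accept'
sudo firewall-cmd --reload
```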
- - -#### Possible configuration Parameters - -* [Authentication Source](https://docs.mongodb.com/manual/reference/connection-string/#mongodb-urioption-urioption.authSource) -* Host: URL of the database -* Port: Port to use for connecting to the database -* User: username to use when connecting -* Password: used to authenticate the user -* [Replica Set](https://docs.mongodb.com/manual/reference/connection-string/#mongodb-urioption-urioption.replicaSet) -* Whether to enable SSL - - -## Changelog -| Version | Date | Pull Request | Subject | -| :------ | :-------- | :----- | :------ | -| 0.2.3 | 2021-07-20 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions (including expired and canceled)| diff --git a/docs/archive/securing-airbyte.md b/docs/archive/securing-airbyte.md deleted file mode 100644 index 727ff5043eeb..000000000000 --- a/docs/archive/securing-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Securing Airbyte access - -## Reporting Vulnerabilities -⚠️ Please do not file GitHub issues or post on our public forum for security vulnerabilities as they are public! ⚠️ - -Airbyte takes security issues very seriously. If you have any concern around Airbyte or believe you have uncovered a vulnerability, please get in touch via the e-mail address security@airbyte.io. In the message, try to provide a description of the issue and ideally a way of reproducing it. The security team will get back to you as soon as possible. - -Note that this security address should be used only for undisclosed vulnerabilities. Dealing with fixed issues or general questions on how to use the security features should be handled regularly via the user and the dev lists. Please report any security problems to us before disclosing it publicly. - -## Access control - -Airbyte, in its open-source version, does not support RBAC to manage access to the UI. - -However, multiple options exist for the operators to implement access control themselves. - -To secure access to Airbyte you have three options: -* Networking restrictions: deploy Airbyte in a private network or use a firewall to filter which IP is allowed to access your host. -* Put Airbyte behind a reverse proxy and handle the access control on the reverse proxy side. 
-* If you deployed Airbyte on a cloud provider: - * GCP: use the [Identity-Aware proxy](https://cloud.google.com/iap) service - * AWS: use the [AWS Systems Manager Session Manager](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager.html) service - -**Non exhaustive** online resources list to set up auth on your reverse proxy: -* [Configure HTTP Basic Auth on NGINX for Airbyte](https://shadabshaukat.medium.com/deploy-and-secure-airbyte-with-nginx-reverse-proxy-basic-authentication-lets-encrypt-ssl-72bee223a4d9) -* [Kubernetes: Basic auth on a Nginx ingress controller](https://kubernetes.github.io/ingress-nginx/examples/auth/basic/) -* [How to set up Okta SSO on an NGINX reverse proxy](https://developer.okta.com/blog/2018/08/28/nginx-auth-request) -* [How to enable HTTP Basic Auth on Caddy](https://caddyserver.com/docs/caddyfile/directives/basicauth) -* [SSO for Traefik](https://github.com/thomseddon/traefik-forward-auth) diff --git a/docs/cloud/core-concepts.md b/docs/cloud/core-concepts.md deleted file mode 100644 index 9383c6ffd036..000000000000 --- a/docs/cloud/core-concepts.md +++ /dev/null @@ -1,165 +0,0 @@ -# Core Concepts - -Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, what format the data is written to in the destination, and if the data is stored in raw tables format or basic normalized (or JSON) format. - -This page describes the concepts you need to know to use Airbyte. - -## Source - -A source is an API, file, database, or data warehouse that you want to ingest data from. - -## Destination - -A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your ingested data. - -## Connector - -An Airbyte component which pulls data from a source or pushes data to a destination. - -## Connection - -A connection is an automated data pipeline that replicates data from a source to a destination. - -Setting up a connection involves configuring the following parameters: - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Parameter | Description |
| --- | --- |
| Sync schedule | When should a data sync be triggered? |
| Destination Namespace and stream names | Where should the replicated data be written? |
| Catalog selection | What data should be replicated from the source to the destination? |
| Sync mode | How should the streams be replicated (read and written)? |
| Optional transformations | How should Airbyte protocol messages (raw JSON blob) data be converted into other data representations? |
    - -## Stream - -A stream is a group of related records. - -Examples of streams: - -- A table in a relational database -- A resource or API endpoint for a REST API -- The records from a directory containing many files in a filesystem - -## Field - -A field is an attribute of a record in a stream. - -Examples of fields: - -- A column in the table in a relational database -- A field in an API response - -## Namespace - -Namespace is a group of streams in a source or destination. Common use cases for namespaces are enforcing permissions, segregating test and production data, and general data organization. - -A schema in a relational database system is an example of a namespace. - -In a source, the namespace is the location from where the data is replicated to the destination. - -In a destination, the namespace is the location where the replicated data is stored in the destination. Airbyte supports the following configuration options for destination namespaces: - - - - - - - - - - - - - - - - - - -
| Configuration | Description |
| --- | --- |
| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides the namespace information, the destination will reproduce the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. |
| Destination default | All streams will be replicated and stored in the default namespace defined on the destination settings page. For settings for popular destinations, see Destination Connector Settings. |
| Custom format | All streams will be replicated and stored in a user-defined custom format. See Custom format for more details. |
    - -## Connection sync modes - -A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. - -- **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it. -- **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. -- **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. -- **Incremental Sync | Append + Deduped:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. - -## Normalization - -Normalization is the process of structuring data from the source into a format appropriate for consumption in the destination. For example, when writing data from a nested, dynamically typed source like a JSON API to a relational destination like Postgres, normalization is the process which un-nests JSON from the source into a relational table format which uses the appropriate column types in the destination. - -Note that normalization is only relevant for the following relational database & warehouse destinations: - -- BigQuery -- Snowflake -- Redshift -- Postgres -- Oracle -- MySQL -- MSSQL - -Other destinations do not support normalization as described in this section, though they may normalize data in a format that makes sense for them. For example, the S3 destination connector offers the option of writing JSON files in S3, but also offers the option of writing statically typed files such as Parquet or Avro. - -After a sync is complete, Airbyte normalizes the data. When setting up a connection, you can choose one of the following normalization options: - -- Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` -- Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination. _Note: Not all destinations support normalization._ -- [dbt Cloud integration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/dbt-cloud-integration): Airbyte's dbt Cloud integration allows you to use dbt Cloud for transforming and cleaning your data during the normalization process. - -:::note - -Normalizing data may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is normalized and is not related to Airbyte credit usage. - -::: - -## Workspace - -A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. - -When you [sign up](http://cloud.airbyte.com/signup) for Airbyte Cloud, we automatically create your first workspace where you are the only user with access. You can set up your sources and destinations to start syncing data and invite other users to join your workspace. - -## Glossary of Terms - -You find and extended list of [Airbyte specific terms](https://glossary.airbyte.com/term/airbyte-glossary-of-terms/), [data engineering concepts](https://glossary.airbyte.com/term/data-engineering-concepts) or many [other data related terms](https://glossary.airbyte.com/). 
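To make the raw-versus-normalized distinction above concrete, here is a hedged sketch of what querying each might look like in a Postgres destination. The `_airbyte_raw_` table naming follows the convention described above, but the exact column names and layout can vary by destination connector and version:

```bash
# Raw data (no normalization): each record is stored as a JSON blob,
# typically in a column such as _airbyte_data (name may vary by version).
psql -c "SELECT _airbyte_data ->> 'email' AS email FROM _airbyte_raw_users LIMIT 5;"

# Basic normalization: the same data un-nested into typed columns.
psql -c "SELECT email FROM users LIMIT 5;"
```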
diff --git a/docs/cloud/getting-started-with-airbyte-cloud.md b/docs/cloud/getting-started-with-airbyte-cloud.md deleted file mode 100644 index 0c4cc4d284cc..000000000000 --- a/docs/cloud/getting-started-with-airbyte-cloud.md +++ /dev/null @@ -1,212 +0,0 @@ -# Getting Started with Airbyte Cloud - -This page guides you through setting up your Airbyte Cloud account, setting up a source, destination, and connection, verifying the sync, and allowlisting an IP address. - -## Set up your Airbyte Cloud account - -To use Airbyte Cloud: - -1. If you haven't already, [sign up for Airbyte Cloud](https://cloud.airbyte.com/signup?utm_campaign=22Q1_AirbyteCloudSignUpCampaign_Trial&utm_source=Docs&utm_content=SetupGuide) using your email address, Google login, or GitHub login. - - Airbyte Cloud offers a 14-day free trial that begins after your first successful sync. For more information, see [Pricing](https://airbyte.com/pricing). - - :::note - If you are invited to a workspace, you cannot use your Google login to create a new Airbyte account. - ::: - -2. If you signed up using your email address, Airbyte will send you an email with a verification link. On clicking the link, you'll be taken to your new workspace. - - :::info - A workspace lets you collaborate with team members and share resources across your team under a shared billing account. - ::: - -## Set up a source - -:::info -A source is an API, file, database, or data warehouse that you want to ingest data from. -::: - -To set up a source: - -:::note - -Set your [default data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) before creating a new source to ensure your data is processed in the correct region. - -::: - -1. On the Airbyte Cloud dashboard, click **Sources** and then click **+ New source**. -2. On the Set up the source page, select the source you want to set up from the **Source** catalog. - - The fields relevant to your source are displayed. The Setup Guide provides information to help you fill out the fields for your selected source. - -3. Click **Set up source**. - -## Set up a destination - -:::info -A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your extracted data. -::: - -To set up a destination: - -1. On the Airbyte Cloud dashboard, click **Destinations** and then click **+ New destination**. -2. On the Set up the destination page, select the destination you want to set up from the **Destination** catalog. - - The fields relevant to your destination are displayed. The Setup Guide provides information to help you fill out the fields for your selected destination. - -3. Click **Set up destination**. - -## Set up a connection - -:::info -A connection is an automated data pipeline that replicates data from a source to a destination. -::: - -Setting up a connection involves configuring the following parameters: - -| Parameter | Description | -| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| Replication frequency | How often should the data sync? | -| [Data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? 
| -| Destination Namespace and stream prefix | Where should the replicated data be written? | -| Catalog selection | Which streams and fields should be replicated from the source to the destination? | -| Sync mode | How should the streams be replicated (read and written)? | - -For more information, see [Connections and Sync Modes](../understanding-airbyte/connections/README.md) and [Namespaces](../understanding-airbyte/namespaces.md) - -If you need to use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): - -1. In the **Replication Frequency** dropdown, click **Cron**. -2. Enter a cron expression and choose a time zone to create a sync schedule. - -:::note - -- Only one sync per connection can run at a time. -- If cron schedules a sync to run before the last one finishes, the scheduled sync will start after the last sync completes. - -::: - -To set up a connection: - -:::note - -Set your [default data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) before creating a new connection to ensure your data is processed in the correct region. - -::: - -1. On the Airbyte Cloud dashboard, click **Connections** and then click **+ New connection**. -2. On the New connection page, select a source: - - - To use an existing source, select your desired source from the **Sources**. Click the source to use it. - - To set up a new source, select "+ New source". Select a destination from the catalog. The fields relevant to your source are displayed. The Setup Guide provides information to help you fill out the fields for your selected source. Click **Set up source**. - -3. Select a destination: - - - To use an existing destination, select your desired destination from the existing destinations. Click the destination to use it. - - To set up a new destination, select "+ New destination". Select a destination from the catalog. The fields relevant to your destination are displayed. The Setup Guide provides information to help you fill out the fields for your selected destination. Click **Set up destination**. - - The Set up the connection page is displayed. - -4. From the **Replication frequency** dropdown, select how often you want the data to sync from the source to the destination. - - **Note:** The default replication frequency is **Every 24 hours**. - -5. From the **Destination Namespace** dropdown, select the format in which you want to store the data in the destination: - - **Note:** The default configuration is **Mirror source structure**. - - - - - - - - - - - - - - - - - - -
| Configuration | Description |
| --- | --- |
| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides the namespace information, the destination will reproduce the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. |
| Destination default | All streams will be replicated and stored in the default namespace defined on the Destination Settings page. For more information, see Destination Connector Settings. |
| Custom format | All streams will be replicated and stored in a custom format. See Custom format for more details. |
    - -:::tip -To better understand the destination namespace configurations, see [Destination Namespace example](../understanding-airbyte/namespaces.md#examples) -::: - -6. (Optional) In the **Destination Stream Prefix (Optional)** field, add a prefix to stream names (for example, adding a prefix `airbyte_` renames `projects` to `airbyte_projects`). -7. Activate the streams you want to sync: - - (Optional) If your source has many tables, type the name of the stream you want to enable in the **Search stream name** search box. -8. Configure the sync settings: - - 1. Toggle the **Sync** button to enable sync for the stream. - 2. **Source stream name**: The table name in the source - 3. **Sync mode**: Select how you want the data to be replicated from the source to the destination: - - For the source: - - - Select **Full Refresh** to copy the entire dataset each time you sync - - Select **Incremental** to replicate only the new or modified data - - For the destination: - - - Select **Overwrite** to erase the old data and replace it completely - - Select **Append** to capture changes to your table - **Note:** This creates duplicate records - - Select **Append + Deduped** to mirror your source while keeping records unique - - **Note:** Some sync modes may not yet be available for the source or destination. - - 4. **Cursor field**: Used in **Incremental** sync mode to determine which records to sync. Airbyte pre-selects the cursor field for you (example: updated date). If you have multiple cursor fields, select the one you want. - 5. **Primary key**: Used in **Append + Deduped** sync mode to determine the unique identifier. - 6. Choose which fields to sync. By default, all fields are synced. - -10. Click **Set up connection**. -11. Airbyte tests the connection. If the sync is successful, the Connection page is displayed. - -## Verify the connection - -Verify the sync by checking the logs: - -1. On the Airbyte Cloud dashboard, click **Connections**. The list of connections is displayed. Click on the connection you just set up. -2. The Sync History is displayed. Click on the first log in the sync history to view the log details. -3. Check the data at your destination. If you added a Destination Stream Prefix while setting up the connection, make sure to search for the stream name with the prefix. - -## Allowlist IP addresses - -Depending on your [data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) location, you may need to allowlist the following IP addresses to enable access to Airbyte: - -### United States and Airbyte Default - -#### GCP region: us-west3 - -[comment]: # "IMPORTANT: if changing the list of IP addresses below, you must also update the connector.airbyteCloudIpAddresses LaunchDarkly flag to show the new list so that the correct list is shown in the Airbyte Cloud UI, then reach out to the frontend team and ask them to update the default value in the useAirbyteCloudIps hook!" 
- -- 34.106.109.131 -- 34.106.196.165 -- 34.106.60.246 -- 34.106.229.69 -- 34.106.127.139 -- 34.106.218.58 -- 34.106.115.240 -- 34.106.225.141 - -### European Union - -#### AWS region: eu-west-3 - -- 13.37.4.46 -- 13.37.142.60 -- 35.181.124.238 diff --git a/docs/cloud/managing-airbyte-cloud/configuring-connections.md b/docs/cloud/managing-airbyte-cloud/configuring-connections.md index 49e6fd43bbaf..bc896004eb30 100644 --- a/docs/cloud/managing-airbyte-cloud/configuring-connections.md +++ b/docs/cloud/managing-airbyte-cloud/configuring-connections.md @@ -1,20 +1,18 @@ # Configuring connections -After you have created a connection, you can change how your data syncs to the destination by modifying the [configuration settings](#configure-connection-settings) and the [stream settings](#modify-streams-in-your-connection). +A connection links a source to a destination and defines how your data will sync. After you have created a connection, you can modify any of the configuration settings or stream settings. -## Configure connection settings +## Configure Connection Settings Configuring the connection settings allows you to manage various aspects of the sync, such as how often data syncs and where data is written. To configure these settings: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection you want to change. 2. Click the **Replication** tab. -3. Click the **Configuration** dropdown. - -You can configure the following settings: +3. Click the **Configuration** dropdown to expand the options. :::note @@ -22,30 +20,19 @@ These settings apply to all streams in the connection. ::: +You can configure the following settings: + | Setting | Description | |--------------------------------------|-------------------------------------------------------------------------------------| -| Replication frequency | How often the data syncs | -| Destination namespace | Where the replicated data is written | +| [Replication frequency](/using-airbyte/core-concepts/sync-schedules.md) | How often the data syncs | +| [Destination namespace](/using-airbyte/core-concepts/namespaces.md) | Where the replicated data is written | | Destination stream prefix | How you identify streams from different connectors | -| [Detect and propagate schema changes](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes/#review-non-breaking-schema-changes) | How Airbyte handles syncs when it detects schema changes in the source | - -To use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): - -1. In the **Replication Frequency** dropdown, click **Cron**. - -2. Enter a cron expression and choose a time zone to create a sync schedule. - -:::note - -* Only one sync per connection can run at a time. -* If a sync is scheduled to run before the previous sync finishes, the scheduled sync will start after the completion of the previous sync. -* Reach out to [Sales](https://airbyte.com/company/talk-to-sales) to enable syncs more frequently than once per hour. 
- -::: +| [Detect and propagate schema changes](/cloud/managing-airbyte-cloud/manage-schema-changes.md) | How Airbyte handles syncs when it detects schema changes in the source | +| [Connection Data Residency](/cloud/managing-airbyte-cloud/manage-data-residency.md) | Where data will be processed | ## Modify streams in your connection -In the **Activate the streams you want to sync** table, you can choose which streams to sync and how they are loaded to the destination. +In the **Activate the streams you want to sync** table, you choose which streams to sync and how they are loaded to the destination. :::info A connection's schema consists of one or many streams. Each stream is most commonly associated with a database table or an API endpoint. Within a stream, there can be one or many fields or columns. @@ -53,7 +40,7 @@ A connection's schema consists of one or many streams. Each stream is most commo To modify streams: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection you want to change. 2. Click the **Replication** tab. @@ -71,9 +58,9 @@ Source-defined cursors and primary keys are selected automatically and cannot be ::: -3. Click on a stream to display the stream details panel. +3. Click on a stream to display the stream details panel. You'll see each column we detect from the source. -4. Toggle individual fields to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. +4. Column selection is available to protect PII or sensitive data from being synced to the destination. Toggle individual fields to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. :::info diff --git a/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md b/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md index 96510918cfd9..777433ec72e3 100644 --- a/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md +++ b/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md @@ -1,7 +1,15 @@ # Use the dbt Cloud integration + + By using the dbt Cloud integration, you can create and run dbt transformations during syncs in Airbyte Cloud. This allows you to transform raw data into a format that is suitable for analysis and reporting, including cleaning and enriching the data. +:::note + +Normalizing data may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is normalized and is not related to Airbyte credit usage. + +::: + ## Step 1: Generate a service token Generate a [service token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens#generating-service-account-tokens) for your dbt Cloud transformation. @@ -17,7 +25,7 @@ Generate a [service token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-t To set up the dbt Cloud integration in Airbyte Cloud: -1. On the Airbyte Cloud dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings**. 2. Click **dbt Cloud integration**. 
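The cron scheduling steps referenced in the configuring-connections changes above use Quartz-style expressions (per the quartz-scheduler.org link in the docs). A few illustrative values follow; the exact fields accepted can vary by Airbyte version, so treat these as examples rather than a specification:

```bash
# Quartz cron expressions: seconds minutes hours day-of-month month day-of-week
# "0 0 * * * ?"    -> at the top of every hour
# "0 0 12 * * ?"   -> every day at 12:00 in the selected time zone
# "0 0 6 ? * MON"  -> every Monday at 06:00
```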
diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md index 49a663b451c9..2b39a0bb1893 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md +++ b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md @@ -1,6 +1,6 @@ # Manage notifications -This page provides guidance on how to manage notifications for Airbyte Cloud, allowing you to stay up-to-date on the activities in your workspace. +This page provides guidance on how to manage notifications for Airbyte, allowing you to stay up-to-date on the activities in your workspace. ## Notification Event Types @@ -10,40 +10,76 @@ This page provides guidance on how to manage notifications for Airbyte Cloud, al | Successful Syncs | A sync from any of your connections succeeds. Note that if sync runs frequently or if there are many syncs in the workspace these types of events can be noisy | Automated Connection Updates | A connection is updated automatically (ex. a source schema is automatically updated) | | Connection Updates Requiring Action | A connection update requires you to take action (ex. a breaking schema change is detected) | -| Sync Disabled Warning | A connection will be disabled soon due to repeated failures. It has failed 50 times consecutively or there were only failed jobs in the past 7 days | -| Sync Disabled | A connection was automatically disabled due to repeated failures. It will be disabled when it has failed 100 times consecutively or has been failing for 14 days in a row | +| Warning - Repeated Failures | A connection will be disabled soon due to repeated failures. It has failed 50 times consecutively or there were only failed jobs in the past 7 days | +| Sync Disabled - Repeated Failures | A connection was automatically disabled due to repeated failures. It will be disabled when it has failed 100 times consecutively or has been failing for 14 days in a row | +| Warning - Upgrade Required (Cloud only) | A new connector version is available and requires manual upgrade | +| Sync Disabled - Upgrade Required (Cloud only) | One or more connections were automatically disabled due to a connector upgrade deadline passing -## Configure Notification Settings +## Configure Email Notification Settings + + To set up email notifications: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings** and navigate to **Notifications**. -2. Click **Notifications**. +2. Toggle which messages you'd like to receive from Airbyte. All email notifications will be sent by default to the creator of the workspace. To change the recipient, edit and save the **notification email recipient**. If you would like to send email notifications to more than one recipient, you can enter an email distribution list (ie Google Group) as the recipient. -3. Toggle which messages you'd like to receive from Airbyte. All email notifications will be sent by default to the creator of the workspace. To change the recipient, edit and save the **notification email recipient**. If you would like to send email notifications to more than one recipient, you can enter an email distribution list (ie Google Group) as the recipient. +3. Click **Save changes**. -4. Click **Save changes**. +:::note +All email notifications except for Successful Syncs are enabled by default. 
+::: + +## Configure Slack Notification settings + +To set up Slack notifications: + +If you're more of a visual learner, just head over to [this video](https://www.youtube.com/watch?v=NjYm8F-KiFc&ab_channel=Airbyte) to learn how to do this. You can also refer to the Slack documentation on how to [create an incoming webhook for Slack](https://api.slack.com/messaging/webhooks). + +### Create a Slack app + +1. **Create a Slack App**: Navigate to https://api.slack.com/apps/. Select `Create an App`. + +![](../../.gitbook/assets/notifications_create_slack_app.png) + +2. Select `From Scratch`. Enter your App Name (e.g. Airbyte Sync Notifications) and pick your desired Slack workspace. + +3. **Set up the webhook URL**: In the left sidebar, click on `Incoming Webhooks`. Click the slider button in the top right to turn the feature on. Then click `Add New Webhook to Workspace`. -To set up webhook notifications: +![](../../.gitbook/assets/notifications_add_new_webhook.png) -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +4. Pick the channel that you want to receive Airbyte notifications in (ideally a dedicated one), and click `Allow` to give it permissions to access the channel. You should see the bot show up in the selected channel now. You will see an active webhook right above the `Add New Webhook to Workspace` button. -2. Click **Notifications**. +![](../../.gitbook/assets/notifications_webhook_url.png) -3. Have a webhook URL ready if you plan to use webhook notifications. Using a Slack webook is recommended. [Create an Incoming Webhook for Slack](https://api.slack.com/messaging/webhooks). +5. Click `Copy` to copy the link to your clipboard, which you will need to enter into Airbyte. -4. Toggle the type of events you are interested to receive notifications for. - 1. To enable webhook notifications, the webhook URL is required. For your convenience, we provide a 'test' function to send a test message to your webhook URL so you can make sure it's working as expected. +Your Webhook URL should look something like this: -5. Click **Save changes**. +![](../../.gitbook/assets/notifications_airbyte_notification_settings.png) + + +### Enable the Slack notification in Airbyte + +1. In the Airbyte UI, click **Settings** and navigate to **Notifications**. + +2. Paste the copied webhook URL into `Webhook URL`. Using a Slack webhook is recommended. On this page, you can toggle each slider to decide whether you want notifications for each notification type. + +3. **Test it out**: You can click `Test` to send a test message to the channel. Or, just run a sync now and try it out! If all goes well, you should receive a notification in your selected channel that looks like this: + +![](../../.gitbook/assets/notifications_slack_message.png) + +You're done! + +4. Click **Save changes**. ## Enable schema update notifications -To get notified when your source schema changes: -1. Make sure you have `Automatic Connection Updates` and `Connection Updates Requiring Action` turned on for your desired notification channels; If these are off, even if you turned on schema update notifications in a connection's settings, Airbyte will *NOT* send out any notifications related to these types of events. +To be notified of any source schema changes: +1. Make sure you have enabled `Automatic Connection Updates` and `Connection Updates Requiring Action` notifications.
If these are off, even if you turned on schema update notifications in a connection's settings, Airbyte will *NOT* send out any notifications related to these types of events. -2. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection you want to receive notifications for. +2. In the Airbyte UI, click **Connections** and select the connection you want to receive notifications for. 3. Click the **Settings** tab on the Connection page. diff --git a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md index 929a56834534..23d25db6be99 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md +++ b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md @@ -3,7 +3,7 @@ The connection state provides additional information about incremental syncs. It includes the most recent values for the global or stream-level cursors, which can aid in debugging or determining which data will be included in the next sync. To review the connection state: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to display. +1. In the Airbyte UI, click **Connections** and then click the connection you want to display. 2. Click the **Settings** tab on the Connection page. @@ -11,7 +11,7 @@ To review the connection state: **Connection State** displays. -To edit the connection state: +Editing the connection state allows the sync to start from any date in the past. If the state is edited, Airbyte will start syncing incrementally from the new date. This is helpful if you do not want to fully resync your data. To edit the connection state: :::warning Updates to connection state should be handled with extreme care. Updates may break your syncs, requiring a reset to fix. Make changes only as directed by the Airbyte team. diff --git a/docs/cloud/managing-airbyte-cloud/manage-credits.md b/docs/cloud/managing-airbyte-cloud/manage-credits.md index 040a083e58d5..ed54d783d6ae 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-credits.md +++ b/docs/cloud/managing-airbyte-cloud/manage-credits.md @@ -1,14 +1,16 @@ # Manage credits + + ## Buy credits Airbyte [credits](https://airbyte.com/pricing) are used to pay for Airbyte resources when you run a sync. You can purchase credits on Airbyte Cloud to keep your data flowing without interruption. To buy credits: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Billing** in the navigation bar. +1. In the Airbyte UI, click **Billing** in the navigation bar. -2. If you are unsure of how many credits you need, use our [Cost Estimator](https://cost.airbyte.com/) or click **Talk to Sales** to find the right amount for your team. +2. If you are unsure of how many credits you need, use our [Cost Estimator](https://www.airbyte.com/pricing) or click **Talk to Sales** to find the right amount for your team. 3. Click **Buy credits**. @@ -18,11 +20,11 @@ To buy credits: Purchase limits: * Minimum: 20 credits - * Maximum: 2,500 credits + * Maximum: 6,000 credits ::: - To buy more credits or a custom plan, reach out to [Sales](https://airbyte.com/talk-to-sales). + To buy more credits or discuss a custom plan, reach out to [Sales](https://airbyte.com/talk-to-sales). 5. Fill out the payment information. @@ -44,7 +46,7 @@ To buy credits: You can enroll in automatic top-ups of your credit balance. 
This is a beta feature for those who do not want to manually add credits each time. -To enroll, [email us](mailto:natalie@airbyte.io) with: +To enroll, [email us](mailto:billing@airbyte.io) with: 1. A link to your workspace that you'd like to enable this feature for. 2. **Recharge threshold** The number under what credit balance you would like the automatic top up to occur. @@ -59,11 +61,11 @@ To take a real example, if: Note that the difference between the recharge credit amount and recharge threshold must be at least 20 as our minimum purchase is 20 credits. -If you are enrolled and want to change your limits or cancel your enrollment, [email us](mailto:natalie@airbyte.io). +If you are enrolled and want to change your limits or cancel your enrollment, [email us](mailto:billing@airbyte.io). ## View invoice history -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Billing** in the navigation bar. +1. In the Airbyte UI, click **Billing** in the navigation bar. 2. Click **Invoice History**. You will be redirected to a Stripe portal. diff --git a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md index da02874006ce..384d18337bb5 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md +++ b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md @@ -1,5 +1,7 @@ # Manage data residency + + In Airbyte Cloud, you can set the default data residency and choose the data residency for individual connections, which can help you comply with data localization requirements. ## Choose your default data residency @@ -12,11 +14,11 @@ While the data is processed in a data plane of the chosen residency, the cursor ::: -When you set the default data residency, it applies to new connections only. If you do not set the default data residency, the [Airbyte Default](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud/#united-states-and-airbyte-default) region is used. If you want to change the data residency for a connection, you can do so in its [connection settings](#choose-the-data-residency-for-a-connection). +When you set the default data residency, it applies to new connections only. If you do not set the default data residency, the [Airbyte Default](configuring-connections.md) region is used. If you want to change the data residency for a connection, you can do so in its [connection settings](configuring-connections.md). To choose your default data residency: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings**. 2. Click **Data Residency**. @@ -26,16 +28,16 @@ To choose your default data residency: :::info -Depending on your network configuration, you may need to add [IP addresses](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud/#allowlist-ip-addresses) to your allowlist. +Depending on your network configuration, you may need to add [IP addresses](/operating-airbyte/security.md#network-security-1) to your allowlist. ::: ## Choose the data residency for a connection -You can choose the data residency for your connection in the connection settings. You can also choose data residency when creating a [new connection](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud#set-up-a-connection), or you can set the [default data residency](#choose-your-default-data-residency) for your workspace. +You can choose the data residency for your connection in the connection settings. 
You can also choose data residency when creating a new connection, or you can set the default data residency for your workspace. To choose the data residency for your connection: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection that you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection that you want to change. 2. Click the **Settings** tab. diff --git a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md index c6113b461c65..f938008b52ff 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md +++ b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md @@ -2,30 +2,44 @@ You can specify for each connection how Airbyte should handle any change of schema in the source. This process helps ensure accurate and efficient data syncs, minimizing errors and saving you time and effort in managing your data pipelines. -Airbyte checks for any changes in your source schema before syncing, at most once every 24 hours. - -Based on your configured settings for "Detect and propagate schema changes", Airbyte can automatically sync those changes or ignore them: -* **Propagate all changes** automatically propagates stream changes (additions or deletions) or column changes (additions or deletions) detected in the source -* **Propagate column changes only** automatically propagates column changes detected in the source -* **Ignore** any schema change, in which case the schema you’ve set up will not change even if the source schema changes until you approve the changes manually -* **Pause connection** disables the connection from syncing further once a change is detected - -When a new column is detected and propagated, values for that column will be filled in for the updated rows. If you are missing values for rows not updated, a backfill can be done by completing a full refresh. - -When a column is deleted, the values for that column will stop updating for the updated rows and be filled with Null values. - -When a new stream is detected and propagated, the first sync will fill all data in as if it is a historical sync. When a stream is deleted from the source, the stream will stop updating, and we leave any existing data in the destination. The rest of the enabled streams will continue syncing. +Airbyte checks for any changes in your source schema immediately before syncing, at most once every 24 hours. + +## Detect and Propagate Schema Changes +Based on your configured settings for **Detect and propagate schema changes**, Airbyte will automatically sync those changes or ignore them: + +| Setting | Description | +|---------------------|---------------------------------------------------------------------------------------------------------------------| +| Propagate all changes | All new tables and column changes from the source will automatically be propagated and reflected in the destination. This includes stream changes (additions or deletions), column changes (additions or deletions) and data type changes +| Propagate column changes only (default) | Only column changes will be propagated +| Ignore | Schema changes will be detected, but not propagated. Syncs will continue running with the schema you've set up.
To propagate the detected schema changes, you will need to approve the changes manually | +| Pause Connection | Connections will be automatically disabled as soon as any schema changes are detected | + +## Types of Schema Changes +When propagation is enabled, your data in the destination will automatically shift to bring in the new changes. + +| Type of Schema Change | Propagation Behavior | +|---------------------|---------------------------------------------------------------------------------------------------------------------| +| New Column | The new column will be created in the destination. Values for the column will be filled in for the updated rows. If you are missing values for rows not updated, a backfill can be done by completing a full resync. +| Removal of column | The old column will be removed from the destination. +| New stream | The first sync will create the new stream in the destination and fill all data in as if it is a historical sync. | +| Removal of stream | The stream will stop updating, and any existing data in the destination will remain. | +| Column data type changes | The data in the destination will remain the same. Any new or updated rows with incompatible data types will result in a row error in the raw Airbyte tables. You will need to refresh the schema and do a full resync to ensure the data types are consistent. + +:::tip +Ensure you receive webhook notifications for your connection by enabling `Schema update notifications` in the connection's settings. +::: -In all cases, if a breaking change is detected, the connection will be paused for manual review to prevent future syncs from failing. Breaking schema changes occur when: +In all cases, if a breaking schema change is detected, the connection will be paused immediately for manual review to prevent future syncs from failing. Breaking schema changes occur when: * An existing primary key is removed from the source * An existing cursor is removed from the source -See "Fix breaking schema changes" to understand how to resolve these types of changes. +To re-enable the streams, ensure the correct **Primary Key** and **Cursor** are selected for each stream and save the connection. ## Review non-breaking schema changes -To review non-breaking schema changes: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with non-breaking changes (indicated by a **yellow exclamation mark** icon). +If the connection is set to **Ignore** any schema changes, Airbyte continues syncing according to your last saved schema. You need to manually approve any detected schema changes for the schema in the destination to change. + +1. In the Airbyte UI, click **Connections**. Select a connection and navigate to the **Replication** tab. If schema changes are detected, you'll see a blue "i" icon next to the Replication tab. 2. Click **Review changes**. @@ -35,54 +49,41 @@ To review non-breaking schema changes: 5. Scroll to the bottom of the page and click **Save changes**. -:::note - - By default, Airbyte ignores non-breaking changes and continues syncing. You can configure how Airbyte handles syncs when it detects non-breaking changes by [editing the stream configuration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/edit-stream-configuration). - -::: - -## Resolve breaking changes +## Resolving breaking changes Breaking changes require your attention to resolve.
They may immediately cause the connection to be disabled, or you can upgrade the connector manually within a time period once reviewing the changes. -A connection will automatically be disabled if: -* An existing primary key is removed -* An existing cursor field is removed +A connection will always automatically be disabled if an existing primary key or cursor field is removed. You must review and fix the changes before editing the connection or resuming syncs. + +Breaking changes can also occur when a new version of the connector is released. In these cases, the connection will alert you of a breaking change but continue to sync until the cutoff date for upgrade. On the cutoff date, the connection will automatically be disabled to prevent failure or unexpected behavior. It is **highly recommended** to upgrade before the cutoff date to ensure you continue syncing without interruption. -If the breaking change is due to a new version, the connection will alert you of a breaking change but continue to sync until the cutoff date. On the cutoff date, the connection will automatically be disabled on that date to prevent failure or unexpected behavior. These breaking changes include: +A major version upgrade will include a breaking change if any of these apply: | Type of Change | Description | |------------------|---------------------------------------------------------------------------------------------------------------------| -| Spec Change | The configuration required by users of this connector has been changed and syncs will fail until users reconfigure or re-authenticate. | -| Schema Change | The type of property previously present within a record has changed +| Connector Spec Change | The configuration has been changed and syncs will fail until users reconfigure or re-authenticate. | +| Schema Change | The type of property previously present within a record has changed and a refresh of the source schema is required. | Stream or Property Removal | Data that was previously being synced is no longer going to be synced | -| Destination Format / Normalization Change | The way the destination writes the final data or how normalization cleans that data is changing in a way that requires a full refresh | +| Destination Format / Normalization Change | The way the destination writes the final data or how Airbyte cleans that data is changing in a way that requires a full refresh | | State Changes | The format of the source’s state has changed, and the full dataset will need to be re-synced | To review and fix breaking schema changes: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with breaking changes (indicated by a **red exclamation mark** icon). +1. In the Airbyte UI, click **Connections** and select the connection with breaking changes. -2. Review the description of what has changed. The breaking change will require you to upgrade your source or destination to a new version. +2. Review the description of what has changed in the new version. The breaking change will require you to upgrade your source or destination to a new version by a specific cutoff date. 3. Update the source or destination to the new version to continue syncing. -:::note -If a connection’s source schema has breaking changes (an existing cursor or primary key is removed), it will stop syncing immediately. You must review and fix the changes before editing the connection or resuming syncs.
-::: ### Manually refresh the source schema In addition to Airbyte Cloud’s automatic schema change detection, you can manually refresh the source schema to stay up to date with changes in your schema. To manually refresh the source schema: - 1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to refresh. + 1. In the Airbyte UI, click **Connections** and then click the connection you want to refresh. 2. Click the **Replication** tab. 3. In the **Activate the streams you want to sync** table, click **Refresh source schema** to fetch the schema of your data source. - 4. If there are changes to the schema, you can review them in the **Refreshed source schema** dialog. - -## Manage Schema Change Notifications -[Refer to our notification documentation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications#enable-schema-update-notifications) to understand how to stay updated on any schema updates to your connections. \ No newline at end of file + 4. If there are changes to the schema, you can review them in the **Refreshed source schema** dialog. \ No newline at end of file diff --git a/docs/cloud/managing-airbyte-cloud/review-connection-status.md b/docs/cloud/managing-airbyte-cloud/review-connection-status.md index d9ee57020af7..5970e3756f4b 100644 --- a/docs/cloud/managing-airbyte-cloud/review-connection-status.md +++ b/docs/cloud/managing-airbyte-cloud/review-connection-status.md @@ -2,9 +2,9 @@ The connection status displays information about the connection and of each stream being synced. Reviewing this summary allows you to assess the connection's current status and understand when the next sync will be run. To review the connection status: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**. +1. In the Airbyte UI, click **Connections**. -2. Click a connection in the list to view its status. +2. Click a connection in the list to view its status. | Status | Description | |------------------|---------------------------------------------------------------------------------------------------------------------| @@ -13,10 +13,20 @@ To review the connection status: | Delayed | The connection has not loaded data within the scheduled replication frequency. For example, if the replication frequency is 1 hour, the connection has not loaded data for more than 1 hour | | Error | The connection has not loaded data in more than two times the scheduled replication frequency. For example, if the replication frequency is 1 hour, the connection has not loaded data for more than 2 hours | | Action Required | A breaking change related to the source or destination requires attention to resolve | -| Pending | The connection has not been run yet, so no status exists | -| Disabled | The connection has been disabled and is not scheduled to run | | In Progress | The connection is currently extracting or loading data | +| Disabled | The connection has been disabled and is not scheduled to run | +| Pending | The connection has not been run yet, so no status exists | +If the most recent sync failed, you'll see the error message that will help diagnose if the failure is due to a source or destination configuration error. [Reach out](/community/getting-support.md) to us if you need any help to ensure your data continues syncing. + +:::info +If a sync starts to fail, it will automatically be disabled after 100 consecutive failures or 14 consecutive days of failure.
+::: + +If a new major version of the connector has been released, you will also see a banner on this page indicating the cutoff date for the version. Airbyte recommends upgrading before the cutoff date to ensure your data continues syncing. If you do not upgrade before the cutoff date, Airbyte will automatically disable your connection. + +Learn more about version upgrades in our [resolving breaking change documentation](/cloud/managing-airbyte-cloud/manage-schema-changes#resolving-breaking-changes). + ## Review the stream status The stream status allows you to monitor each stream's latest status. The stream will be highlighted with a grey pending bar to indicate the sync is actively extracting or loading data. @@ -28,6 +38,7 @@ The stream status allows you to monitor each stream's latest status. The stream Each stream shows the last record loaded to the destination. Toggle the header to display the exact datetime the last record was loaded. -You can reset an individual stream without resetting all streams in a connection by clicking the three grey dots next to any stream. It is recommended to start a new sync after a reset. +You can [reset](/operator-guides/reset.md) an individual stream without resetting all streams in a connection by clicking the three grey dots next to any stream. + +You can also navigate directly to the stream's configuration by clicking the three grey dots next to any stream and selecting "Open details" to be redirected to the stream configuration. -You can also navigate directly to the stream's configuration by click the three grey dots next to any stream and selecting "Open details" to be redirected to the stream configuration. \ No newline at end of file diff --git a/docs/cloud/managing-airbyte-cloud/review-sync-history.md b/docs/cloud/managing-airbyte-cloud/review-sync-history.md index b5a1f06ba903..485d981fc92f 100644 --- a/docs/cloud/managing-airbyte-cloud/review-sync-history.md +++ b/docs/cloud/managing-airbyte-cloud/review-sync-history.md @@ -2,34 +2,19 @@ The job history displays information about synced data, such as the amount of data moved, the number of records read and committed, and the total sync time. Reviewing this summary can help you monitor the sync performance and identify any potential issues. -To review the sync history: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**. - -2. Click a connection in the list to view its sync history. Sync History displays the sync status or [reset](https://docs.airbyte.com/operator-guides/reset/) status. The sync status is defined as: +To review the sync history, click a connection in the list to view its sync history. Sync History displays the sync status or [reset](/operator-guides/reset.md) status. The sync status is defined as: | Status | Description | |---------------------|---------------------------------------------------------------------------------------------------------------------| | Succeeded | 100% of the data has been extracted and loaded to the destination | -| Partially Succeeded | a subset of the data has been loaded to the destination -| Failed | none of the data has been loaded to the destination | -| Cancelled | the sync was cancelled manually before finishing | -| Running | the sync is currently running | - -:::note - -In the event of a failure, Airbyte will make several attempts to sync your data before waiting for the next sync to retry. The latest rules can be read about [here](../../understanding-airbyte/jobs.md#retry-rules). - -::: - -3.
To view the full sync log, click the three grey dots next to any sync job. Select "View logs" to open the logs in the browser. - -4. To find a link to the job, click the three grey dots next to any sync job. Select "Copy link to job" to copy the link to your clipboard. - -5. To download a copy of the logs locally, click the three grey dots next to any sync job. Select "Donwload logs". +| Partially Succeeded | A subset of the data has been loaded to the destination +| Failed | None of the data has been loaded to the destination | +| Cancelled | The sync was cancelled manually before finishing | +| Running | The sync is currently running | ## Sync summary -Each sync shows the time the sync was initiated and additional metadata. +Each sync shows the time the sync was initiated and additional metadata. This information can help in understanding sync performance over time. | Data | Description | |------------------------------------------|--------------------------------------------------------------------------------------| @@ -38,3 +23,11 @@ Each sync shows the time the sync was initiated and additional metadata. | x loaded records | Number of records the destination confirmed it received. | | xh xm xs | Total time (hours, minutes, seconds) for the sync to complete | + +:::note + +In the event of a failure, Airbyte will make several attempts to sync your data before waiting for the next sync to retry. The latest rules can be read about [here](../../understanding-airbyte/jobs.md#retry-rules). + +::: + +On this page, you can also view the complete logs and find any relevant errors, find a link to the job to share with Support, or download a copy of the logs locally. \ No newline at end of file diff --git a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md index bbc2211fd2e6..47bc59ea6b19 100644 --- a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md +++ b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md @@ -1,16 +1,12 @@ -# Understand Airbyte Cloud limits +# Airbyte Cloud limits Understanding the following limitations will help you more effectively manage Airbyte Cloud. * Max number of workspaces per user: 3* * Max number of instances of the same source connector: 10* * Max number of destinations in a workspace: 20* -* Max number of consecutive sync failures before a connection is paused: 100 -* Max number of days with consecutive sync failures before a connection is paused: 14 days * Max number of streams that can be returned by a source in a discover call: 1K * Max number of streams that can be configured to sync in a single connection: 1K * Size of a single record: 20MB -* Shortest sync schedule: Every 60 min -* Schedule accuracy: +/- 30 min *Limits on workspaces, sources, and destinations do not apply to customers of [Powered by Airbyte](https://airbyte.com/solutions/powered-by-airbyte). To learn more [contact us](https://airbyte.com/talk-to-sales)!
diff --git a/docs/community/code-of-conduct.md b/docs/community/code-of-conduct.md new file mode 100644 index 000000000000..4cb81d4468fc --- /dev/null +++ b/docs/community/code-of-conduct.md @@ -0,0 +1,91 @@ +--- +description: Our Community Code of Conduct +--- + +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others’ private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [conduct@airbyte.io](mailto:conduct@airbyte.io). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html) + +## Slack Code of Conduct + +Airbyte's Slack community is growing incredibly fast. We're home to over 1500 data professionals and are growing at an awesome pace. We are proud of our community, and have provided these guidelines to support new members in maintaining the wholesome spirit we have developed here. We appreciate your continued commitment to making this a community we are all excited to be a part of. + +### Rule 1: Be respectful. + +Our desire is for everyone to have a positive, fulfilling experience in Airbyte Slack, and we sincerely appreciate your help in making this happen. +All of the guidelines we provide below are important, but there’s a reason respect is the first rule. We take it seriously, and while the occasional breach of etiquette around Slack is forgivable, we cannot condone disrespectful behavior. + +### Rule 2: Use the most relevant channels. + +We deliberately use topic-specific Slack channels so members of the community can opt-in on various types of conversations. Our members take care to post their messages in the most relevant channel, and you’ll often see reminders about the best place to post a message (respectfully written, of course!). If you're looking for help directly from the Community Assistance Team or other Airbyte employees, please stick to posting in the airbyte-help channel, so we know you're asking us specifically! + +### Rule 3: Don’t double-post. + +Please be considerate of our community members’ time. We know your question is important, but please keep in mind that Airbyte Slack is not a customer service platform but a community of volunteers who will help you as they are able around their own work schedule. You have access to all the history, so it’s easy to check if your question has already been asked. + +### Rule 4: Check question for clarity and thoughtfulness. + +Airbyte Slack is a community of volunteers. Our members enjoy helping others; they are knowledgeable, gracious, and willing to give their time and expertise for free. Putting some effort into a well-researched and thoughtful post shows consideration for their time and will gain more responses. + +### Rule 5: Keep it public. + +This is a public forum; please do not contact individual members of this community without their express permission, regardless of whether you are trying to recruit someone, sell a product, or solicit help. + +### Rule 6: No soliciting! + +The purpose of the Airbyte Slack community is to provide a forum for data practitioners to discuss their work and share their ideas and learnings. It is not intended as a place to generate leads for vendors or recruiters, and may not be used as such. + +If you’re a vendor, you may advertise your product in #shameless-plugs. Advertising your product anywhere else is strictly against the rules. + +### Rule 7: Don't spam tags, or use @here or @channel. + +Using the @here and @channel keywords in a post will not help, as they are disabled in Slack for everyone excluding admins. Nonetheless, if you use them we will remind you with a link to this rule, to help you better understand the way Airbyte Slack operates. + +Do not tag specific individuals for help on your questions. 
If someone chooses to respond to your question, they will do so. You will find that our community of volunteers is generally very responsive and amazingly helpful! + +### Rule 8: Use threads for discussion. + +The simplest way to keep conversations on track in Slack is to use threads. The Airbyte Slack community relies heavily on threads, and if you break from this convention, rest assured one of our community members will respectfully inform you quickly! + +_If you see a message or receive a direct message that violates any of these rules, please contact an Airbyte team member and we will take the appropriate moderation action immediately. We have zero tolerance for intentional rule-breaking and hate speech._ + diff --git a/docs/operator-guides/contact-support.md b/docs/community/getting-support.md similarity index 88% rename from docs/operator-guides/contact-support.md rename to docs/community/getting-support.md index db42a9aef36f..03b1ff795560 100644 --- a/docs/operator-guides/contact-support.md +++ b/docs/community/getting-support.md @@ -1,4 +1,4 @@ -# Airbyte Support +# Getting Support Hold up! Have you looked at [our docs](https://docs.airbyte.com/) yet? We recommend searching the wealth of knowledge in our documentation as many times the answer you are looking for is there! @@ -6,14 +6,26 @@ Hold up! Have you looked at [our docs](https://docs.airbyte.com/) yet? We recomm Running Airbyte Open Source and have questions that our docs could not clear up? Post your questions on our [Github Discussions](https://github.com/airbytehq/airbyte/discussions?_gl=1*70s0c6*_ga*MTc1OTkyOTYzNi4xNjQxMjQyMjA0*_ga_HDBMVFQGBH*MTY4OTY5MDQyOC4zNDEuMC4xNjg5NjkwNDI4LjAuMC4w) and also join our community Slack to connect with other Airbyte users. +### Community Slack **Join our Slack community** [HERE](https://slack.airbyte.com/?_gl=1*1h8mjfe*_gcl_au*MTc4MjAxMDQzOS4xNjgyOTczMDYy*_ga*MTc1OTkyOTYzNi4xNjQxMjQyMjA0*_ga_HDBMVFQGBH*MTY4Nzg4OTQ4MC4zMjUuMS4xNjg3ODkwMjE1LjAuMC4w&_ga=2.58571491.813788522.1687789276-1759929636.1641242204)! -Ask your questions first in the #ask-ai channel and if our bot can not assist you, reach out to our community in the #ask-community-for-troubleshooting channel. - +Ask your questions first in the #ask-ai channel, and if our bot cannot assist you, reach out to our community in the #ask-community-for-troubleshooting channel. If you require personalized support, reach out to our sales team to inquire about [Airbyte Enterprise](https://airbyte.com/airbyte-enterprise). +### Airbyte Forum + +We are driving our community support from our [forum](https://github.com/airbytehq/airbyte/discussions) on GitHub. + +### Office Hour + +Airbyte provides a [Daily Office Hour](https://airbyte.com/daily-office-hour) to discuss issues. +It is a 45-minute meeting: the first 20 minutes are reserved for a weekly topic presentation about Airbyte concepts, and the other 25 minutes are for general questions. The schedule is: +* Monday, Wednesday and Fridays: 1 PM PST/PDT +* Tuesday and Thursday: 4 PM CEST + + ## Airbyte Cloud Support If you have questions about connector setup, error resolution, or want to report a bug, Airbyte Support is available to assist you. We recommend checking [our documentation](https://docs.airbyte.com/) and searching our [Help Center](https://support.airbyte.com/hc/en-us) before opening a support ticket.
@@ -59,5 +71,4 @@ Although we strive to offer our utmost assistance, there are certain requests th * Curating unique documentation and training materials * Configuring Airbyte to meet security requirements -If you think you will need asssitance when upgrading, we recommend upgrading during our support hours, Monday-Friday 7AM - 7PM ET so we can assist if support is needed. If you upgrade outside of support hours, please submit a ticket and we will assist when we are back online. - +If you think you will need assistance when upgrading, we recommend upgrading during our support hours, Monday-Friday 7AM - 7PM ET so we can assist if support is needed. If you upgrade outside of support hours, please submit a ticket and we will assist when we are back online. diff --git a/docs/connector-development/cdk-python/README.md b/docs/connector-development/cdk-python/README.md index 3809d26ee331..f0eb2387c8b0 100644 --- a/docs/connector-development/cdk-python/README.md +++ b/docs/connector-development/cdk-python/README.md @@ -74,7 +74,7 @@ You can find a complete tutorial for implementing an HTTP source connector in [t **Simple Python connectors using the barebones `Source` abstraction**: -- [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py) +- [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py) - [Mailchimp](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py) ## Contributing diff --git a/docs/connector-development/connector-builder-ui/incremental-sync.md b/docs/connector-development/connector-builder-ui/incremental-sync.md index 5801267fea9d..0a4db2bc7a54 100644 --- a/docs/connector-development/connector-builder-ui/incremental-sync.md +++ b/docs/connector-development/connector-builder-ui/incremental-sync.md @@ -12,7 +12,7 @@ To use incremental syncs, the API endpoint needs to fullfil the following requir - If the record's cursor field is nested, you can use an "Add Field" transformation to copy it to the top-level, and a Remove Field to remove it from the object. This will effectively move the field to the top-level of the record - It's possible to filter/request records by the cursor field -The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Append + Deduped](/understanding-airbyte/connections/incremental-append-deduped) pages. +The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about the different modes of incremental sync, check out the [Incremental Sync - Append](/using-airbyte/core-concepts/sync-modes/incremental-append/) and [Incremental Sync - Append + Deduped](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped) pages.
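+
+Under the hood, the incremental settings configured in the Builder are saved to the connector's declarative manifest. As a rough, illustrative sketch only (the values below, such as `updated_at`, `start_date`, `from-date`, and `to-date`, are placeholder assumptions rather than anything this page prescribes), such a cursor might translate to a low-code `DatetimeBasedCursor` block along these lines:
+
+```yaml
+# Illustrative sketch of a DatetimeBasedCursor in a declarative manifest.
+# All field values below are assumptions; adapt them to your own API.
+incremental_sync:
+  type: DatetimeBasedCursor
+  cursor_field: updated_at                     # record field used as the cursor
+  datetime_format: "%Y-%m-%dT%H:%M:%SZ"
+  start_datetime: "{{ config['start_date'] }}" # earliest date to sync, from the connector config
+  end_datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
+  step: P1M                                    # split the overall interval into one-month slices
+  cursor_granularity: PT1S
+  start_time_option:
+    type: RequestOption
+    inject_into: request_parameter
+    field_name: from-date                      # assumed query parameter for the lower bound
+  end_time_option:
+    type: RequestOption
+    inject_into: request_parameter
+    field_name: to-date                        # assumed query parameter for the upper bound
+```
+
+The `step`, `start_time_option`, and `end_time_option` values in particular depend entirely on what your API supports, so treat this purely as a starting point.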
## Configuration @@ -132,7 +132,7 @@ Some APIs update records over time but do not allow to filter or search by modif In these cases, there are two options: -- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-append-deduped) +- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped) - **Configure the "Lookback window"** to not only sync exclusively new records, but resync some portion of records before the cutoff date to catch changes that were made to existing records, trading off data consistency and the amount of synced records. In the case of the API of The Guardian, news articles tend to only be updated for a few days after the initial release date, so this strategy should be able to catch most updates without having to resync all articles. Reiterating the example from above with a "Lookback window" of 2 days configured, let's assume the last encountered article looked like this: diff --git a/docs/connector-development/connector-builder-ui/record-processing.mdx b/docs/connector-development/connector-builder-ui/record-processing.mdx index d5ac0dbb88de..41a57d2351a9 100644 --- a/docs/connector-development/connector-builder-ui/record-processing.mdx +++ b/docs/connector-development/connector-builder-ui/record-processing.mdx @@ -321,7 +321,7 @@ Besides bringing the records in the right shape, it's important to communicate s ### Primary key -The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/understanding-airbyte/connections/incremental-append-deduped)). +The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped)). In a lot of cases, like for the EmailOctopus example from above, there is a dedicated id field that can be used for this purpose. It's important that the value of the id field is guaranteed to only occur once for a single record. diff --git a/docs/connector-development/tutorials/adding-incremental-sync.md b/docs/connector-development/tutorials/adding-incremental-sync.md index 992c9d9ed4b5..b463503a795b 100644 --- a/docs/connector-development/tutorials/adding-incremental-sync.md +++ b/docs/connector-development/tutorials/adding-incremental-sync.md @@ -2,7 +2,7 @@ ## Overview -This tutorial will assume that you already have a working source. If you do not, feel free to refer to the [Building a Toy Connector](building-a-python-source.md) tutorial. This tutorial will build directly off the example from that article. We will also assume that you have a basic understanding of how Airbyte's Incremental-Append replication strategy works. We have a brief explanation of it [here](../../understanding-airbyte/connections/incremental-append.md). +This tutorial will assume that you already have a working source. If you do not, feel free to refer to the [Building a Toy Connector](building-a-python-source.md) tutorial. 
This tutorial will build directly off the example from that article. We will also assume that you have a basic understanding of how Airbyte's Incremental-Append replication strategy works. We have a brief explanation of it [here](/using-airbyte/core-concepts/sync-modes/incremental-append.md). ## Update Catalog in `discover` @@ -293,6 +293,6 @@ Bonus points: go to Airbyte UI and reconfigure the connection to use incremental Incremental definitely requires more configurability than full refresh, so your implementation may deviate slightly depending on whether your cursor field is source defined or user-defined. If you think you are running into one of those cases, check out -our [incremental](../../understanding-airbyte/connections/incremental-append.md) documentation for more information on different types of +our [incremental](/using-airbyte/core-concepts/sync-modes/incremental-append.md) documentation for more information on different types of configuration. diff --git a/docs/connector-development/tutorials/build-a-connector-the-hard-way.md b/docs/connector-development/tutorials/build-a-connector-the-hard-way.md index fe2ea339bd51..9fb9a71aac70 100644 --- a/docs/connector-development/tutorials/build-a-connector-the-hard-way.md +++ b/docs/connector-development/tutorials/build-a-connector-the-hard-way.md @@ -57,7 +57,7 @@ Here's the outline of what we'll do to build our connector: Once we've completed the above steps, we will have built a functioning connector. Then, we'll add some optional functionality: -- Support [incremental sync](../../understanding-airbyte/connections/incremental-append.md) +- Support [incremental sync](/using-airbyte/core-concepts/sync-modes/incremental-append.md) - Add custom integration tests ### 1. Bootstrap the connector package diff --git a/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md b/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md index 711880cb0460..8cdee893e5ab 100644 --- a/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md +++ b/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md @@ -132,7 +132,7 @@ To add incremental sync, we'll do a few things: 6. Update the `path` method to specify the date to pull exchange rates for. 7. Update the configured catalog to use `incremental` sync when we're testing the stream. -We'll describe what each of these methods do below. Before we begin, it may help to familiarize yourself with how incremental sync works in Airbyte by reading the [docs on incremental](../../../understanding-airbyte/connections/incremental-append.md). +We'll describe what each of these methods do below. Before we begin, it may help to familiarize yourself with how incremental sync works in Airbyte by reading the [docs on incremental](/using-airbyte/core-concepts/sync-modes/incremental-append.md). To keep things concise, we'll only show functions as we edit them one by one. diff --git a/docs/contributing-to-airbyte/README.md b/docs/contributing-to-airbyte/README.md index e2b9669e46ea..6683cd77fbb5 100644 --- a/docs/contributing-to-airbyte/README.md +++ b/docs/contributing-to-airbyte/README.md @@ -8,7 +8,7 @@ Thank you for your interest in contributing! We love community contributions. Read on to learn how to contribute to Airbyte. We appreciate first time contributors and we are happy to assist you in getting started. In case of questions, just reach out to us via [email](mailto:hey@airbyte.io) or [Slack](https://slack.airbyte.io)! 
-Before getting started, please review Airbyte's Code of Conduct. Everyone interacting in Slack, codebases, mailing lists, events, or other Airbyte activities is expected to follow [Code of Conduct](../project-overview/code-of-conduct.md). +Before getting started, please review Airbyte's Code of Conduct. Everyone interacting in Slack, codebases, mailing lists, events, or other Airbyte activities is expected to follow the [Code of Conduct](../community/code-of-conduct.md). ## Code Contributions diff --git a/docs/contributing-to-airbyte/writing-docs.md b/docs/contributing-to-airbyte/writing-docs.md index 6e8e0b21081d..a0621d10e9e1 100644 --- a/docs/contributing-to-airbyte/writing-docs.md +++ b/docs/contributing-to-airbyte/writing-docs.md @@ -13,7 +13,7 @@ The Docs team maintains a list of [#good-first-issues](https://github.com/airbyt ## Contributing to Airbyte docs -Before contributing to Airbyte docs, read the Airbyte Community [Code of Conduct](../project-overview/code-of-conduct.md). +Before contributing to Airbyte docs, read the Airbyte Community [Code of Conduct](../community/code-of-conduct.md). :::tip If you're new to GitHub and Markdown, complete [the First Contributions tutorial](https://github.com/firstcontributions/first-contributions) and learn [Markdown basics](https://guides.github.com/features/mastering-markdown/) before contributing to Airbyte documentation. Even if you're familiar with the basics, you may be interested in Airbyte's [custom markdown extensions for connector docs](#custom-markdown-extensions-for-connector-docs). @@ -276,16 +276,7 @@ Eagle-eyed readers may note that _all_ markdown should support this feature sinc ### Adding a redirect -To add a redirect, open the [`docusaurus.config.js`](https://github.com/airbytehq/airbyte/blob/master/docusaurus/docusaurus.config.js#L22) file and locate the following commented section: - -```js -// { -// from: '/some-lame-path', -// to: '/a-much-cooler-uri', -// }, -``` - -Copy this section, replace the values, and [test the changes locally](#editing-on-your-local-machine) by going to the path you created a redirect for and verify that the address changes to the new one. +To add a redirect, open the [`docusaurus/redirects.yml`](https://github.com/airbytehq/airbyte/blob/master/docusaurus/redirects.yml) file and add an entry specifying which old path should redirect to which new path, then [test the changes locally](#editing-on-your-local-machine) by visiting the old path and verifying that it redirects to the new one.
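+
+For illustration, an entry in `redirects.yml` might look roughly like the following; the `from`/`to` keys mirror the fields of the old JavaScript config shown above, but check the existing entries in the file for the exact shape before copying this:
+
+```yaml
+# Hypothetical redirect entry; verify the key names against existing entries in redirects.yml.
+- from: /some-old-path
+  to: /the-new-path
+```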
:::note Your path **needs** a leading slash `/` to work diff --git a/docs/deploying-airbyte/README.md b/docs/deploying-airbyte/README.md deleted file mode 100644 index 2f8a6e290a36..000000000000 --- a/docs/deploying-airbyte/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Deploy Airbyte where you want to - -![not all who wander are lost](https://user-images.githubusercontent.com/2591516/170351002-0d054d06-c901-4794-8719-97569060408f.png) - -- [Local Deployment](local-deployment.md) -- [On Airbyte Cloud](on-cloud.md) -- [On Aws](on-aws-ec2.md) -- [On Azure VM Cloud Shell](on-azure-vm-cloud-shell.md) -- [On Digital Ocean Droplet](on-digitalocean-droplet.md) -- [On GCP.md](on-gcp-compute-engine.md) -- [On Kubernetes](on-kubernetes-via-helm.md) -- [On OCI VM](on-oci-vm.md) -- [On Restack](on-restack.md) -- [On Plural](on-plural.md) -- [On AWS ECS (spoiler alert: it doesn't work)](on-aws-ecs.md) diff --git a/docs/deploying-airbyte/local-deployment.md b/docs/deploying-airbyte/local-deployment.md index ff94ad68c885..d3247a86668c 100644 --- a/docs/deploying-airbyte/local-deployment.md +++ b/docs/deploying-airbyte/local-deployment.md @@ -21,8 +21,8 @@ cd airbyte ./run-ab-platform.sh ``` -- In your browser, just visit [http://localhost:8000](http://localhost:8000) -- You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy Airbyte to your servers, be sure to change these: +- In your browser, visit [http://localhost:8000](http://localhost:8000) +- You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy Airbyte to your servers, be sure to change these in your `.env` file: ```yaml # Proxy Configuration @@ -66,5 +66,11 @@ bash run-ab-platform.sh - Start moving some data! ## Troubleshooting +If you have any questions about the local setup and deployment process, head over to our [Getting Started FAQ](https://github.com/airbytehq/airbyte/discussions/categories/questions) on our Airbyte Forum that answers the following questions and more: + +- How long does it take to set up Airbyte? +- Where can I see my data once I've run a sync? +- Can I set a start time for my sync? -If you encounter any issues, just connect to our [Slack](https://slack.airbyte.io). Our community will help! We also have a [troubleshooting](../troubleshooting.md) section in our docs for common problems. +If you encounter any issues, check out the [Getting Support](/community/getting-support) documentation +for options on how to get in touch with the community or with us. diff --git a/docs/deploying-airbyte/on-kubernetes-via-helm.md b/docs/deploying-airbyte/on-kubernetes-via-helm.md index 12f192db3174..818dec3f78f5 100644 --- a/docs/deploying-airbyte/on-kubernetes-via-helm.md +++ b/docs/deploying-airbyte/on-kubernetes-via-helm.md @@ -122,41 +122,6 @@ After specifying your own configuration, run the following command: helm install --values path/to/values.yaml %release_name% airbyte/airbyte ``` -### (Early Access) Airbyte Enterprise deployment - -[Airbyte Enterprise](/airbyte-enterprise) is in an early access stage for select priority users. Once you [are qualified for an Airbyte Enterprise license key](https://airbyte.com/company/talk-to-sales), you can install Airbyte Enterprise via helm by following these steps: - -1. Checkout the latest revision of the [airbyte-platform repository](https://github.com/airbytehq/airbyte-platform) - -2.
Add your Airbyte Enterprise license key and [auth configuration details](/airbyte-enterprise#single-sign-on-sso) to a file called `airbyte.yml` in the `configs` directory of `airbyte-platform`. You can copy `airbyte.sample.yml` to use as a template: - -```sh -cp configs/airbyte.sample.yml configs/airbyte.yml -``` - -Then, open up `airbyte.yml` in your text editor to fill in the indicated fields. - -:::caution - -For now, auth configurations aren't easy to modify once initially installed, so please double check them to make sure they're accurate before proceeding! This will be improved in the near future. - -::: - -3. Make sure your helm repository is up to date: - -```text -helm repo update -``` - -4. Install Airbyte Enterprise on helm using the following command: - -```text -./tools/bin/install_airbyte_pro_on_helm.sh -``` - -The default release name is `airbyte-pro`. You can change this via the `RELEASE_NAME` environment -variable. - ## Migrate from old charts to new ones Starting from `0.39.37-alpha` we've revisited helm charts structure and separated all components of airbyte into their own independent charts, thus by allowing our developers to test single component without deploying airbyte as a whole and by upgrading single component at a time. diff --git a/docs/project-overview/licenses/README.md b/docs/developer-guides/licenses/README.md similarity index 100% rename from docs/project-overview/licenses/README.md rename to docs/developer-guides/licenses/README.md diff --git a/docs/project-overview/licenses/elv2-license.md b/docs/developer-guides/licenses/elv2-license.md similarity index 100% rename from docs/project-overview/licenses/elv2-license.md rename to docs/developer-guides/licenses/elv2-license.md diff --git a/docs/project-overview/licenses/examples.md b/docs/developer-guides/licenses/examples.md similarity index 100% rename from docs/project-overview/licenses/examples.md rename to docs/developer-guides/licenses/examples.md diff --git a/docs/project-overview/licenses/license-faq.md b/docs/developer-guides/licenses/license-faq.md similarity index 86% rename from docs/project-overview/licenses/license-faq.md rename to docs/developer-guides/licenses/license-faq.md index 837ae5a5fd3d..6865094e4ba4 100644 --- a/docs/project-overview/licenses/license-faq.md +++ b/docs/developer-guides/licenses/license-faq.md @@ -1,16 +1,19 @@ # License FAQ ## Airbyte Licensing Overview -* **Airbyte Connectors** are open sourced and available under the MIT License. -* **Airbyte Protocol** is open sourced and available under the MIT License. -* **Airbyte CDK** (Connector Development Kit) is open sourced and available under the MIT License. -* **Airbyte Core** is licensed under the Elastic License 2.0 (ELv2). -* **Airbyte Cloud & Airbyte Enterprise** are both closed source and require a commercial license from Airbyte. + +- **Airbyte Connectors** are open sourced and available under the [MIT](https://opensource.org/license/mit/) or [Elastic License 2.0 (ELv2)](https://www.elastic.co/licensing/elastic-license/faq) License. Each connector's `metadata.yaml` file contains more information. +- **Airbyte Protocol** is open sourced and available under the MIT License. +- **Airbyte CDK** (Connector Development Kit) is open sourced and available under the MIT License. +- **Airbyte Core** is licensed under the Elastic License 2.0 (ELv2). +- **Airbyte Cloud & Airbyte Enterprise** are both closed source and require a commercial license from Airbyte. 
![Diagram of license structure](../../.gitbook/assets/license_faq_diagram.png) ## About Elastic License 2.0 (ELv2) + ELv2 is a simple, non-copyleft license, allowing for the right to “use, copy, distribute, make available, and prepare derivative works of the software”. Anyone can use Airbyte, free of charge. You can run the software at scale on your infrastructure. There are only three high-level limitations. You cannot: + 1. Provide the products to others as a managed service ([read more](#what-is-the-managed-service-use-case-that-is-not-allowed-under-elv2)); 2. Circumvent the license key functionality or remove/obscure features protected by license keys; or 3. Remove or obscure any licensing, copyright, or other notices. @@ -20,60 +23,75 @@ In case you want to work with Airbyte without these limitations, we offer altern [View License](elv2-license.md) ## FAQ + ### What limitations does ELv2 impose on my use of Airbyte? + If you are an Airbyte Cloud customer, nothing changes for you. For open-source users, everyone can continue to use Airbyte as they are doing today: no limitations on volume, number of users, number of connections… There are only a few high-level limitations. You cannot: + 1. Provide the products to others as a managed service. For example, you cannot sell a cloud service that provides users with direct access to Airbyte. You can sell access to applications built and run using Airbyte ([read more](#what-is-the-managed-service-use-case-that-is-not-allowed-under-elv2)). 2. Circumvent the license key functionality or remove/obscure features protected by license keys. For example, our code may contain watermarks or keys to unlock proprietary functionality. Those elements of our code will be marked in our source code. You can’t remove or change them. ### Why did Airbyte adopt ELv2? + We are releasing Airbyte Cloud, a managed version of Airbyte that will offer alternatives to how our users operate Airbyte, including additional features and new execution models. We want to find a great way to execute our mission to commoditize data integration with open source and our ambition to create a sustainable business. -ELv2 gives us the best of both worlds. +ELv2 gives us the best of both worlds. On one hand, our users can continue to use Airbyte freely, and on the other hand, we can safely create a sustainable business and continue to invest in our community, project and product. We don’t have to worry about other large companies taking the product to monetize it for themselves, thus hurting our community. ### Will Airbyte connectors continue to be open source? + Our own connectors remain open-source, and our contributors can also develop their own connectors and continue to choose whichever license they prefer. This is our way to accomplish Airbyte’s vision of commoditizing data integration: access to data shouldn’t be behind a paywall. Also, we want Airbyte’s licensing to work well with applications that are integrated using connectors. We are continuously investing in Airbyte's data protocol and all the tooling around it. The Connector Development Kit (CDK), which helps our community and our team build and maintain connectors at scale, is a cornerstone of our commoditization strategy and also remains open-source. ### How do I continue to contribute to Airbyte under ELv2? + Airbyte’s projects are available here. Anyone can contribute to any of these projects (including those licensed with ELv2). 
We are introducing a Contributor License Agreement that you will have to sign with your first contribution. ### When will ELv2 be effective? + ELv2 will apply from the following Airbyte core version as of September 27, 2021: version 0.30.0. ### What is the “managed service” use case that is not allowed under ELv2? -We chose ELv2 because it is very permissive with what you can do with the software. + +We chose ELv2 because it is very permissive with what you can do with the software. You can basically build ANY product on top of Airbyte as long as you don’t: -* Host Airbyte yourself and sell it as an ELT/ETL tool, or a replacement for the Airbyte solution. -* Sell a product that directly exposes Airbyte’s UI or API. + +- Host Airbyte yourself and sell it as an ELT/ETL tool, or a replacement for the Airbyte solution. +- Sell a product that directly exposes Airbyte’s UI or API. Here is a non-exhaustive list of what you can do (without providing your customers direct access to Airbyte functionality): -* I am creating an analytics platform and I want to use Airbyte to bring data in on behalf of my customers. -* I am building my internal data stack and I want my team to be able to interact with Airbyte to configure the pipelines through the UI or the API. -* ... + +- I am creating an analytics platform and I want to use Airbyte to bring data in on behalf of my customers. +- I am building my internal data stack and I want my team to be able to interact with Airbyte to configure the pipelines through the UI or the API. +- ... ### My company has a policy against using code that restricts commercial use – can I still use Airbyte under ELv2? -You can use software under ELv2 for your commercial business, you simply cannot offer it as a managed service. + +You can use software under ELv2 for your commercial business, you simply cannot offer it as a managed service. ### As a Data Agency, I currently use Airbyte to fulfill my customer needs. How does ELv2 affect me? + You can continue to use Airbyte, as long as you don’t offer it as a managed service. ### I started to use Airbyte to ingest my customer’s data. What should I do? + You can continue to use Airbyte, as long as you don’t offer it as a managed service. ### Can I customize ELv2 software? + Yes, you can customize ELv2 software. ELv2 is similar in this sense to permissive open-source licenses. You can modify the software, integrate the variant into your application, and operate the modified application, as long as you don’t go against any of the limitations. ### Why didn’t you use a closed-source license for Airbyte Core? + We want to provide developers with free access to our Airbyte Core source code — including rights to modify it. Since this wouldn’t be possible with a closed-source license, we decided to use the more permissive ELv2. ### Is there any revenue sharing for those who create Airbyte connectors? -We will be introducing a new participative model in the next few months. There are still a lot of details to figure out, but the general idea is that maintainers of connectors would have the option to obtain a share of revenue when the connectors are being used in the paid version of Airbyte. In exchange, maintainers would be responsible for SLAs, new features, and bug fixes for the said connector. +We will be introducing a new participative model in the next few months. 
There are still a lot of details to figure out, but the general idea is that maintainers of connectors would have the option to obtain a share of revenue when the connectors are being used in the paid version of Airbyte. In exchange, maintainers would be responsible for SLAs, new features, and bug fixes for the said connector. diff --git a/docs/project-overview/licenses/mit-license.md b/docs/developer-guides/licenses/mit-license.md similarity index 100% rename from docs/project-overview/licenses/mit-license.md rename to docs/developer-guides/licenses/mit-license.md diff --git a/docs/enterprise-setup/README.md b/docs/enterprise-setup/README.md new file mode 100644 index 000000000000..9bb1a95450fa --- /dev/null +++ b/docs/enterprise-setup/README.md @@ -0,0 +1,17 @@ +# Airbyte Enterprise + +[Airbyte Enterprise](https://airbyte.com/product/airbyte-enterprise) is the best way to run Airbyte yourself. You get all 300+ pre-built connectors, data never leaves your environment, and Airbyte becomes self-serve in your organization with new tools to manage multiple users, and multiple teams using Airbyte all in one place. + +A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. + +The following pages outline how to: +1. [Deploy Airbyte Enterprise using Kubernetes](./implementation-guide.md) +2. [Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise](./sso.md) + +| Feature | Description | +|---------------------------|--------------------------------------------------------------------------------------------------------------| +| Premium Support | [Priority assistance](https://docs.airbyte.com/operator-guides/contact-support/#airbyte-enterprise-self-hosted-support) with deploying, managing and upgrading Airbyte or troubleshooting any connection issues. | +| User Management | [Okta SSO](./sso.md) to extend each Airbyte workspace to multiple users | +| Multiple Workspaces | Ability to create + manage multiple workspaces on one Airbyte instance | +| Role-Based Access Control | Isolate workspaces from one another with users roles scoped to individual workspaces | + diff --git a/docs/assets/docs/okta-app-integration-name.png b/docs/enterprise-setup/assets/okta-app-integration-name.png similarity index 100% rename from docs/assets/docs/okta-app-integration-name.png rename to docs/enterprise-setup/assets/okta-app-integration-name.png diff --git a/docs/assets/docs/okta-create-new-app-integration.png b/docs/enterprise-setup/assets/okta-create-new-app-integration.png similarity index 100% rename from docs/assets/docs/okta-create-new-app-integration.png rename to docs/enterprise-setup/assets/okta-create-new-app-integration.png diff --git a/docs/assets/docs/okta-login-redirect-uris.png b/docs/enterprise-setup/assets/okta-login-redirect-uris.png similarity index 100% rename from docs/assets/docs/okta-login-redirect-uris.png rename to docs/enterprise-setup/assets/okta-login-redirect-uris.png diff --git a/docs/enterprise-setup/implementation-guide.md b/docs/enterprise-setup/implementation-guide.md new file mode 100644 index 000000000000..6affccf7709d --- /dev/null +++ b/docs/enterprise-setup/implementation-guide.md @@ -0,0 +1,103 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Implementation Guide + +[Airbyte Enterprise](./README.md) is in an early access stage for select priority users. 
Once you [are qualified for an Airbyte Enterprise license key](https://airbyte.com/company/talk-to-sales), you can deploy Airbyte with the following instructions. + +Airbyte Enterprise must be deployed using Kubernetes to enable Airbyte's best performance and scale. The core components \(API server, scheduler, etc.\) run as deployments, while the scheduler launches connector-related pods on different nodes. + +## Prerequisites + +There are three prerequisites to deploying Enterprise: installing [helm](https://helm.sh/docs/intro/install/), having a Kubernetes cluster available, and configuring `kubectl` to connect to that cluster. + +For production, we recommend deploying to EKS, GKE or AKS. If you are doing some local testing, follow the cluster setup instructions outlined [here](/deploying-airbyte/on-kubernetes-via-helm.md#cluster-setup). + +To install `kubectl`, please follow [these instructions](https://kubernetes.io/docs/tasks/tools/). To configure `kubectl` to connect to your cluster with `kubectl config use-context my-cluster-name`, see the following: +
+**Configure kubectl to connect to your cluster:**
+
+For GKE (Google Kubernetes Engine):
+
+1. Configure gcloud with `gcloud auth login`.
+2. On the Google Cloud Console, the cluster page will have a "Connect" button, with a command to run locally: `gcloud container clusters get-credentials $CLUSTER_NAME --zone $ZONE_NAME --project $PROJECT_NAME`
+3. Use `kubectl config get-contexts` to show the contexts available.
+4. Run `kubectl config use-context $GKE_CONTEXT` to access the cluster from kubectl.
+
+For EKS (Amazon Elastic Kubernetes Service):
+
+1. Configure your AWS CLI to connect to your project.
+2. Install eksctl.
+3. Run `eksctl utils write-kubeconfig --cluster=$CLUSTER_NAME` to make the context available to kubectl.
+4. Use `kubectl config get-contexts` to show the contexts available.
+5. Run `kubectl config use-context $EKS_CONTEXT` to access the cluster with kubectl.
+
+## Deploy Airbyte Enterprise
+
+### Add Airbyte Helm Repository
+
+Follow these instructions to add the Airbyte helm repository:
+1. Run `helm repo add airbyte https://airbytehq.github.io/helm-charts`, where `airbyte` is the name of the repository that will be indexed locally.
+2. Perform the repo indexing process, and ensure your helm repository is up-to-date by running `helm repo update`.
+3. You can then browse all charts uploaded to your repository by running `helm search repo airbyte`.
+
+### Clone & Configure Airbyte
+
+1. `git clone` the latest revision of the [airbyte-platform repository](https://github.com/airbytehq/airbyte-platform)
+
+2. Create a new `airbyte.yml` file in the `configs` directory of the `airbyte-platform` folder. You may also copy `airbyte.sample.yml` to use as a template:
+
+```sh
+cp configs/airbyte.sample.yml configs/airbyte.yml
+```
+
+3. Add your Airbyte Enterprise license key to your `airbyte.yml`.
+
+4. Add your [auth details](/enterprise-setup/sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding.
+**Configuring auth in your `airbyte.yml` file:**
+
+To configure SSO with Okta, add the following at the end of your `airbyte.yml` file:
+
+```
+auth:
+  identity-providers:
+    - type: okta
+      domain: $OKTA_DOMAIN
+      app-name: $OKTA_APP_INTEGRATION_NAME
+      client-id: $OKTA_CLIENT_ID
+      client-secret: $OKTA_CLIENT_SECRET
+```
+
+To configure basic auth (deploy without SSO), remove the entire `auth:` section from your `airbyte.yml` config file. You will authenticate with the instance admin user and password included in your `airbyte.yml`.
+
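Taken together, the repository, clone, and configuration steps above amount to a short sequence of commands. The following is only a recap sketch, assuming you clone into your current directory and keep the default file names; adjust paths and the license/auth values in `configs/airbyte.yml` to your environment.

```sh
# Add and refresh the Airbyte helm repository (see "Add Airbyte Helm Repository" above)
helm repo add airbyte https://airbytehq.github.io/helm-charts
helm repo update
helm search repo airbyte

# Clone the airbyte-platform repository and copy the sample config to airbyte.yml
git clone https://github.com/airbytehq/airbyte-platform.git
cd airbyte-platform
cp configs/airbyte.sample.yml configs/airbyte.yml

# Edit configs/airbyte.yml to add your license key and, if using SSO, the auth section shown above
```

With `airbyte.yml` in place, continue with the installation command below.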
    + +### Install Airbyte Enterprise + +Install Airbyte Enterprise on helm using the following command: + +```text +./tools/bin/install_airbyte_pro_on_helm.sh +``` + +The default release name is `airbyte-pro`. You can change this via the `RELEASE_NAME` environment +variable. + +### Customizing your Airbyte Enterprise Deployment + +In order to customize your deployment, you need to create `values.yaml` file in a local folder and populate it with default configuration override values. A `values.yaml` example can be located in [charts/airbyte](https://github.com/airbytehq/airbyte-platform/blob/main/charts/airbyte/values.yaml) folder of the Airbyte repository. + +After specifying your own configuration, run the following command: + +```text +./tools/bin/install_airbyte_pro_on_helm.sh --values path/to/values.yaml $RELEASE_NAME airbyte/airbyte +``` diff --git a/docs/airbyte-enterprise.md b/docs/enterprise-setup/sso.md similarity index 56% rename from docs/airbyte-enterprise.md rename to docs/enterprise-setup/sso.md index d717cd41d3ef..8aede3304284 100644 --- a/docs/airbyte-enterprise.md +++ b/docs/enterprise-setup/sso.md @@ -1,14 +1,12 @@ -# Airbyte Enterprise +# Using Single Sign-On (SSO) -[Airbyte Enterprise](https://airbyte.com/solutions/airbyte-enterprise) is a self-managed version of Airbyte with additional features for enterprise customers. Airbyte Enterprise is in an early access stage for select priority users. A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. +Leverage your existing identity provider to enable employees to access your Airbyte instance using their corporate credentials, simplifying user provisioning. Enabling Single Sign-On extends Airbyte Self Managed to support multiple users, and multiple teams all on one instance. -The following instructions outline how to: -1. Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise -2. Deploy Airbyte Enterprise using Kubernetes (License Key Required) +Airbyte Self Managed currently supports SSO via OIDC with [Okta](https://www.okta.com/) as an IdP. Support for Azure Active Directory and connecting via SAML are both coming soon. Please talk to us to learn more about upcoming [enterprise features](https://airbyte.com/company/talk-to-sales). -## Single Sign-On (SSO) - -Airbyte Enterprise supports Single Sign-On, allowing an organization to manage user access to their Airbyte Enterprise instance through the configuration of an Identity Provider (IdP). Airbyte Enterprise currently supports SSO via OIDC with [Okta](https://www.okta.com/) as an IdP. +The following instructions walk you through: +1. [Setting up the Okta OIDC App Integration to be used by your Airbyte instance](#setting-up-okta-for-sso) +2. 
[Configuring Airbyte Enterprise to use SSO](#deploying-airbyte-enterprise-with-okta) ### Setting up Okta for SSO @@ -16,13 +14,13 @@ You will need to create a new Okta OIDC App Integration for your Airbyte instanc You should create an app integration with **OIDC - OpenID Connect** as the sign-in method and **Web Application** as the application type: -![Screenshot of Okta app integration creation modal](./assets/docs/okta-create-new-app-integration.png) +![Screenshot of Okta app integration creation modal](./assets/okta-create-new-app-integration.png) #### App integration name Please choose a URL-friendly app integraiton name without spaces or special characters, such as `my-airbyte-app`: -![Screenshot of Okta app integration name](./assets/docs/okta-app-integration-name.png) +![Screenshot of Okta app integration name](./assets/okta-app-integration-name.png) Spaces or special characters in this field could result in invalid redirect URIs. @@ -42,7 +40,7 @@ Sign-out redirect URIs /auth/realms/airbyte/broker//endpoint/logout_response ``` -![Okta app integration name screenshot](./assets/docs/okta-login-redirect-uris.png) +![Okta app integration name screenshot](./assets/okta-login-redirect-uris.png) _Example values_ @@ -57,4 +55,4 @@ Once your Okta app is set up, you're ready to deploy Airbyte with SSO. Take note - Client ID - Client Secret -Visit [Airbyte Enterprise deployment](/deploying-airbyte/on-kubernetes-via-helm#early-access-airbyte-enterprise-deployment) for instructions on how to deploy Airbyte Enterprise using `kubernetes`, `kubectl` and `helm`. +Visit the [implementation guide](./implementation-guide.md) for instructions on how to deploy Airbyte Enterprise using `kubernetes`, `kubectl` and `helm`. diff --git a/docs/integrations/README.md b/docs/integrations/README.md index fef3f532394d..fe41578bacf5 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -10,7 +10,7 @@ Airbyte uses a two tiered system for connectors to help you understand what to e **Community**: A community connector is maintained by the Airbyte community until it becomes Certified. Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. -For more information about the system, see [Product Support Levels](https://docs.airbyte.com/project-overview/product-support-levels) +For more information about the system, see [Connector Support Levels](./connector-support-levels.md) _[View the connector registries in full](https://connectors.airbyte.com/files/generated_reports/connector_registry_report.html)_ diff --git a/docs/integrations/connector-support-levels.md b/docs/integrations/connector-support-levels.md new file mode 100644 index 000000000000..e684c1292b7c --- /dev/null +++ b/docs/integrations/connector-support-levels.md @@ -0,0 +1,39 @@ +# Connector Support Levels + +The following table describes the support levels of Airbyte connectors. 
+ +| | Certified | Community | Custom | +| ------------------------------------ | ----------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Availability** | Available to all users | Available to all users | Available to all users | +| **Who builds them?** | Either the community or the Airbyte team. | Typically they are built by the community. The Airbyte team may upgrade them to Certified at any time. | Anyone can build custom connectors. We recommend using our [Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview) or [Low-code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview). | +| **Who maintains them?** | The Airbyte team | Users | Users | +| **Production Readiness** | Guaranteed by Airbyte | Not guaranteed | Not guaranteed | +| **Support: Cloud** | Supported* | No Support | Supported** | +| **Support: Powered by Airbyte** | Supported* | No Support | Supported** | +| **Support: Self-Managed Enterprise** | Supported* | No Support | Supported** | +| **Support: Community (OSS)** | Slack Support only | No Support | Slack Support only | + +\*For Certified connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. Otherwise, please use our support portal and we will address your issues as soon as possible. + +\*\*For Custom connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. This support is provided with best efforts, and maintenance/upgrades are owned by the customer. + +## Certified + +A **Certified** connector is actively maintained and supported by the Airbyte team and maintains a high quality bar. It is production ready. + +### What you should know about Certified connectors: + +- Certified connectors are available to all users. +- These connectors have been tested and vetted in order to be certified and are production ready. +- Certified connectors should go through minimal breaking change but in the event an upgrade is needed users will be given an adequate upgrade window. + +## Community + +A **Community** connector is maintained by the Airbyte community until it becomes Certified. Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. + +### What you should know about Community connectors: + +- Community connectors are available to all users. +- Community connectors may be upgraded to Certified at any time, and we will notify users of these upgrades via our Slack Community and in our Connector Catalog. +- Community connectors might not be feature-complete (features planned for release are under development or not prioritized) and may include backward-incompatible/breaking API changes with no or short notice. +- Community connectors have no Support SLAs. 
diff --git a/docs/integrations/destinations/chroma.md b/docs/integrations/destinations/chroma.md index bd7357ffc70f..f99a9cf869a5 100644 --- a/docs/integrations/destinations/chroma.md +++ b/docs/integrations/destinations/chroma.md @@ -17,7 +17,7 @@ Only one stream will exist to collect data from all source streams. This will be For each record, a UUID string is generated and used as the document id. The embeddings generated as defined will be stored as embeddings. Data in the text fields will be stored as documents and those in the metadata fields will be stored as metadata. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) You can connect to a Chroma instance either in client/server mode or in a local persistent mode. For the local persistent mode, the database file will be saved in the path defined in the `path` config parameter. Note that `path` must be an absolute path, prefixed with `/local`. @@ -56,6 +56,7 @@ Make sure your Chroma database can be accessed by Airbyte. If your database is w You should now have all the requirements needed to configure Chroma as a destination in the UI. You'll need the following information to configure the Chroma destination: - (Required) **Text fields to embed** +- (Optional) **Text splitter** Options around configuring the chunking process provided by the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). - (Required) **Fields to store as metadata** - (Required) **Collection** The name of the collection in Chroma db to store your data - (Required) Authentication method diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md index 75da81407f48..02446ba825f6 100644 --- a/docs/integrations/destinations/clickhouse.md +++ b/docs/integrations/destinations/clickhouse.md @@ -21,7 +21,7 @@ Each stream will be output into its own table in ClickHouse. Each table will con Airbyte Cloud only supports connecting to your ClickHouse instance with SSL or TLS encryption, which is supported by [ClickHouse JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc). -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/csv.md b/docs/integrations/destinations/csv.md index 4cc00f440c79..223c618b8f8b 100644 --- a/docs/integrations/destinations/csv.md +++ b/docs/integrations/destinations/csv.md @@ -69,7 +69,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path}/{filename}.csv . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. ## Changelog diff --git a/docs/integrations/destinations/databend.md b/docs/integrations/destinations/databend.md index e25a80f7ec88..444a47473a6d 100644 --- a/docs/integrations/destinations/databend.md +++ b/docs/integrations/destinations/databend.md @@ -20,7 +20,7 @@ Each stream will be output into its own table in Databend. Each table will conta ## Getting Started (Airbyte Cloud) Coming soon... 
-## Getting Started (Airbyte Open-Source) +## Getting Started (Airbyte Open Source) You can follow the [Connecting to a Warehouse docs](https://docs.databend.com/using-databend-cloud/warehouses/connecting-a-warehouse) to get the user, password, host etc. Or you can create such a user by running: diff --git a/docs/integrations/destinations/duckdb.md b/docs/integrations/destinations/duckdb.md index fa87f65038b9..078006e75f54 100644 --- a/docs/integrations/destinations/duckdb.md +++ b/docs/integrations/destinations/duckdb.md @@ -98,7 +98,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path} . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. diff --git a/docs/integrations/destinations/firestore.md b/docs/integrations/destinations/firestore.md index c82a9f12068e..94a6002a70c4 100644 --- a/docs/integrations/destinations/firestore.md +++ b/docs/integrations/destinations/firestore.md @@ -1,6 +1,35 @@ # Firestore -The Firestore destination for Airbyte +This destination writes data to Google Firestore. + +Google Firestore, officially known as Cloud Firestore, is a flexible, scalable database for mobile, web, and server development from Firebase and Google Cloud. It is commonly used for developing applications as a NoSQL database that provides real-time data syncing across user devices. + +## Getting started + +### Requirements + +- An existing GCP project +- A role with permissions to create a Service Account Key in GCP + +### Step 1: Create a Service Account +1. Log in to the Google Cloud Console. Select the project where your Firestore database is located. +2. Navigate to "IAM & Admin" and select "Service Accounts". Create a Service Account and assign appropriate roles. Ensure “Cloud Datastore User” or “Firebase Rules System” are enabled. +3. Navigate to the service account and generate the JSON key. Download and copy the contents to the configuration. + +## Sync overview + +### Output schema + +Each stream will be output into a Firestore collection. + +#### Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :----------------------------- | :------------------- | :---- | +| Full Refresh Sync | ✅ | | +| Incremental - Append Sync | ✅ | | +| Incremental - Append + Deduped | ✅ | | +| Namespaces | ✅ | | ## Changelog diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index df8405a3448d..f272b77a9d6c 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -13,7 +13,7 @@ The Airbyte GCS destination allows you to sync data to cloud storage buckets. Ea | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery.
Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | diff --git a/docs/integrations/destinations/google-sheets.md b/docs/integrations/destinations/google-sheets.md index 4152e284459a..1bf21c51b225 100644 --- a/docs/integrations/destinations/google-sheets.md +++ b/docs/integrations/destinations/google-sheets.md @@ -39,12 +39,18 @@ To create a Google account, visit [Google](https://support.google.com/accounts/a **For Airbyte Open Source:** - Authentication to Google Sheets is only available using OAuth for authentication. - - 1. Select **Google Sheets** from the Source type dropdown and enter a name for this connector. -2. Follow [Google's OAuth instructions](https://developers.google.com/identity/protocols/oauth2) to create an authentication app. You will need to grant the scopes described in the [Google Sheets API](https://developers.google.com/identity/protocols/oauth2/scopes#sheets). -3. Copy your Client ID, Client secret, and Refresh Token from the previous step. -4. Copy the Google Sheet link to **Spreadsheet Link** + +Authentication to Google Sheets is only available using OAuth for authentication. + +1. Create a new [Google Cloud project](https://console.cloud.google.com/projectcreate). +2. Enable the [Google Sheets API](https://console.cloud.google.com/apis/library/sheets.googleapis.com). +3. Create a new [OAuth client ID](https://console.cloud.google.com/apis/credentials/oauthclient). Select `Web application` as the Application type, give it a `name` and add `https://developers.google.com/oauthplayground` as an Authorized redirect URI. +4. Add a `Client Secret` (Add secret), and take note of both the `Client Secret` and `Client ID`. +5. Go to [Google OAuth Playground](https://developers.google.com/oauthplayground/) +6. Click the cog in the top-right corner, select `Use your own OAuth credentials` and enter the `OAuth Client ID` and `OAuth Client secret` from the previous step. +7. In the left sidebar, find and select `Google Sheets API v4`, then choose the `https://www.googleapis.com/auth/spreadsheets` scope. Click `Authorize APIs`. +8. In **step 2**, click `Exchange authorization code for tokens`. Take note of the `Refresh token`. +9. Set up a new destination in Airbyte, select `Google Sheets` and enter the `Client ID`, `Client Secret`, `Refresh Token` and `Spreadsheet Link` from the previous steps. ### Output schema diff --git a/docs/integrations/destinations/langchain.md b/docs/integrations/destinations/langchain.md index 31a9ddcae93d..4ac1fe151906 100644 --- a/docs/integrations/destinations/langchain.md +++ b/docs/integrations/destinations/langchain.md @@ -1,5 +1,17 @@ # Vector Database (powered by LangChain) +:::warning +The vector db destination destination has been split into separate destinations per vector database. This destination will not receive any further updates and is not subject to SLAs. The separate destinations support all features of this destination and are actively maintained. Please migrate to the respective destination as soon as possible. 
+ +Please use the respective destination for the vector database you want to use to ensure you receive updates and support. + +To following databases are supported: +* [Pinecone](https://docs.airbyte.com/integrations/destinations/pinecone) +* [Weaviate](https://docs.airbyte.com/integrations/destinations/weaviate) +* [Milvus](https://docs.airbyte.com/integrations/destinations/milvus) +* [Chroma](https://docs.airbyte.com/integrations/destinations/chroma) +* [Qdrant](https://docs.airbyte.com/integrations/destinations/qdrant) +::: ## Overview @@ -140,6 +152,7 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M | Version | Date | Pull Request | Subject | |:--------| :--------- |:--------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.2 | 2023-11-13 | [#32455](https://github.com/airbytehq/airbyte/pull/32455) | Fix build | | 0.1.1 | 2023-09-01 | [#30282](https://github.com/airbytehq/airbyte/pull/30282) | Use embedders from CDK | | 0.1.0 | 2023-09-01 | [#30080](https://github.com/airbytehq/airbyte/pull/30080) | Fix bug with potential data loss on append+dedup syncing. 🚨 Streams using append+dedup mode need to be reset after upgrade. | | 0.0.8 | 2023-08-21 | [#29515](https://github.com/airbytehq/airbyte/pull/29515) | Clean up generated schema spec | diff --git a/docs/integrations/destinations/local-json.md b/docs/integrations/destinations/local-json.md index 11870a8d5177..45ddda3fb757 100644 --- a/docs/integrations/destinations/local-json.md +++ b/docs/integrations/destinations/local-json.md @@ -69,7 +69,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path}/{filename}.jsonl . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. ## Changelog diff --git a/docs/integrations/destinations/milvus.md b/docs/integrations/destinations/milvus.md index f5de8b2e04ad..2e7c225a0b8c 100644 --- a/docs/integrations/destinations/milvus.md +++ b/docs/integrations/destinations/milvus.md @@ -37,7 +37,7 @@ You'll need the following information to configure the destination: ### Processing -Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. +Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. 
`user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. diff --git a/docs/integrations/destinations/mongodb.md b/docs/integrations/destinations/mongodb.md index 51bd94cb8c46..6df8e95f929c 100644 --- a/docs/integrations/destinations/mongodb.md +++ b/docs/integrations/destinations/mongodb.md @@ -25,7 +25,7 @@ Each stream will be output into its own collection in MongoDB. Each collection w Airbyte Cloud only supports connecting to your MongoDB instance with TLS encryption. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/mssql.md b/docs/integrations/destinations/mssql.md index c48261be1a0b..2a4bfd50bf5a 100644 --- a/docs/integrations/destinations/mssql.md +++ b/docs/integrations/destinations/mssql.md @@ -33,7 +33,7 @@ Airbyte Cloud only supports connecting to your MSSQL instance with TLS encryptio | Incremental - Append + Deduped | Yes | | | Namespaces | Yes | | -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) ### Requirements diff --git a/docs/integrations/destinations/mysql.md b/docs/integrations/destinations/mysql.md index 3ade0339ed56..469d24d4fa59 100644 --- a/docs/integrations/destinations/mysql.md +++ b/docs/integrations/destinations/mysql.md @@ -27,7 +27,7 @@ Each stream will be output into its own table in MySQL. Each table will contain Airbyte Cloud only supports connecting to your MySQL instance with TLS encryption. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) ### Requirements diff --git a/docs/integrations/destinations/oracle.md b/docs/integrations/destinations/oracle.md index 2b26a69cbf6c..d2e9867eb04a 100644 --- a/docs/integrations/destinations/oracle.md +++ b/docs/integrations/destinations/oracle.md @@ -26,7 +26,7 @@ Enabling normalization will also create normalized, strongly typed tables. The Oracle connector is currently in Alpha on Airbyte Cloud. Only TLS encrypted connections to your DB can be made from Airbyte Cloud. Other than that, follow the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/pinecone.md b/docs/integrations/destinations/pinecone.md index 6142e7935fcc..e060cf243bf0 100644 --- a/docs/integrations/destinations/pinecone.md +++ b/docs/integrations/destinations/pinecone.md @@ -46,7 +46,7 @@ All other fields are ignored. ### Processing -Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that meta data fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. +Each record will be split into text fields and meta fields as configured in the "Processing" section. 
All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that meta data fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. diff --git a/docs/integrations/destinations/qdrant.md b/docs/integrations/destinations/qdrant.md index 549f26ada6d6..648d0b019283 100644 --- a/docs/integrations/destinations/qdrant.md +++ b/docs/integrations/destinations/qdrant.md @@ -45,6 +45,7 @@ Make sure your Qdrant database can be accessed by Airbyte. If your database is w You should now have all the requirements needed to configure Qdrant as a destination in the UI. You'll need the following information to configure the Qdrant destination: - (Required) **Text fields to embed** +- (Optional) **Text splitter** Options around configuring the chunking process provided by the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). - (Required) **Fields to store as metadata** - (Required) **Collection** The name of the collection in Qdrant db to store your data - (Required) **The field in the payload that contains the embedded text** diff --git a/docs/integrations/destinations/rockset.md b/docs/integrations/destinations/rockset.md index 0ab1709a68b6..bf685f3e4ce9 100644 --- a/docs/integrations/destinations/rockset.md +++ b/docs/integrations/destinations/rockset.md @@ -23,7 +23,7 @@ | api_server | string | api URL to rockset, specifying http protocol | | workspace | string | workspace under which rockset collections will be added/modified | -## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) +## Getting Started \(Airbyte Open Source / Airbyte Cloud\) #### Requirements diff --git a/docs/integrations/destinations/s3-glue.md b/docs/integrations/destinations/s3-glue.md index 5e66cf7d6e70..f588bc1b424b 100644 --- a/docs/integrations/destinations/s3-glue.md +++ b/docs/integrations/destinations/s3-glue.md @@ -178,7 +178,7 @@ A data sync may create multiple files as the output files can be partitioned by | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. 
Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index 209b52a7bd31..81f796cae883 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -174,7 +174,7 @@ A data sync may create multiple files as the output files can be partitioned by | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | diff --git a/docs/integrations/destinations/sqlite.md b/docs/integrations/destinations/sqlite.md index eb266b61eee8..f5c2a3193780 100644 --- a/docs/integrations/destinations/sqlite.md +++ b/docs/integrations/destinations/sqlite.md @@ -68,7 +68,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path} . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. ## Changelog diff --git a/docs/integrations/destinations/timeplus.md b/docs/integrations/destinations/timeplus.md index dcf43cc48225..d883fc1b3726 100644 --- a/docs/integrations/destinations/timeplus.md +++ b/docs/integrations/destinations/timeplus.md @@ -16,7 +16,7 @@ Each stream will be output into its own stream in Timeplus, with corresponding s ## Getting Started (Airbyte Cloud) Coming soon... -## Getting Started (Airbyte Open-Source) +## Getting Started (Airbyte Open Source) You can follow the [Quickstart with Timeplus Ingestion API](https://docs.timeplus.com/quickstart-ingest-api) to createa a workspace and API key. ### Setup the Timeplus Destination in Airbyte diff --git a/docs/integrations/destinations/weaviate.md b/docs/integrations/destinations/weaviate.md index 0d914d136619..9e36ca07fe57 100644 --- a/docs/integrations/destinations/weaviate.md +++ b/docs/integrations/destinations/weaviate.md @@ -48,7 +48,7 @@ All other fields are serialized into their JSON representation. ### Processing -Each record will be split into text fields and metadata fields as configured in the "Processing" section. 
All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that metadata fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. +Each record will be split into text fields and metadata fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. diff --git a/docs/integrations/getting-started/destination-redshift.md b/docs/integrations/getting-started/destination-redshift.md deleted file mode 100644 index ae59b0eeff95..000000000000 --- a/docs/integrations/getting-started/destination-redshift.md +++ /dev/null @@ -1,70 +0,0 @@ -# Getting Started: Destination Redshift - -## Requirements - -1. Active Redshift cluster -2. Allow connections from Airbyte to your Redshift cluster \(if they exist in separate VPCs\) -3. A staging S3 bucket with credentials \(for the COPY strategy\). - -## Setup guide - -### 1. Make sure your cluster is active and accessible from the machine running Airbyte - -This is dependent on your networking setup. The easiest way to verify if Airbyte is able to connect to your Redshift cluster is via the check connection tool in the UI. You can check AWS Redshift documentation with a tutorial on how to properly configure your cluster's access [here](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-authorize-cluster-access.html) - -### 2. Fill up connection info - -Next is to provide the necessary information on how to connect to your cluster such as the `host` whcih is part of the connection string or Endpoint accessible [here](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-connect-to-cluster.html#rs-gsg-how-to-get-connection-string) without the `port` and `database` name \(it typically includes the cluster-id, region and end with `.redshift.amazonaws.com`\). - -You should have all the requirements needed to configure Redshift as a destination in the UI. You'll need the following information to configure the destination: - -* **Host** -* **Port** -* **Username** -* **Password** -* **Schema** -* **Database** - * This database needs to exist within the cluster provided. - -### 2a. Fill up S3 info \(for COPY strategy\) - -Provide the required S3 info. - -* **S3 Bucket Name** - * See [this](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to create an S3 bucket. -* **S3 Bucket Region** - * Place the S3 bucket and the Redshift cluster in the same region to save on networking costs. -* **Access Key Id** - * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. 
- * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the staging bucket. -* **Secret Access Key** - * Corresponding key to the above key id. -* **Part Size** - * Affects the size limit of an individual Redshift table. Optional. Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default table limit of 100GB. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. - -Optional parameters: -* **Bucket Path** - * The directory within the S3 bucket to place the staging data. For example, if you set this to `yourFavoriteSubdirectory`, staging data will be placed inside `s3://yourBucket/yourFavoriteSubdirectory`. If not provided, defaults to the root directory. - -## Notes about Redshift Naming Conventions - -From [Redshift Names & Identifiers](https://docs.aws.amazon.com/redshift/latest/dg/r_names.html): - -### Standard Identifiers - -* Begin with an ASCII single-byte alphabetic character or underscore character, or a UTF-8 multibyte character two to four bytes long. -* Subsequent characters can be ASCII single-byte alphanumeric characters, underscores, or dollar signs, or UTF-8 multibyte characters two to four bytes long. -* Be between 1 and 127 bytes in length, not including quotation marks for delimited identifiers. -* Contain no quotation marks and no spaces. - -### Delimited Identifiers - -Delimited identifiers \(also known as quoted identifiers\) begin and end with double quotation marks \("\). If you use a delimited identifier, you must use the double quotation marks for every reference to that object. The identifier can contain any standard UTF-8 printable characters other than the double quotation mark itself. Therefore, you can create column or table names that include otherwise illegal characters, such as spaces or the percent symbol. ASCII letters in delimited identifiers are case-insensitive and are folded to lowercase. To use a double quotation mark in a string, you must precede it with another double quotation mark character. - -Therefore, Airbyte Redshift destination will create tables and schemas using the Unquoted identifiers when possible or fallback to Quoted Identifiers if the names are containing special characters. - -## Data Size Limitations - -Redshift specifies a maximum limit of 65535 bytes to store the raw JSON record data. Thus, when a row is too big to fit, the Redshift destination fails to load such data and currently ignores that record. - -For more information, see the [docs here.](https://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html) diff --git a/docs/integrations/getting-started/source-github.md b/docs/integrations/getting-started/source-github.md deleted file mode 100644 index 6ae7f442aade..000000000000 --- a/docs/integrations/getting-started/source-github.md +++ /dev/null @@ -1,12 +0,0 @@ -## Getting Started: Source GitHub - -### Requirements - -* Github Account -* Github Personal Access Token wih the necessary permissions \(described below\) - -### Setup guide - -Log into Github and then generate a [personal access token](https://github.com/settings/tokens). 
- -Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: diff --git a/docs/integrations/getting-started/source-google-ads.md b/docs/integrations/getting-started/source-google-ads.md deleted file mode 100644 index f1558cddf335..000000000000 --- a/docs/integrations/getting-started/source-google-ads.md +++ /dev/null @@ -1,42 +0,0 @@ -# Getting Started: Source Google Ads - -## Requirements - -Google Ads Account with an approved Developer Token \(note: In order to get API access to Google Ads, you must have a "manager" account. This must be created separately from your standard account. You can find more information about this distinction in the [google ads docs](https://ads.google.com/home/tools/manager-accounts/).\) - -* developer_token -* client_id -* client_secret -* refresh_token -* start_date -* customer_id - -## Setup guide - -This guide will provide information as if starting from scratch. Please skip over any steps you have already completed. - -* Create an Google Ads Account. Here are [Google's instruction](https://support.google.com/google-ads/answer/6366720) on how to create one. -* Create an Google Ads MANAGER Account. Here are [Google's instruction](https://ads.google.com/home/tools/manager-accounts/) on how to create one. -* You should now have two Google Ads accounts: a normal account and a manager account. Link the Manager account to the normal account following [Google's documentation](https://support.google.com/google-ads/answer/7459601). -* Apply for a developer token \(**make sure you follow our** [**instructions**](#how-to-apply-for-the-developer-token)\) on your Manager account. This token allows you to access your data from the Google Ads API. Here are [Google's instructions](https://developers.google.com/google-ads/api/docs/first-call/dev-token). The docs are a little unclear on this point, but you will _not_ be able to access your data via the Google Ads API until this token is approved. You cannot use a test developer token, it has to be at least a basic developer token. It usually takes Google 24 hours to respond to these applications. This developer token is the value you will use in the `developer_token` field. -* Fetch your `client_id`, `client_secret`, and `refresh_token`. Google provides [instructions](https://developers.google.com/google-ads/api/docs/first-call/overview) on how to do this. -* Select your `customer_id`. The `customer_is` refer to the id of each of your Google Ads accounts. This is the 10 digit number in the top corner of the page when you are in google ads ui. The source will only pull data from the accounts for which you provide an id. If you are having trouble finding it, check out [Google's instructions](https://support.google.com/google-ads/answer/1704344). - -Wow! That was a lot of steps. We are working on making the OAuth flow for all of our connectors simpler \(allowing you to skip needing to get a `developer_token` and a `refresh_token` which are the most painful / time-consuming steps in this walkthrough\). - -## How to apply for the developer token - -Google is very picky about which software and which use case can get access to a developer token. The Airbyte team has worked with the Google Ads team to whitelist Airbyte and make sure you can get one \(see [issue 1981](https://github.com/airbytehq/airbyte/issues/1981) for more information\). 
- -When you apply for a token, you need to mention: - -* Why you need the token \(eg: want to run some internal analytics...\) -* That you will be using the Airbyte Open Source project -* That you have full access to the code base \(because we're open source\) -* That you have full access to the server running the code \(because you're self-hosting Airbyte\) - -If for any reason the request gets denied, let us know and we will be able to unblock you. - -## Understanding Google Ads Query Language - -The Google Ads Query Language can query the Google Ads API. Check out [Google Ads Query Language](https://developers.google.com/google-ads/api/docs/query/overview) diff --git a/docs/operator-guides/locating-files-local-destination.md b/docs/integrations/locating-files-local-destination.md similarity index 98% rename from docs/operator-guides/locating-files-local-destination.md rename to docs/integrations/locating-files-local-destination.md index e514f3a92ebd..d401d7952455 100644 --- a/docs/operator-guides/locating-files-local-destination.md +++ b/docs/integrations/locating-files-local-destination.md @@ -1,3 +1,7 @@ +--- +displayed_sidebar: docs +--- + # Windows - Browsing Local File Output ## Overview diff --git a/docs/integrations/missing-an-integration.md b/docs/integrations/missing-an-integration.md deleted file mode 100644 index e52613182866..000000000000 --- a/docs/integrations/missing-an-integration.md +++ /dev/null @@ -1,14 +0,0 @@ -# Missing an Integration? - -If you'd like to ask for a new connector, or build a new connectors and make them part of the pool of pre-built connectors on Airbyte, first a big thank you. We invite you to check our [contributing guide](../contributing-to-airbyte/). - -If you'd like to build new connectors, or update existing ones, for your own usage, without contributing to the Airbyte codebase, read along. - -## Developing your own connectors - -It's easy to code your own integrations on Airbyte. Here are some links to instruct on how to code new sources and destinations. - -* [Building new connectors](../contributing-to-airbyte/README.md) - -While the guides above are specific to the languages used most frequently to write integrations, **Airbyte integrations can be written in any language**. Please reach out to us if you'd like help developing integrations in other languages. - diff --git a/docs/integrations/sources/amazon-seller-partner-migrations.md b/docs/integrations/sources/amazon-seller-partner-migrations.md new file mode 100644 index 000000000000..4f51ba68b60c --- /dev/null +++ b/docs/integrations/sources/amazon-seller-partner-migrations.md @@ -0,0 +1,21 @@ +# Amazon Seller Partner Migration Guide + +## Upgrading to 2.0.0 + +This change removes Brand Analytics and permanently removes deprecated FBA reports (from Airbyte Cloud). 
+Customers who have those streams must refresh their schema OR disable the following streams: +* GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT +* GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT +* GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT +* GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT +* GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT +* GET_SALES_AND_TRAFFIC_REPORT +* GET_VENDOR_SALES_REPORT +* GET_VENDOR_INVENTORY_REPORT + +Customers who have the following streams will have to disable them: +* GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA +* GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA +* GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA +* GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA +* GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 3841fa3e416a..cfc3348ab1ba 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -4,20 +4,39 @@ This page guides you through the process of setting up the Amazon Seller Partner ## Prerequisites +- Amazon Selling Partner account + + + +**For Airbyte Cloud:** + +- AWS Environment +- AWS Region +- Granted OAuth access +- Replication Start Date + + + + +**For Airbyte Open Source:** + - AWS Environment - AWS Region -- LWA Client ID (LWA App ID)** -- LWA Client Secret** -- Refresh token** - Replication Start Date + -**not required for Airbyte Cloud +## Setup Guide ## Step 1: Set up Amazon Seller Partner -1. [Register](https://developer-docs.amazon.com/sp-api/docs/registering-your-application) Amazon Seller Partner application. + + +**Airbyte Open Source setup steps** + +- [Register](https://developer-docs.amazon.com/sp-api/docs/registering-your-application) Amazon Seller Partner application. - The application must be published as Amazon does not allow external parties such as Airbyte to access draft applications. -2. [Create](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html) IAM user. + + ## Step 2: Set up the source connector in Airbyte @@ -28,7 +47,7 @@ This page guides you through the process of setting up the Amazon Seller Partner 3. On the source setup page, select **Amazon Seller Partner** from the Source type dropdown and enter a name for this connector. 4. Click `Authenticate your account`. 5. Log in and Authorize to your Amazon Seller Partner account. -6. Paste all other data to required fields using your IAM user. +6. Paste all other data to required fields. 7. Click `Set up source`. **For Airbyte Open Source:** @@ -37,7 +56,7 @@ This page guides you through the process of setting up the Amazon Seller Partner 2. Go to local Airbyte page. 3. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. 4. On the Set up the source page, enter the name for the Amazon Seller Partner connector and select **Amazon Seller Partner** from the Source type dropdown. -5. Paste all data to required fields using your IAM user and developer account. +5. Paste all data to required fields. 6. Click `Set up source`. 
## Supported sync modes @@ -70,21 +89,16 @@ This source is capable of syncing the following tables and their data: - [Orders](https://developer-docs.amazon.com/sp-api/docs/orders-api-v0-reference) \(incremental\) - [Orders Items](https://developer-docs.amazon.com/sp-api/docs/orders-api-v0-reference#getorderitems) \(incremental\) - [Seller Feedback Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(incremental\) -- [Brand Analytics Alternate Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Item Comparison Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Market Basket Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Repeat Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Search Terms Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) +- [Brand Analytics Alternate Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Item Comparison Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Market Basket Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Repeat Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Search Terms Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) - [Browse tree report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#browse-tree-report) - [Financial Event Groups](https://developer-docs.amazon.com/sp-api/docs/finances-api-reference#get-financesv0financialeventgroups) - [Financial Events](https://developer-docs.amazon.com/sp-api/docs/finances-api-reference#get-financesv0financialevents) - [FBA Fee Preview Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Daily Inventory History Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Promotions Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Inventory Adjustments Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Received Inventory Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Inventory Event Detail Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Monthly Inventory History Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Manage Inventory](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [Subscribe and Save Forecast Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [Subscribe and Save Performance Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) @@ -102,6 +116,9 @@ This source is capable of syncing the following 
tables and their data: - [Inventory Ledger Report - Summary View](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Reimbursements Report](https://sellercentral.amazon.com/help/hub/reference/G200732720) - [Order Data Shipping Report](https://developer-docs.amazon.com/sp-api/docs/order-reports-attributes#get_order_report_data_shipping) +- [Sales and Traffic Business Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) +- [Vendor Sales Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) +- [Vendor Inventory Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) ## Report options @@ -126,6 +143,9 @@ So, for any value that exceeds the limit, the `period_in_days` will be automatic | Version | Date | Pull Request | Subject | |:---------|:-----------|:--------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `2.0.2` | 2023-11-17 | [\#32462](https://github.com/airbytehq/airbyte/pull/32462) | Remove Max time option from specification; set default waiting time for reports to 1 hour | +| `2.0.1` | 2023-11-16 | [\#32550](https://github.com/airbytehq/airbyte/pull/32550) | Fix the OAuth flow | +| `2.0.0` | 2023-11-23 | [\#32355](https://github.com/airbytehq/airbyte/pull/32355) | Remove Brand Analytics from Airbyte Cloud, permanently remove deprecated FBA reports | | `1.6.2` | 2023-11-14 | [\#32508](https://github.com/airbytehq/airbyte/pull/32508) | Do not use AWS signature as it is no longer required by the Amazon API | | `1.6.1` | 2023-11-13 | [\#32457](https://github.com/airbytehq/airbyte/pull/32457) | Fix report decompression | | `1.6.0` | 2023-11-09 | [\#32259](https://github.com/airbytehq/airbyte/pull/32259) | mark "aws_secret_key" and "aws_access_key" as required in specification; update schema for stream `Orders` | diff --git a/docs/integrations/sources/bing-ads.md b/docs/integrations/sources/bing-ads.md index cab52dc73a5b..a7a972931394 100644 --- a/docs/integrations/sources/bing-ads.md +++ b/docs/integrations/sources/bing-ads.md @@ -208,6 +208,7 @@ The Bing Ads API limits the number of requests for all Microsoft Advertising cli | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.0.1 | 2023-11-16 | [32597](https://github.com/airbytehq/airbyte/pull/32597) | Fix start date parsing from stream state | | 2.0.0 | 2023-11-07 | [31995](https://github.com/airbytehq/airbyte/pull/31995) | Schema update for Accounts, Campaigns and Search Query Performance Report streams. 
Convert `date` and `date-time` fields to standard `RFC3339` | | 1.13.0 | 2023-11-13 | [32306](https://github.com/airbytehq/airbyte/pull/32306) | Add Custom reports and decrease backoff max tries number | | 1.12.1 | 2023-11-10 | [32422](https://github.com/airbytehq/airbyte/pull/32422) | Normalize numeric values in reports | diff --git a/docs/integrations/sources/cart.md b/docs/integrations/sources/cart.md index 0559e6c7f3a3..90a618b956c6 100644 --- a/docs/integrations/sources/cart.md +++ b/docs/integrations/sources/cart.md @@ -50,6 +50,7 @@ Please follow these [steps](https://developers.cart.com/docs/rest-api/docs/READM | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------- | +| 0.3.0 | 2023-11-14 | [23317](https://github.com/airbytehq/airbyte/pull/23317) | Update schemas | | 0.2.1 | 2023-02-22 | [23317](https://github.com/airbytehq/airbyte/pull/23317) | Remove support for incremental for `order_statuses` stream | | 0.2.0 | 2022-09-21 | [16612](https://github.com/airbytehq/airbyte/pull/16612) | Source Cart.com: implement Central API Router access method and improve backoff policy | | 0.1.6 | 2022-07-15 | [14752](https://github.com/airbytehq/airbyte/pull/14752) | Add `order_statuses` stream | diff --git a/docs/integrations/sources/dv-360.md b/docs/integrations/sources/dv-360.md index 9e4341f1d847..b3c095f4691c 100644 --- a/docs/integrations/sources/dv-360.md +++ b/docs/integrations/sources/dv-360.md @@ -36,7 +36,7 @@ Available filters and metrics are provided in this [page](https://developers.goo 3. Fill out a start date, and optionally, an end date and filters (check the [Queries documentation](https://developers.google.com/bid-manager/v1.1/queries)) . 4. You're done. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/sources/e2e-test-cloud.md b/docs/integrations/sources/e2e-test-cloud.md index be70af977245..633e65c3e548 100644 --- a/docs/integrations/sources/e2e-test-cloud.md +++ b/docs/integrations/sources/e2e-test-cloud.md @@ -2,7 +2,7 @@ ## Overview -This is a mock source for testing the Airbyte pipeline. It can generate arbitrary data streams. It is a subset of what is in [End-to-End Testing Source](e2e-test.md) in Open-Source to avoid Airbyte Cloud users accidentally in curring a huge bill. +This is a mock source for testing the Airbyte pipeline. It can generate arbitrary data streams. It is a subset of what is in [End-to-End Testing Source](e2e-test.md) in Open Source to avoid Airbyte Cloud users accidentally incurring a huge bill. ## Mode diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index e6175ad7e465..4160ad4722a0 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -193,7 +193,8 @@ Your token should have at least the `repo` scope. 
Depending on which streams you | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exceeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | | 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | | 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | | 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index 318b30508a54..240f88b90ea2 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -137,7 +137,7 @@ Represents the bidding strategy at the campaign level. Represents labels that can be attached to different entities such as campaigns or ads. - [ad_group_ad](https://developers.google.com/google-ads/api/fields/v14/ad_group_ad) -Different attributtes of ads from ag groups segmented by date. +Different attributes of ads from ad groups segmented by date. - [ad_group_ad_label](https://developers.google.com/google-ads/api/fields/v14/ad_group_ad_label) - [ad_group](https://developers.google.com/google-ads/api/fields/v14/ad_group) @@ -203,7 +203,7 @@ Due to Google Ads API constraints, the `click_view` stream retrieves data one da ::: :::warning -Google Ads doesn't support `PERFORMACE_MAX` campaigns on `ad_group` or `ad` stream level, only on `campaign` level. +Google Ads doesn't support `PERFORMANCE_MAX` campaigns on `ad_group` or `ad` stream level, only on `campaign` level. If you have this type of campaign Google will remove them from the results for the `ads` reports. More [info](https://github.com/airbytehq/airbyte/issues/11062) and [Google Discussions](https://groups.google.com/g/adwords-api/c/_mxbgNckaLQ). 
::: @@ -368,4 +368,4 @@ Due to a limitation in the Google Ads API which does not allow getting performan | `0.1.4` | 2021-07-28 | [4962](https://github.com/airbytehq/airbyte/pull/4962) | Support new Report streams | | `0.1.3` | 2021-07-23 | [4788](https://github.com/airbytehq/airbyte/pull/4788) | Support main streams, fix bug with exception `DATE_RANGE_TOO_NARROW` for incremental streams | | `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | -| `0.1.1` | 2021-06-23 | [4288](https://github.com/airbytehq/airbyte/pull/4288) | Fix `Bugfix: Correctly declare required parameters` | \ No newline at end of file +| `0.1.1` | 2021-06-23 | [4288](https://github.com/airbytehq/airbyte/pull/4288) | Fix `Bugfix: Correctly declare required parameters` | diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md index 96993283d498..03e569c0997f 100644 --- a/docs/integrations/sources/google-analytics-data-api.md +++ b/docs/integrations/sources/google-analytics-data-api.md @@ -68,6 +68,7 @@ Before you can use the service account to access Google Analytics data, you need 1. Go to the [Google Analytics Reporting API dashboard](https://console.developers.google.com/apis/api/analyticsreporting.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. You can also set quotas and check usage. 2. Go to the [Google Analytics API dashboard](https://console.developers.google.com/apis/api/analytics.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. +3. Go to the [Google Analytics Data API dashboard](https://console.developers.google.com/apis/api/analyticsdata.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. #### Set up the Google Analytics connector in Airbyte diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 835d1d324df5..85538f77acef 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -104,7 +104,7 @@ The Google Analytics (Universal Analytics) source connector can sync the followi Reach out to us on Slack or [create an issue](https://github.com/airbytehq/airbyte/issues) if you need to send custom Google Analytics report data with Airbyte. -## Rate Limits and Performance Considerations \(Airbyte Open-Source\) +## Rate Limits and Performance Considerations \(Airbyte Open Source\) [Analytics Reporting API v4](https://developers.google.com/analytics/devguides/reporting/core/v4/limits-quotas) diff --git a/docs/integrations/sources/google-directory.md b/docs/integrations/sources/google-directory.md index b0e570f7544f..d263d9efc93e 100644 --- a/docs/integrations/sources/google-directory.md +++ b/docs/integrations/sources/google-directory.md @@ -40,7 +40,7 @@ This connector attempts to back off gracefully when it hits Directory API's rate 1. Click `OAuth2.0 authorization` then `Authenticate your Google Directory account`. 2. You're done. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) Google APIs use the OAuth 2.0 protocol for authentication and authorization. 
This connector supports [Web server application](https://developers.google.com/identity/protocols/oauth2#webserver) and [Service accounts](https://developers.google.com/identity/protocols/oauth2#serviceaccount) scenarios. Therefore, there are 2 options of setting up authorization for this source: diff --git a/docs/integrations/sources/google-drive.md b/docs/integrations/sources/google-drive.md index cff121b4e696..df8aa03e2bba 100644 --- a/docs/integrations/sources/google-drive.md +++ b/docs/integrations/sources/google-drive.md @@ -247,6 +247,7 @@ Before parsing each document, the connector exports Google Document files to Doc | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|-----------------------------------------------------------------------------------| +| 0.0.3 | 2023-11-16 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Improve folder id input and update document file type parser | | 0.0.2 | 2023-11-02 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Allow syncs on shared drives | | 0.0.1 | 2023-11-02 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Initial Google Drive source | diff --git a/docs/integrations/sources/instagram-migrations.md b/docs/integrations/sources/instagram-migrations.md new file mode 100644 index 000000000000..f9009b09e3b5 --- /dev/null +++ b/docs/integrations/sources/instagram-migrations.md @@ -0,0 +1,9 @@ +# Instagram Migration Guide + +## Upgrading to 2.0.0 + +This release adds a default primary key for the streams UserLifetimeInsights and UserInsights, and updates the format of timestamp fields in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. + +To ensure uninterrupted syncs, users should: +- Refresh the source schema +- Reset affected streams \ No newline at end of file diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index b392c4737016..7b4999945fd4 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -10,9 +10,10 @@ This page contains the setup guide and reference information for the Instagram s - [Meta for Developers account](https://developers.facebook.com) - [Instagram business account](https://www.facebook.com/business/help/898752960195806) to your Facebook page +- [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte) - [Instagram Graph API](https://developers.facebook.com/docs/instagram-api/) to your Facebook app -- [Facebook OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) -- [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte) +- [Facebook Instagram OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) + ## Setup Guide @@ -28,7 +29,7 @@ This page contains the setup guide and reference information for the Instagram s 4. Enter a name for your source. 5. Click **Authenticate your Instagram account**. 6. Log in and authorize the Instagram account. -7. Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If this field is blank, Airbyte will replicate all data. +7. (Optional) Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. 
All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date. 8. Click **Set up source**. @@ -40,9 +41,8 @@ This page contains the setup guide and reference information for the Instagram s 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Instagram** from the **Source type** dropdown. 4. Enter a name for your source. -5. Click **Authenticate your Instagram account**. -6. Log in and authorize the Instagram account. -7. Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If this field is blank, Airbyte will replicate all data. +5. Enter **Access Token** generated using [Graph API Explorer](https://developers.facebook.com/tools/explorer/) or [by using an app you can create on Facebook](https://developers.facebook.com/docs/instagram-api/getting-started) with the required permissions: instagram_basic, instagram_manage_insights, pages_show_list, pages_read_engagement. +7. (Optional) Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date. 8. Click **Set up source**. @@ -93,6 +93,8 @@ AirbyteRecords are required to conform to the [Airbyte type](https://docs.airbyt | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------| +| 2.0.0 | 2023-11-17 | [32500](https://github.com/airbytehq/airbyte/pull/32500) | Add primary keys for UserLifetimeInsights and UserInsights; add airbyte_type to timestamp fields | +| 1.0.16 | 2023-11-17 | [32627](https://github.com/airbytehq/airbyte/pull/32627) | Fix start_date type; fix docs | | 1.0.15 | 2023-11-14 | [32494](https://github.com/airbytehq/airbyte/pull/32494) | Marked start_date as optional; set max retry time to 10 minutes; add suggested streams | | 1.0.14 | 2023-11-13 | [32423](https://github.com/airbytehq/airbyte/pull/32423) | Capture media_product_type column in media and stories stream | | 1.0.13 | 2023-11-10 | [32245](https://github.com/airbytehq/airbyte/pull/32245) | Add skipping reading MediaInsights stream if an error code 10 is received | diff --git a/docs/integrations/sources/mailchimp.md b/docs/integrations/sources/mailchimp.md index d3d2551a8a9c..20523890da5e 100644 --- a/docs/integrations/sources/mailchimp.md +++ b/docs/integrations/sources/mailchimp.md @@ -76,6 +76,7 @@ Now that you have set up the Mailchimp source connector, check out the following | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------| +| 0.8.3 | 2023-11-15 | [32543](https://github.com/airbytehq/airbyte/pull/32543) | Handle empty datetime fields in Reports stream | | 0.8.2 | 2023-11-13 | [32466](https://github.com/airbytehq/airbyte/pull/32466) | Improve error handling during connection check | | 0.8.1 | 2023-11-06 | [32226](https://github.com/airbytehq/airbyte/pull/32226) | Unmute expected records test after data anonymisation | | 0.8.0 | 2023-11-01 | [32032](https://github.com/airbytehq/airbyte/pull/32032) | Add ListMembers stream | diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index 
391c7cfbed48..c2029242225d 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -25,7 +25,7 @@ Note: Currently hierarchyid and sql_variant are not processed in CDC migration t On Airbyte Cloud, only TLS connections to your MSSQL instance are supported in source configuration. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements @@ -342,6 +342,7 @@ WHERE actor_definition_id ='b5ea17b1-f170-46dc-bc31-cc744ca984c1' AND (configura | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------| +| 3.0.1 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. | | 3.0.0 | 2023-11-07 | [31531](https://github.com/airbytehq/airbyte/pull/31531) | Remapped date, smalldatetime, datetime2, time, and datetimeoffset datatype to their correct Airbyte types | | 2.0.4 | 2023-11-06 | [#32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | | 2.0.3 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | diff --git a/docs/integrations/sources/my-hours.md b/docs/integrations/sources/my-hours.md index 66ae44d7bc2d..f58aa7899378 100644 --- a/docs/integrations/sources/my-hours.md +++ b/docs/integrations/sources/my-hours.md @@ -24,7 +24,7 @@ This source allows you to synchronize the following data tables: **Requirements** In order to use the My Hours API you need to provide the credentials to an admin My Hours account. -### Performance Considerations (Airbyte Open-Source) +### Performance Considerations (Airbyte Open Source) Depending on the amount of team members and time logs the source provides a property to change the pagination size for the time logs query. Typically a pagination of 30 days is a correct balance between reliability and speed. But if you have a big amount of monthly entries you might want to change this value to a lower value. diff --git a/docs/integrations/sources/mysql.md b/docs/integrations/sources/mysql.md index f75d347df8f6..2befdd9f78bf 100644 --- a/docs/integrations/sources/mysql.md +++ b/docs/integrations/sources/mysql.md @@ -91,7 +91,7 @@ To fill out the required information: #### Step 4: (Airbyte Cloud Only) Allow inbound traffic from Airbyte IPs. If you are on Airbyte Cloud, you will always need to modify your database configuration to allow inbound traffic from Airbyte IPs. You can find a list of all IPs that need to be allowlisted in -our [Airbyte Security docs](../../../operator-guides/security#network-security-1). +our [Airbyte Security docs](../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte MySQL source! 
@@ -220,136 +220,137 @@ Any database or table encoding combination of charset and collation is supported ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 3.1.7 | 2023-11-08 | [32125](https://github.com/airbytehq/airbyte/pull/32125) | fix compilation warnings | -| 3.1.6 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | -| 3.1.5 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | -| 3.1.4 | 2023-10-30 | [31960](https://github.com/airbytehq/airbyte/pull/31960) | Adopt java CDK version 0.2.0. | -| 3.1.3 | 2023-10-11 | [31322](https://github.com/airbytehq/airbyte/pull/31322) | Correct pevious release | -| 3.1.2 | 2023-09-29 | [30806](https://github.com/airbytehq/airbyte/pull/30806) | Cap log line length to 32KB to prevent loss of records | -| 3.1.1 | 2023-09-26 | [30744](https://github.com/airbytehq/airbyte/pull/30744) | Update MySQL JDBC connection configs to keep default auto-commit behavior | -| 3.1.0 | 2023-09-21 | [30270](https://github.com/airbytehq/airbyte/pull/30270) | Enhanced Standard Sync with initial load via Primary Key with a switch to cursor for incremental syncs | -| 3.0.9 | 2023-09-20 | [30620](https://github.com/airbytehq/airbyte/pull/30620) | Airbyte Certified MySQL Source connector | -| 3.0.8 | 2023-09-14 | [30333](https://github.com/airbytehq/airbyte/pull/30333) | CDC : Update the correct timezone parameter passed to Debezium to `database.connectionTimezone` | -| 3.0.7 | 2023-09-13 | [30375](https://github.com/airbytehq/airbyte/pull/30375) | Fix a bug causing a failure when DB views are included in sync | -| 3.0.6 | 2023-09-12 | [30308](https://github.com/airbytehq/airbyte/pull/30308) | CDC : Enable compression of schema history blob in state | -| 3.0.5 | 2023-09-12 | [30289](https://github.com/airbytehq/airbyte/pull/30289) | CDC : Introduce logic for compression of schema history blob in state | -| 3.0.4 | 2023-09-06 | [30213](https://github.com/airbytehq/airbyte/pull/30213) | CDC : Checkpointable initial snapshot | -| 3.0.3 | 2023-08-31 | [29821](https://github.com/airbytehq/airbyte/pull/29821) | Set replication_method display_type to radio | -| 3.0.2 | 2023-08-30 | [30015](https://github.com/airbytehq/airbyte/pull/30015) | Logging : Log storage engines associated with tables in the sync | -| 3.0.1 | 2023-08-21 | [29308](https://github.com/airbytehq/airbyte/pull/29308) | CDC: Enable frequent state emissions during incremental runs | -| 3.0.0 | 2023-08-08 | [28756](https://github.com/airbytehq/airbyte/pull/28756) | CDC: Set a default cursor | -| 2.1.2 | 2023-08-08 | [29220](https://github.com/airbytehq/airbyte/pull/29220) | Add indicator that CDC is the recommended update method | -| 2.1.1 | 2023-07-31 | [28882](https://github.com/airbytehq/airbyte/pull/28882) | Improve replication method labels and descriptions | -| 2.1.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | -| 2.0.25 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | -| 2.0.24 | 2023-05-25 | [26473](https://github.com/airbytehq/airbyte/pull/26473) | CDC : 
Limit queue size | -| 2.0.23 | 2023-05-24 | [25586](https://github.com/airbytehq/airbyte/pull/25586) | No need to base64 encode strings on databases sorted with binary collation | -| 2.0.22 | 2023-05-22 | [25859](https://github.com/airbytehq/airbyte/pull/25859) | Allow adding sessionVariables JDBC parameters | -| 2.0.21 | 2023-05-10 | [25460](https://github.com/airbytehq/airbyte/pull/25460) | Handle a decimal number with 0 decimal points as an integer | -| 2.0.20 | 2023-05-01 | [25740](https://github.com/airbytehq/airbyte/pull/25740) | Disable index logging | -| 2.0.19 | 2023-04-26 | [25401](https://github.com/airbytehq/airbyte/pull/25401) | CDC : Upgrade Debezium to version 2.2.0 | -| 2.0.18 | 2023-04-19 | [25345](https://github.com/airbytehq/airbyte/pull/25345) | Logging : Log database indexes per stream | -| 2.0.17 | 2023-04-19 | [24582](https://github.com/airbytehq/airbyte/pull/24582) | CDC : refactor for performance improvement | -| 2.0.16 | 2023-04-17 | [25220](https://github.com/airbytehq/airbyte/pull/25220) | Logging changes : Log additional metadata & clean up noisy logs | -| 2.0.15 | 2023-04-12 | [25131](https://github.com/airbytehq/airbyte/pull/25131) | Make Client Certificate and Client Key always show | -| 2.0.14 | 2023-04-11 | [24656](https://github.com/airbytehq/airbyte/pull/24656) | CDC minor refactor | -| 2.0.13 | 2023-04-06 | [24820](https://github.com/airbytehq/airbyte/pull/24820) | Fix data loss bug during an initial failed non-CDC incremental sync | -| 2.0.12 | 2023-04-04 | [24833](https://github.com/airbytehq/airbyte/pull/24833) | Fix Debezium retry policy configuration | -| 2.0.11 | 2023-03-28 | [24166](https://github.com/airbytehq/airbyte/pull/24166) | Fix InterruptedException bug during Debezium shutdown | -| 2.0.10 | 2023-03-27 | [24529](https://github.com/airbytehq/airbyte/pull/24373) | Preparing the connector for CDC checkpointing | -| 2.0.9 | 2023-03-24 | [24529](https://github.com/airbytehq/airbyte/pull/24529) | Set SSL Mode to required on strict-encrypt variant | -| 2.0.8 | 2023-03-22 | [20760](https://github.com/airbytehq/airbyte/pull/20760) | Removed redundant date-time datatypes formatting | -| 2.0.7 | 2023-03-21 | [24207](https://github.com/airbytehq/airbyte/pull/24207) | Fix incorrect schema change warning in CDC mode | -| 2.0.6 | 2023-03-21 | [23984](https://github.com/airbytehq/airbyte/pull/23984) | Support CDC heartbeats | -| 2.0.5 | 2023-03-21 | [24147](https://github.com/airbytehq/airbyte/pull/24275) | Fix error with CDC checkpointing | -| 2.0.4 | 2023-03-20 | [24147](https://github.com/airbytehq/airbyte/pull/24147) | Support different table structure during "DESCRIBE" query | -| 2.0.3 | 2023-03-15 | [24082](https://github.com/airbytehq/airbyte/pull/24082) | Fixed NPE during cursor values validation | -| 2.0.2 | 2023-03-14 | [23908](https://github.com/airbytehq/airbyte/pull/23908) | Log warning on null cursor values | -| 2.0.1 | 2023-03-10 | [23939](https://github.com/airbytehq/airbyte/pull/23939) | For network isolation, source connector accepts a list of hosts it is allowed to connect | -| 2.0.0 | 2023-03-06 | [23112](https://github.com/airbytehq/airbyte/pull/23112) | Upgrade Debezium version to 2.1.2 | -| 1.0.21 | 2023-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. 
| -| 1.0.20 | 2023-01-24 | [20593](https://github.com/airbytehq/airbyte/pull/20593) | Handle ssh time out exception | -| 1.0.19 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | -| 1.0.18 | 2022-12-14 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions | -| 1.0.17 | 2022-12-13 | [20289](https://github.com/airbytehq/airbyte/pull/20289) | Mark unknown column exception as config error | -| 1.0.16 | 2022-12-12 | [18959](https://github.com/airbytehq/airbyte/pull/18959) | CDC : Don't timeout if snapshot is not complete. | -| 1.0.15 | 2022-12-06 | [20000](https://github.com/airbytehq/airbyte/pull/20000) | Add check and better messaging when user does not have permission to access binary log in CDC mode | -| 1.0.14 | 2022-11-22 | [19514](https://github.com/airbytehq/airbyte/pull/19514) | Adjust batch selection memory limits databases. | -| 1.0.13 | 2022-11-14 | [18956](https://github.com/airbytehq/airbyte/pull/18956) | Clean up Tinyint Unsigned data type identification | -| 1.0.12 | 2022-11-07 | [19025](https://github.com/airbytehq/airbyte/pull/19025) | Stop enforce SSL if ssl mode is disabled | -| 1.0.11 | 2022-11-03 | [18851](https://github.com/airbytehq/airbyte/pull/18851) | Fix bug with unencrypted CDC connections | -| 1.0.10 | 2022-11-02 | [18619](https://github.com/airbytehq/airbyte/pull/18619) | Fix bug with handling Tinyint(1) Unsigned values as boolean | -| 1.0.9 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name | -| 1.0.8 | 2022-10-25 | [18383](https://github.com/airbytehq/airbyte/pull/18383) | Better SSH error handling + messages | -| 1.0.7 | 2022-10-21 | [18263](https://github.com/airbytehq/airbyte/pull/18263) | Fixes bug introduced in [15833](https://github.com/airbytehq/airbyte/pull/15833) and adds better error messaging for SSH tunnel in Destinations | -| 1.0.6 | 2022-10-19 | [18087](https://github.com/airbytehq/airbyte/pull/18087) | Better error messaging for configuration errors (SSH configs, choosing an invalid cursor) | -| 1.0.5 | 2022-10-17 | [18041](https://github.com/airbytehq/airbyte/pull/18041) | Fixes bug introduced 2022-09-12 with SshTunnel, handles iterator exception properly | -| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid data missing when new data is inserted at the same time as a sync starts under non-CDC incremental mode | -| 1.0.4 | 2022-10-11 | [17815](https://github.com/airbytehq/airbyte/pull/17815) | Expose setting server timezone for CDC syncs | -| 1.0.3 | 2022-10-07 | [17236](https://github.com/airbytehq/airbyte/pull/17236) | Fix large table issue by fetch size | -| 1.0.2 | 2022-10-03 | [17170](https://github.com/airbytehq/airbyte/pull/17170) | Make initial CDC waiting time configurable | -| 1.0.1 | 2022-10-01 | [17459](https://github.com/airbytehq/airbyte/pull/17459) | Upgrade debezium version to 1.9.6 from 1.9.2 | -| 1.0.0 | 2022-09-27 | [17164](https://github.com/airbytehq/airbyte/pull/17164) | Certify MySQL Source as Beta | -| 0.6.15 | 2022-09-27 | [17299](https://github.com/airbytehq/airbyte/pull/17299) | Improve error handling for strict-encrypt mysql source | -| 0.6.14 | 2022-09-26 | [16954](https://github.com/airbytehq/airbyte/pull/16954) | Implement support for snapshot of new tables in CDC mode | -| 0.6.13 | 2022-09-14 | [15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage | -| 0.6.12 | 2022-09-13 | 
[16657](https://github.com/airbytehq/airbyte/pull/16657) | Improve CDC record queueing performance | -| 0.6.11 | 2022-09-08 | [16202](https://github.com/airbytehq/airbyte/pull/16202) | Adds error messaging factory to UI | -| 0.6.10 | 2022-09-08 | [16007](https://github.com/airbytehq/airbyte/pull/16007) | Implement per stream state support. | -| 0.6.9 | 2022-09-03 | [16216](https://github.com/airbytehq/airbyte/pull/16216) | Standardize spec for CDC replication. See upgrade instructions [above](#upgrading-from-0.6.8-and-older-versions-to-0.6.9-and-later-versions). | -| 0.6.8 | 2022-09-01 | [16259](https://github.com/airbytehq/airbyte/pull/16259) | Emit state messages more frequently | -| 0.6.7 | 2022-08-30 | [16114](https://github.com/airbytehq/airbyte/pull/16114) | Prevent traffic going on an unsecured channel in strict-encryption version of source mysql | -| 0.6.6 | 2022-08-25 | [15993](https://github.com/airbytehq/airbyte/pull/15993) | Improved support for connecting over SSL | -| 0.6.5 | 2022-08-25 | [15917](https://github.com/airbytehq/airbyte/pull/15917) | Fix temporal data type default value bug | -| 0.6.4 | 2022-08-18 | [14356](https://github.com/airbytehq/airbyte/pull/14356) | DB Sources: only show a table can sync incrementally if at least one column can be used as a cursor field | -| 0.6.3 | 2022-08-12 | [15044](https://github.com/airbytehq/airbyte/pull/15044) | Added the ability to connect using different SSL modes and SSL certificates | -| 0.6.2 | 2022-08-11 | [15538](https://github.com/airbytehq/airbyte/pull/15538) | Allow additional properties in db stream state | -| 0.6.1 | 2022-08-02 | [14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | -| 0.6.0 | 2022-07-26 | [14362](https://github.com/airbytehq/airbyte/pull/14362) | Integral columns are now discovered as int64 fields. | -| 0.5.17 | 2022-07-22 | [14714](https://github.com/airbytehq/airbyte/pull/14714) | Clarified error message when invalid cursor column selected | -| 0.5.16 | 2022-07-14 | [14574](https://github.com/airbytehq/airbyte/pull/14574) | Removed additionalProperties:false from JDBC source connectors | -| 0.5.15 | 2022-06-23 | [14077](https://github.com/airbytehq/airbyte/pull/14077) | Use the new state management | -| 0.5.13 | 2022-06-21 | [13945](https://github.com/airbytehq/airbyte/pull/13945) | Aligned datatype test | -| 0.5.12 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | -| 0.5.11 | 2022-05-03 | [12544](https://github.com/airbytehq/airbyte/pull/12544) | Prevent source from hanging under certain circumstances by adding a watcher for orphaned threads. | -| 0.5.10 | 2022-04-29 | [12480](https://github.com/airbytehq/airbyte/pull/12480) | Query tables with adaptive fetch size to optimize JDBC memory consumption | -| 0.5.9 | 2022-04-06 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Bump mina-sshd from 2.7.0 to 2.8.0 | -| 0.5.6 | 2022-02-21 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Fixed cursor for old connectors that use non-microsecond format. 
Now connectors work with both formats | -| 0.5.5 | 2022-02-18 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Updated timestamp transformation with microseconds | -| 0.5.4 | 2022-02-11 | [10251](https://github.com/airbytehq/airbyte/issues/10251) | bug Source MySQL CDC: sync failed when has Zero-date value in mandatory column | -| 0.5.2 | 2021-12-14 | [6425](https://github.com/airbytehq/airbyte/issues/6425) | MySQL CDC sync fails because starting binlog position not found in DB | -| 0.5.1 | 2021-12-13 | [8582](https://github.com/airbytehq/airbyte/pull/8582) | Update connector fields title/description | -| 0.5.0 | 2021-12-11 | [7970](https://github.com/airbytehq/airbyte/pull/7970) | Support all MySQL types | -| 0.4.13 | 2021-12-03 | [8335](https://github.com/airbytehq/airbyte/pull/8335) | Source-MySql: do not check cdc required param binlog_row_image for standard replication | -| 0.4.12 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | -| 0.4.11 | 2021-11-19 | [8047](https://github.com/airbytehq/airbyte/pull/8047) | Source MySQL: transform binary data base64 format | -| 0.4.10 | 2021-11-15 | [7820](https://github.com/airbytehq/airbyte/pull/7820) | Added basic performance test | -| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside java's `Short` data type capability | -| 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text | -| 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | -| 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection | -| 0.4.5 | 2021-09-17 | [6146](https://github.com/airbytehq/airbyte/pull/6146) | Added option to connect to DB via SSH | -| 0.4.1 | 2021-07-23 | [4956](https://github.com/airbytehq/airbyte/pull/4956) | Fix log link | -| 0.3.7 | 2021-06-09 | [3179](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.3.6 | 2021-06-09 | [3966](https://github.com/airbytehq/airbyte/pull/3966) | Fix excessive logging for CDC method | -| 0.3.5 | 2021-06-07 | [3890](https://github.com/airbytehq/airbyte/pull/3890) | Fix CDC handle tinyint\(1\) and boolean types | -| 0.3.4 | 2021-06-04 | [3846](https://github.com/airbytehq/airbyte/pull/3846) | Fix max integer value failure | -| 0.3.3 | 2021-06-02 | [3789](https://github.com/airbytehq/airbyte/pull/3789) | MySQL CDC poll wait 5 minutes when not received a single record | -| 0.3.2 | 2021-06-01 | [3757](https://github.com/airbytehq/airbyte/pull/3757) | MySQL CDC poll 5s to 5 min | -| 0.3.1 | 2021-06-01 | [3505](https://github.com/airbytehq/airbyte/pull/3505) | Implemented MySQL CDC | -| 0.3.0 | 2021-04-21 | [2990](https://github.com/airbytehq/airbyte/pull/2990) | Support namespaces | -| 0.2.5 | 2021-04-15 | [2899](https://github.com/airbytehq/airbyte/pull/2899) | Fix bug in tests | -| 0.2.4 | 2021-03-28 | [2600](https://github.com/airbytehq/airbyte/pull/2600) | Add NCHAR and NVCHAR support to DB and cursor type casting | -| 0.2.3 | 2021-03-26 | [2611](https://github.com/airbytehq/airbyte/pull/2611) | Add an optional `jdbc_url_params` in parameters | -| 0.2.2 | 2021-03-26 | [2460](https://github.com/airbytehq/airbyte/pull/2460) | Destination supports destination sync mode | 
-| 0.2.1 | 2021-03-18 | [2488](https://github.com/airbytehq/airbyte/pull/2488) | Sources support primary keys | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.10 | 2021-02-02 | [1887](https://github.com/airbytehq/airbyte/pull/1887) | Migrate AbstractJdbcSource to use iterators | -| 0.1.9 | 2021-01-25 | [1746](https://github.com/airbytehq/airbyte/pull/1746) | Fix NPE in State Decorator | -| 0.1.8 | 2021-01-19 | [1724](https://github.com/airbytehq/airbyte/pull/1724) | Fix JdbcSource handling of tables with same names in different schemas | -| 0.1.7 | 2021-01-14 | [1655](https://github.com/airbytehq/airbyte/pull/1655) | Fix JdbcSource OOM | -| 0.1.6 | 2021-01-08 | [1307](https://github.com/airbytehq/airbyte/pull/1307) | Migrate Postgres and MySQL to use new JdbcSource | -| 0.1.5 | 2020-12-11 | [1267](https://github.com/airbytehq/airbyte/pull/1267) | Support incremental sync | -| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 3.1.8 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. | +| 3.1.7 | 2023-11-08 | [32125](https://github.com/airbytehq/airbyte/pull/32125) | fix compilation warnings | +| 3.1.6 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | +| 3.1.5 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | +| 3.1.4 | 2023-10-30 | [31960](https://github.com/airbytehq/airbyte/pull/31960) | Adopt java CDK version 0.2.0. 
| +| 3.1.3 | 2023-10-11 | [31322](https://github.com/airbytehq/airbyte/pull/31322) | Correct pevious release | +| 3.1.2 | 2023-09-29 | [30806](https://github.com/airbytehq/airbyte/pull/30806) | Cap log line length to 32KB to prevent loss of records | +| 3.1.1 | 2023-09-26 | [30744](https://github.com/airbytehq/airbyte/pull/30744) | Update MySQL JDBC connection configs to keep default auto-commit behavior | +| 3.1.0 | 2023-09-21 | [30270](https://github.com/airbytehq/airbyte/pull/30270) | Enhanced Standard Sync with initial load via Primary Key with a switch to cursor for incremental syncs | +| 3.0.9 | 2023-09-20 | [30620](https://github.com/airbytehq/airbyte/pull/30620) | Airbyte Certified MySQL Source connector | +| 3.0.8 | 2023-09-14 | [30333](https://github.com/airbytehq/airbyte/pull/30333) | CDC : Update the correct timezone parameter passed to Debezium to `database.connectionTimezone` | +| 3.0.7 | 2023-09-13 | [30375](https://github.com/airbytehq/airbyte/pull/30375) | Fix a bug causing a failure when DB views are included in sync | +| 3.0.6 | 2023-09-12 | [30308](https://github.com/airbytehq/airbyte/pull/30308) | CDC : Enable compression of schema history blob in state | +| 3.0.5 | 2023-09-12 | [30289](https://github.com/airbytehq/airbyte/pull/30289) | CDC : Introduce logic for compression of schema history blob in state | +| 3.0.4 | 2023-09-06 | [30213](https://github.com/airbytehq/airbyte/pull/30213) | CDC : Checkpointable initial snapshot | +| 3.0.3 | 2023-08-31 | [29821](https://github.com/airbytehq/airbyte/pull/29821) | Set replication_method display_type to radio | +| 3.0.2 | 2023-08-30 | [30015](https://github.com/airbytehq/airbyte/pull/30015) | Logging : Log storage engines associated with tables in the sync | +| 3.0.1 | 2023-08-21 | [29308](https://github.com/airbytehq/airbyte/pull/29308) | CDC: Enable frequent state emissions during incremental runs | +| 3.0.0 | 2023-08-08 | [28756](https://github.com/airbytehq/airbyte/pull/28756) | CDC: Set a default cursor | +| 2.1.2 | 2023-08-08 | [29220](https://github.com/airbytehq/airbyte/pull/29220) | Add indicator that CDC is the recommended update method | +| 2.1.1 | 2023-07-31 | [28882](https://github.com/airbytehq/airbyte/pull/28882) | Improve replication method labels and descriptions | +| 2.1.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | +| 2.0.25 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | +| 2.0.24 | 2023-05-25 | [26473](https://github.com/airbytehq/airbyte/pull/26473) | CDC : Limit queue size | +| 2.0.23 | 2023-05-24 | [25586](https://github.com/airbytehq/airbyte/pull/25586) | No need to base64 encode strings on databases sorted with binary collation | +| 2.0.22 | 2023-05-22 | [25859](https://github.com/airbytehq/airbyte/pull/25859) | Allow adding sessionVariables JDBC parameters | +| 2.0.21 | 2023-05-10 | [25460](https://github.com/airbytehq/airbyte/pull/25460) | Handle a decimal number with 0 decimal points as an integer | +| 2.0.20 | 2023-05-01 | [25740](https://github.com/airbytehq/airbyte/pull/25740) | Disable index logging | +| 2.0.19 | 2023-04-26 | [25401](https://github.com/airbytehq/airbyte/pull/25401) | CDC : Upgrade Debezium to version 2.2.0 | +| 2.0.18 | 2023-04-19 | [25345](https://github.com/airbytehq/airbyte/pull/25345) | Logging : Log database indexes per stream | +| 2.0.17 | 2023-04-19 | [24582](https://github.com/airbytehq/airbyte/pull/24582) | CDC : refactor for 
performance improvement | +| 2.0.16 | 2023-04-17 | [25220](https://github.com/airbytehq/airbyte/pull/25220) | Logging changes : Log additional metadata & clean up noisy logs | +| 2.0.15 | 2023-04-12 | [25131](https://github.com/airbytehq/airbyte/pull/25131) | Make Client Certificate and Client Key always show | +| 2.0.14 | 2023-04-11 | [24656](https://github.com/airbytehq/airbyte/pull/24656) | CDC minor refactor | +| 2.0.13 | 2023-04-06 | [24820](https://github.com/airbytehq/airbyte/pull/24820) | Fix data loss bug during an initial failed non-CDC incremental sync | +| 2.0.12 | 2023-04-04 | [24833](https://github.com/airbytehq/airbyte/pull/24833) | Fix Debezium retry policy configuration | +| 2.0.11 | 2023-03-28 | [24166](https://github.com/airbytehq/airbyte/pull/24166) | Fix InterruptedException bug during Debezium shutdown | +| 2.0.10 | 2023-03-27 | [24529](https://github.com/airbytehq/airbyte/pull/24373) | Preparing the connector for CDC checkpointing | +| 2.0.9 | 2023-03-24 | [24529](https://github.com/airbytehq/airbyte/pull/24529) | Set SSL Mode to required on strict-encrypt variant | +| 2.0.8 | 2023-03-22 | [20760](https://github.com/airbytehq/airbyte/pull/20760) | Removed redundant date-time datatypes formatting | +| 2.0.7 | 2023-03-21 | [24207](https://github.com/airbytehq/airbyte/pull/24207) | Fix incorrect schema change warning in CDC mode | +| 2.0.6 | 2023-03-21 | [23984](https://github.com/airbytehq/airbyte/pull/23984) | Support CDC heartbeats | +| 2.0.5 | 2023-03-21 | [24147](https://github.com/airbytehq/airbyte/pull/24275) | Fix error with CDC checkpointing | +| 2.0.4 | 2023-03-20 | [24147](https://github.com/airbytehq/airbyte/pull/24147) | Support different table structure during "DESCRIBE" query | +| 2.0.3 | 2023-03-15 | [24082](https://github.com/airbytehq/airbyte/pull/24082) | Fixed NPE during cursor values validation | +| 2.0.2 | 2023-03-14 | [23908](https://github.com/airbytehq/airbyte/pull/23908) | Log warning on null cursor values | +| 2.0.1 | 2023-03-10 | [23939](https://github.com/airbytehq/airbyte/pull/23939) | For network isolation, source connector accepts a list of hosts it is allowed to connect | +| 2.0.0 | 2023-03-06 | [23112](https://github.com/airbytehq/airbyte/pull/23112) | Upgrade Debezium version to 2.1.2 | +| 1.0.21 | 2023-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. | +| 1.0.20 | 2023-01-24 | [20593](https://github.com/airbytehq/airbyte/pull/20593) | Handle ssh time out exception | +| 1.0.19 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | +| 1.0.18 | 2022-12-14 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions | +| 1.0.17 | 2022-12-13 | [20289](https://github.com/airbytehq/airbyte/pull/20289) | Mark unknown column exception as config error | +| 1.0.16 | 2022-12-12 | [18959](https://github.com/airbytehq/airbyte/pull/18959) | CDC : Don't timeout if snapshot is not complete. | +| 1.0.15 | 2022-12-06 | [20000](https://github.com/airbytehq/airbyte/pull/20000) | Add check and better messaging when user does not have permission to access binary log in CDC mode | +| 1.0.14 | 2022-11-22 | [19514](https://github.com/airbytehq/airbyte/pull/19514) | Adjust batch selection memory limits databases. 
| +| 1.0.13 | 2022-11-14 | [18956](https://github.com/airbytehq/airbyte/pull/18956) | Clean up Tinyint Unsigned data type identification | +| 1.0.12 | 2022-11-07 | [19025](https://github.com/airbytehq/airbyte/pull/19025) | Stop enforce SSL if ssl mode is disabled | +| 1.0.11 | 2022-11-03 | [18851](https://github.com/airbytehq/airbyte/pull/18851) | Fix bug with unencrypted CDC connections | +| 1.0.10 | 2022-11-02 | [18619](https://github.com/airbytehq/airbyte/pull/18619) | Fix bug with handling Tinyint(1) Unsigned values as boolean | +| 1.0.9 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name | +| 1.0.8 | 2022-10-25 | [18383](https://github.com/airbytehq/airbyte/pull/18383) | Better SSH error handling + messages | +| 1.0.7 | 2022-10-21 | [18263](https://github.com/airbytehq/airbyte/pull/18263) | Fixes bug introduced in [15833](https://github.com/airbytehq/airbyte/pull/15833) and adds better error messaging for SSH tunnel in Destinations | +| 1.0.6 | 2022-10-19 | [18087](https://github.com/airbytehq/airbyte/pull/18087) | Better error messaging for configuration errors (SSH configs, choosing an invalid cursor) | +| 1.0.5 | 2022-10-17 | [18041](https://github.com/airbytehq/airbyte/pull/18041) | Fixes bug introduced 2022-09-12 with SshTunnel, handles iterator exception properly | +| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid data missing when new data is inserted at the same time as a sync starts under non-CDC incremental mode | +| 1.0.4 | 2022-10-11 | [17815](https://github.com/airbytehq/airbyte/pull/17815) | Expose setting server timezone for CDC syncs | +| 1.0.3 | 2022-10-07 | [17236](https://github.com/airbytehq/airbyte/pull/17236) | Fix large table issue by fetch size | +| 1.0.2 | 2022-10-03 | [17170](https://github.com/airbytehq/airbyte/pull/17170) | Make initial CDC waiting time configurable | +| 1.0.1 | 2022-10-01 | [17459](https://github.com/airbytehq/airbyte/pull/17459) | Upgrade debezium version to 1.9.6 from 1.9.2 | +| 1.0.0 | 2022-09-27 | [17164](https://github.com/airbytehq/airbyte/pull/17164) | Certify MySQL Source as Beta | +| 0.6.15 | 2022-09-27 | [17299](https://github.com/airbytehq/airbyte/pull/17299) | Improve error handling for strict-encrypt mysql source | +| 0.6.14 | 2022-09-26 | [16954](https://github.com/airbytehq/airbyte/pull/16954) | Implement support for snapshot of new tables in CDC mode | +| 0.6.13 | 2022-09-14 | [15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage | +| 0.6.12 | 2022-09-13 | [16657](https://github.com/airbytehq/airbyte/pull/16657) | Improve CDC record queueing performance | +| 0.6.11 | 2022-09-08 | [16202](https://github.com/airbytehq/airbyte/pull/16202) | Adds error messaging factory to UI | +| 0.6.10 | 2022-09-08 | [16007](https://github.com/airbytehq/airbyte/pull/16007) | Implement per stream state support. | +| 0.6.9 | 2022-09-03 | [16216](https://github.com/airbytehq/airbyte/pull/16216) | Standardize spec for CDC replication. See upgrade instructions [above](#upgrading-from-0.6.8-and-older-versions-to-0.6.9-and-later-versions). 
| +| 0.6.8 | 2022-09-01 | [16259](https://github.com/airbytehq/airbyte/pull/16259) | Emit state messages more frequently | +| 0.6.7 | 2022-08-30 | [16114](https://github.com/airbytehq/airbyte/pull/16114) | Prevent traffic going on an unsecured channel in strict-encryption version of source mysql | +| 0.6.6 | 2022-08-25 | [15993](https://github.com/airbytehq/airbyte/pull/15993) | Improved support for connecting over SSL | +| 0.6.5 | 2022-08-25 | [15917](https://github.com/airbytehq/airbyte/pull/15917) | Fix temporal data type default value bug | +| 0.6.4 | 2022-08-18 | [14356](https://github.com/airbytehq/airbyte/pull/14356) | DB Sources: only show a table can sync incrementally if at least one column can be used as a cursor field | +| 0.6.3 | 2022-08-12 | [15044](https://github.com/airbytehq/airbyte/pull/15044) | Added the ability to connect using different SSL modes and SSL certificates | +| 0.6.2 | 2022-08-11 | [15538](https://github.com/airbytehq/airbyte/pull/15538) | Allow additional properties in db stream state | +| 0.6.1 | 2022-08-02 | [14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | +| 0.6.0 | 2022-07-26 | [14362](https://github.com/airbytehq/airbyte/pull/14362) | Integral columns are now discovered as int64 fields. | +| 0.5.17 | 2022-07-22 | [14714](https://github.com/airbytehq/airbyte/pull/14714) | Clarified error message when invalid cursor column selected | +| 0.5.16 | 2022-07-14 | [14574](https://github.com/airbytehq/airbyte/pull/14574) | Removed additionalProperties:false from JDBC source connectors | +| 0.5.15 | 2022-06-23 | [14077](https://github.com/airbytehq/airbyte/pull/14077) | Use the new state management | +| 0.5.13 | 2022-06-21 | [13945](https://github.com/airbytehq/airbyte/pull/13945) | Aligned datatype test | +| 0.5.12 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | +| 0.5.11 | 2022-05-03 | [12544](https://github.com/airbytehq/airbyte/pull/12544) | Prevent source from hanging under certain circumstances by adding a watcher for orphaned threads. | +| 0.5.10 | 2022-04-29 | [12480](https://github.com/airbytehq/airbyte/pull/12480) | Query tables with adaptive fetch size to optimize JDBC memory consumption | +| 0.5.9 | 2022-04-06 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Bump mina-sshd from 2.7.0 to 2.8.0 | +| 0.5.6 | 2022-02-21 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Fixed cursor for old connectors that use non-microsecond format. 
Now connectors work with both formats | +| 0.5.5 | 2022-02-18 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Updated timestamp transformation with microseconds | +| 0.5.4 | 2022-02-11 | [10251](https://github.com/airbytehq/airbyte/issues/10251) | bug Source MySQL CDC: sync failed when has Zero-date value in mandatory column | +| 0.5.2 | 2021-12-14 | [6425](https://github.com/airbytehq/airbyte/issues/6425) | MySQL CDC sync fails because starting binlog position not found in DB | +| 0.5.1 | 2021-12-13 | [8582](https://github.com/airbytehq/airbyte/pull/8582) | Update connector fields title/description | +| 0.5.0 | 2021-12-11 | [7970](https://github.com/airbytehq/airbyte/pull/7970) | Support all MySQL types | +| 0.4.13 | 2021-12-03 | [8335](https://github.com/airbytehq/airbyte/pull/8335) | Source-MySql: do not check cdc required param binlog_row_image for standard replication | +| 0.4.12 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | +| 0.4.11 | 2021-11-19 | [8047](https://github.com/airbytehq/airbyte/pull/8047) | Source MySQL: transform binary data base64 format | +| 0.4.10 | 2021-11-15 | [7820](https://github.com/airbytehq/airbyte/pull/7820) | Added basic performance test | +| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside java's `Short` data type capability | +| 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text | +| 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | +| 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection | +| 0.4.5 | 2021-09-17 | [6146](https://github.com/airbytehq/airbyte/pull/6146) | Added option to connect to DB via SSH | +| 0.4.1 | 2021-07-23 | [4956](https://github.com/airbytehq/airbyte/pull/4956) | Fix log link | +| 0.3.7 | 2021-06-09 | [3179](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | +| 0.3.6 | 2021-06-09 | [3966](https://github.com/airbytehq/airbyte/pull/3966) | Fix excessive logging for CDC method | +| 0.3.5 | 2021-06-07 | [3890](https://github.com/airbytehq/airbyte/pull/3890) | Fix CDC handle tinyint\(1\) and boolean types | +| 0.3.4 | 2021-06-04 | [3846](https://github.com/airbytehq/airbyte/pull/3846) | Fix max integer value failure | +| 0.3.3 | 2021-06-02 | [3789](https://github.com/airbytehq/airbyte/pull/3789) | MySQL CDC poll wait 5 minutes when not received a single record | +| 0.3.2 | 2021-06-01 | [3757](https://github.com/airbytehq/airbyte/pull/3757) | MySQL CDC poll 5s to 5 min | +| 0.3.1 | 2021-06-01 | [3505](https://github.com/airbytehq/airbyte/pull/3505) | Implemented MySQL CDC | +| 0.3.0 | 2021-04-21 | [2990](https://github.com/airbytehq/airbyte/pull/2990) | Support namespaces | +| 0.2.5 | 2021-04-15 | [2899](https://github.com/airbytehq/airbyte/pull/2899) | Fix bug in tests | +| 0.2.4 | 2021-03-28 | [2600](https://github.com/airbytehq/airbyte/pull/2600) | Add NCHAR and NVCHAR support to DB and cursor type casting | +| 0.2.3 | 2021-03-26 | [2611](https://github.com/airbytehq/airbyte/pull/2611) | Add an optional `jdbc_url_params` in parameters | +| 0.2.2 | 2021-03-26 | [2460](https://github.com/airbytehq/airbyte/pull/2460) | Destination supports destination sync mode | 
+| 0.2.1 | 2021-03-18 | [2488](https://github.com/airbytehq/airbyte/pull/2488) | Sources support primary keys |
+| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
+| 0.1.10 | 2021-02-02 | [1887](https://github.com/airbytehq/airbyte/pull/1887) | Migrate AbstractJdbcSource to use iterators |
+| 0.1.9 | 2021-01-25 | [1746](https://github.com/airbytehq/airbyte/pull/1746) | Fix NPE in State Decorator |
+| 0.1.8 | 2021-01-19 | [1724](https://github.com/airbytehq/airbyte/pull/1724) | Fix JdbcSource handling of tables with same names in different schemas |
+| 0.1.7 | 2021-01-14 | [1655](https://github.com/airbytehq/airbyte/pull/1655) | Fix JdbcSource OOM |
+| 0.1.6 | 2021-01-08 | [1307](https://github.com/airbytehq/airbyte/pull/1307) | Migrate Postgres and MySQL to use new JdbcSource |
+| 0.1.5 | 2020-12-11 | [1267](https://github.com/airbytehq/airbyte/pull/1267) | Support incremental sync |
+| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |
diff --git a/docs/integrations/sources/oracle.md b/docs/integrations/sources/oracle.md
index 1e81b7c73fed..e4493f950b19 100644
--- a/docs/integrations/sources/oracle.md
+++ b/docs/integrations/sources/oracle.md
@@ -20,7 +20,7 @@ The Oracle source does not alter the schema present in your database. Depending
 
 On Airbyte Cloud, only TLS connections to your Oracle instance are supported. Other than that, you can proceed with the open-source instructions below.
 
-## Getting Started \(Airbyte Open-Source\)
+## Getting Started \(Airbyte Open Source\)
 
 #### Requirements
 
diff --git a/docs/integrations/sources/pardot.md b/docs/integrations/sources/pardot.md
index f8f304797a39..c4304a8abe0e 100644
--- a/docs/integrations/sources/pardot.md
+++ b/docs/integrations/sources/pardot.md
@@ -1,7 +1,62 @@
 # Pardot
 
+## Overview
+
 The Airbyte Source for [Salesforce Pardot](https://www.pardot.com/)
 
+The Pardot source supports full refresh syncs.
+
+### Output schema
+
+Several output streams are available from this source:
+
+* [Campaigns](https://developer.salesforce.com/docs/marketing/pardot/guide/campaigns-v4.html)
+* [EmailClicks](https://developer.salesforce.com/docs/marketing/pardot/guide/batch-email-clicks-v4.html)
+* [ListMembership](https://developer.salesforce.com/docs/marketing/pardot/guide/list-memberships-v4.html)
+* [Lists](https://developer.salesforce.com/docs/marketing/pardot/guide/lists-v4.html)
+* [ProspectAccounts](https://developer.salesforce.com/docs/marketing/pardot/guide/prospect-accounts-v4.html)
+* [Prospects](https://developer.salesforce.com/docs/marketing/pardot/guide/prospects-v4.html)
+* [Users](https://developer.salesforce.com/docs/marketing/pardot/guide/users-v4.html)
+* [VisitorActivities](https://developer.salesforce.com/docs/marketing/pardot/guide/visitor-activities-v4.html)
+* [Visitors](https://developer.salesforce.com/docs/marketing/pardot/guide/visitors-v4.html)
+* [Visits](https://developer.salesforce.com/docs/marketing/pardot/guide/visits-v4.html)
+
+If there are more endpoints you'd like Airbyte to support, please [create an issue](https://github.com/airbytehq/airbyte/issues/new/choose).
+
+### Features
+
+| Feature | Supported? |
+| :--- | :--- |
+| Full Refresh Sync | Yes |
+| Incremental Sync | No |
+| SSL connection | No |
+| Namespaces | No |
+
+### Performance considerations
+
+The Pardot connector should not run into Pardot API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully.
+
+## Getting started
+
+### Requirements
+
+* Pardot Account
+* Pardot Business Unit ID
+* Client ID
+* Client Secret
+* Refresh Token
+* Start Date
+* Is Sandbox environment?
+
+### Setup guide
+
+* `pardot_business_unit_id`: Pardot Business Unit ID, which can be found at Setup > Pardot > Pardot Account Setup
+* `client_id`: The Consumer Key that can be found when viewing your app in Salesforce
+* `client_secret`: The Consumer Secret that can be found when viewing your app in Salesforce
+* `refresh_token`: Salesforce Refresh Token used for Airbyte to access your Salesforce account. If you don't know what this is, follow [this guide](https://medium.com/@bpmmendis94/obtain-access-refresh-tokens-from-salesforce-rest-api-a324fe4ccd9b) to retrieve it.
+* `start_date`: UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. Leave blank to skip this filter.
+* `is_sandbox`: Whether or not the app is in a Salesforce sandbox. If you do not know what this is, assume it is false.
+
 ## Changelog
 
 | Version | Date | Pull Request | Subject |
diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md
index aa599ef5389e..1aae30167248 100644
--- a/docs/integrations/sources/pinterest.md
+++ b/docs/integrations/sources/pinterest.md
@@ -84,8 +84,10 @@ The connector is restricted by the Pinterest [requests limitation](https://devel
 
 ## Changelog
 
-| Version | Date | Pull Request | Subject |
-|:--------|:-----------| :------------------------------------------------------- |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------| :------------------------------------------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 0.8.1 | 2023-11-16 | [32601](https://github.com/airbytehq/airbyte/pull/32601) | added ability to create custom reports |
+| 0.8.0 | 2023-11-16 | [32592](https://github.com/airbytehq/airbyte/pull/32592) | Make start_date optional; add suggested streams; add missing fields |
 | 0.7.2 | 2023-11-08 | [32299](https://github.com/airbytehq/airbyte/pull/32299) | added default `AvailabilityStrategy`, fixed bug which cases duplicated requests, added new streams: Catalogs, CatalogsFeeds, CatalogsProductGroups, Audiences, Keywords, ConversionTags, CustomerLists, CampaignTargetingReport, AdvertizerReport, AdvertizerTargetingReport, AdGroupReport, AdGroupTargetingReport, PinPromotionReport, PinPromotionTargetingReport, ProductGroupReport, ProductGroupTargetingReport, ProductItemReport, KeywordReport |
 | 0.7.1 | 2023-11-01 | 
[32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | | 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | diff --git a/docs/integrations/sources/pokeapi.md b/docs/integrations/sources/pokeapi.md index 4290a6073023..ee543b33e024 100644 --- a/docs/integrations/sources/pokeapi.md +++ b/docs/integrations/sources/pokeapi.md @@ -4,7 +4,7 @@ The PokéAPI is primarly used as a tutorial and educational resource, as it requires zero dependencies. Learn how Airbyte and this connector works with these tutorials: -- [Airbyte Quickstart: An Introduction to Deploying and Syncing](../../quickstart/deploy-airbyte.md) +- [Airbyte Quickstart: An Introduction to Deploying and Syncing](../../using-airbyte/getting-started/readme.md) - [Airbyte CDK Speedrun: A Quick Primer on Building Source Connectors](../../connector-development/tutorials/cdk-speedrun.md) - [How to Build ETL Sources in Under 30 Minutes: A Video Tutorial](https://www.youtube.com/watch?v=kJ3hLoNfz_E&t=13s&ab_channel=Airbyte) @@ -24,7 +24,7 @@ This source uses the fully open [PokéAPI](https://pokeapi.co/docs/v2#info) to s Currently, only one output stream is available from this source, which is the Pokémon output stream. This schema is defined [here](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-pokeapi/source_pokeapi/schemas/pokemon.json). -## Rate Limiting & Performance Considerations \(Airbyte Open-Source\) +## Rate Limiting & Performance Considerations \(Airbyte Open Source\) According to the API's [fair use policy](https://pokeapi.co/docs/v2#fairuse), please make sure to cache resources retrieved from the PokéAPI wherever possible. That said, the PokéAPI does not perform rate limiting. diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 6c09d3aabd75..e9d7a5928d3b 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -54,7 +54,7 @@ To fill out the required information: #### Step 3: (Airbyte Cloud Only) Allow inbound traffic from Airbyte IPs. If you are on Airbyte Cloud, you will always need to modify your database configuration to allow inbound traffic from Airbyte IPs. You can find a list of all IPs that need to be allowlisted in -our [Airbyte Security docs](../../../operator-guides/security#network-security-1). +our [Airbyte Security docs](../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte Postgres source! @@ -291,8 +291,9 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 3.2.21 | 2023-11-07 | [31856](https://github.com/airbytehq/airbyte/pull/31856) | handle date/timestamp infinity values properly | -| 3.2.20 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | +| 3.2.22 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. 
| +| 3.2.21 | 2023-11-07 | [31856](https://github.com/airbytehq/airbyte/pull/31856) | handle date/timestamp infinity values properly | +| 3.2.20 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | | 3.2.19 | 2023-11-03 | [32050](https://github.com/airbytehq/airbyte/pull/32050) | Adopt java CDK version 0.4.0. | | 3.2.18 | 2023-11-01 | [29038](https://github.com/airbytehq/airbyte/pull/29038) | Fix typo (s/Airbtye/Airbyte/) | | 3.2.17 | 2023-11-01 | [32068](https://github.com/airbytehq/airbyte/pull/32068) | Bump Debezium 2.2.0Final -> 2.4.0Final | diff --git a/docs/integrations/sources/postgres/cloud-sql-postgres.md b/docs/integrations/sources/postgres/cloud-sql-postgres.md index 9a3f9e6e01a0..670d268f82d3 100644 --- a/docs/integrations/sources/postgres/cloud-sql-postgres.md +++ b/docs/integrations/sources/postgres/cloud-sql-postgres.md @@ -58,7 +58,7 @@ If you are on Airbyte Cloud, you will always need to modify your database config ![Add a Network](./assets/airbyte_cloud_sql_postgres_add_network.png) -2. Add a new network, and enter the Airbyte's IPs, which you can find in our [Airbyte Security documentation](../../../operator-guides/security#network-security-1). +2. Add a new network, and enter the Airbyte's IPs, which you can find in our [Airbyte Security documentation](../../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte Postgres source! diff --git a/docs/integrations/sources/redshift.md b/docs/integrations/sources/redshift.md index dafe396d2684..f7d84b6e06d2 100644 --- a/docs/integrations/sources/redshift.md +++ b/docs/integrations/sources/redshift.md @@ -56,6 +56,7 @@ All Redshift connections are encrypted using SSL | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | +| (none) | 2023-11-17 | [32616](https://github.com/airbytehq/airbyte/pull/32616) | Improve timestamptz handling | | 0.4.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | | 0.3.17 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | | 0.3.16 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index bc5652a0c934..a0e2e8ec9d70 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -256,7 +256,8 @@ To perform the text extraction from PDF and Docx files, the connector uses the [ | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| -| 4.2.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema | +| 4.2.2 | 2023-11-20 | [32677](https://github.com/airbytehq/airbyte/pull/32677) | Only read files with ".zip" extension as zipped files | +| 4.2.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve 
spec schema |
 | 4.2.0 | 2023-11-02 | [32109](https://github.com/airbytehq/airbyte/pull/32109) | Fix docs; add HTTPS validation for S3 endpoint; fix coverage |
 | 4.1.4 | 2023-10-30 | [31904](https://github.com/airbytehq/airbyte/pull/31904) | Update CDK |
 | 4.1.3 | 2023-10-25 | [31654](https://github.com/airbytehq/airbyte/pull/31654) | Reduce image size |
diff --git a/docs/integrations/sources/stripe-migrations.md b/docs/integrations/sources/stripe-migrations.md
index 5dc7fa19f9b5..60f4be4d4ab4 100644
--- a/docs/integrations/sources/stripe-migrations.md
+++ b/docs/integrations/sources/stripe-migrations.md
@@ -1,5 +1,17 @@
 # Stripe Migration Guide
 
+## Upgrading to 5.0.0
+
+This change fixes multiple incremental sync issues with the `Refunds`, `Checkout Sessions` and `Checkout Sessions Line Items` streams:
+ - `Refunds` stream was not syncing data in the incremental sync mode. Cursor field has been updated to "created" to allow for incremental syncs. Because of the changed cursor field of the `Refunds` stream, incremental syncs will not reflect every update of the records that have been previously replicated. Only newly created records will be synced. To always have up-to-date data, users are encouraged to make use of the lookback window.
+ - `CheckoutSessions` stream had been missing data for one day when using the incremental sync mode after a reset; this has been resolved.
+ - `CheckoutSessionsLineItems` previously had potential data loss. It has been updated to use a new cursor field `checkout_session_updated`.
+ - Incremental streams with the `created` cursor had been duplicating some data; this has been fixed.
+
+The stream schema updates and the changed cursor fields for the `Refunds` and `CheckoutSessionsLineItems` streams are breaking changes. A schema refresh and data reset of all affected streams is required after the update is applied.
+
+This update also affects three more streams: `Invoices`, `Subscriptions`, and `SubscriptionSchedule`. Their schemas have been changed so that the declared data types match the actual data.
+
 ## Upgrading to 4.0.0
 
 A major update of most streams to support event-based incremental sync mode. This allows the connector to pull not only the newly created data since the last sync, but the modified data as well.
diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md
index 06bea065cf1f..c873eae04833 100644
--- a/docs/integrations/sources/stripe.md
+++ b/docs/integrations/sources/stripe.md
@@ -36,7 +36,7 @@ For more information on Stripe API Keys, see the [Stripe documentation](https://
 5. For **Account ID**, enter your Stripe Account ID. This ID begins with `acct_`, and can be found in the top-right corner of your Stripe [account settings page](https://dashboard.stripe.com/settings/account).
 6. For **Secret Key**, enter the restricted key you created for the connection.
 7. 
For **Replication Start Date**, use the provided datepicker or enter a UTC date and time programmatically in the format `YYYY-MM-DDTHH:mm:ssZ`. The data added on and after this date will be replicated. -8. (Optional) For **Lookback Window**, you may specify a number of days from the present day to reread data. This allows the connector to retrieve data that might have been updated after its initial creation, and is useful for handling any post-transaction adjustments. This applies only to streams that do not support event-based incremental syncs, please see the list below. +8. (Optional) For **Lookback Window**, you may specify a number of days from the present day to reread data. This allows the connector to retrieve data that might have been updated after its initial creation, and is useful for handling any post-transaction adjustments. This applies only to streams that do not support event-based incremental syncs, please see [the list below](#troubleshooting). - Leaving the **Lookback Window** at its default value of 0 means Airbyte will not re-export data after it has been synced. - Setting the **Lookback Window** to 1 means Airbyte will re-export data from the past day, capturing any changes made in the last 24 hours. @@ -143,26 +143,27 @@ Please be aware: this also means that any change older than 30 days will not be ::: ### Troubleshooting -:::note + Since the Stripe API does not allow querying objects which were updated since the last sync, the Stripe connector uses the Events API under the hood to implement incremental syncs and export data based on its update date. However, not all the entities are supported by the Events API, so the Stripe connector uses the `created` field or its analogue to query for new data in your Stripe account. These are the entities synced based on the date of creation: -- `BalanceTransactions` -- `CheckoutSessionLineItems` (cursor field is `checkout_session_expires_at`) +- `Balance Transactions` - `Events` -- `FileLinks` +- `File Links` - `Files` -- `SetupAttempts` -- `ShippingRates` +- `Refunds` +- `Setup Attempts` +- `Shipping Rates` On the other hand, the following streams use the `updated` field value as a cursor: - `Application Fees` - `Application Fee Refunds` - `Authorizations` -- `Bank accounts` +- `Bank Accounts` - `Cardholders` - `Cards` - `Charges` - `Checkout Sessions` +- `Checkout Session Line Items` (cursor field is `checkout_session_updated`) - `Coupons` - `Credit Notes` - `Customer Balance Transactions` @@ -180,7 +181,6 @@ On the other hand, the following streams use the `updated` field value as a curs - `Plans` - `Prices` - `Products` -- `Refunds` - `Reviews` - `Setup Intents` - `Subscription Schedule` @@ -188,9 +188,9 @@ On the other hand, the following streams use the `updated` field value as a curs - `Top Ups` - `Transactions` - `Transfers` -::: -:::note +## Incremental deletes + The Stripe API also provides a way to implement incremental deletes for a limited number of streams: - `Bank Accounts` - `Coupons` @@ -206,97 +206,100 @@ The Stripe API also provides a way to implement incremental deletes for a limite - `Subscriptions` Each record is marked with `is_deleted` flag when the appropriate event happens upstream. -::: - * Check out common troubleshooting issues for the Stripe source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions). 
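+
+To make the incremental sync behavior described above concrete, here is a minimal, hypothetical sketch of the two kinds of requests involved. It assumes a restricted secret key exported as `STRIPE_SECRET_KEY` and an example cutoff timestamp; the exact parameters and pagination the connector uses may differ.
+
+```bash
+# Streams synced by date of update: list change events newer than the last checkpoint
+# via the Events API, then derive the updated records from those events.
+curl -G https://api.stripe.com/v1/events \
+  -u "${STRIPE_SECRET_KEY}:" \
+  -d "created[gte]=1700000000" \
+  -d "limit=100"
+
+# Streams synced by date of creation (for example Refunds): query the object endpoint
+# directly and filter on the created timestamp instead of relying on events.
+curl -G https://api.stripe.com/v1/refunds \
+  -u "${STRIPE_SECRET_KEY}:" \
+  -d "created[gte]=1700000000" \
+  -d "limit=100"
+```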
+### Data type mapping + ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| -| 4.5.3 | 2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent | -| 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues | -| 4.5.1 | 2023-11-01 | [32056](https://github.com/airbytehq/airbyte/pull/32056/) | Use CDK version 0.52.8 | -| 4.5.0 | 2023-10-25 | [31327](https://github.com/airbytehq/airbyte/pull/31327/) | Use concurrent CDK when running in full-refresh | -| 4.4.2 | 2023-10-24 | [31764](https://github.com/airbytehq/airbyte/pull/31764) | Base image migration: remove Dockerfile and use the python-connector-base image | -| 4.4.1 | 2023-10-18 | [31553](https://github.com/airbytehq/airbyte/pull/31553) | Adjusted `Setup Attempts` and extended `Checkout Sessions` stream schemas | -| 4.4.0 | 2023-10-04 | [31046](https://github.com/airbytehq/airbyte/pull/31046) | Added margins field to invoice_line_items stream. | -| 4.3.1 | 2023-09-27 | [30800](https://github.com/airbytehq/airbyte/pull/30800) | Handle permission issues a non breaking | -| 4.3.0 | 2023-09-26 | [30752](https://github.com/airbytehq/airbyte/pull/30752) | Do not sync upcoming invoices, extend stream schemas | -| 4.2.0 | 2023-09-21 | [30660](https://github.com/airbytehq/airbyte/pull/30660) | Fix updated state for the incremental syncs | -| 4.1.1 | 2023-09-15 | [30494](https://github.com/airbytehq/airbyte/pull/30494) | Fix datatype of invoices.lines property | -| 4.1.0 | 2023-08-29 | [29950](https://github.com/airbytehq/airbyte/pull/29950) | Implement incremental deletes, add suggested streams | -| 4.0.1 | 2023-09-07 | [30254](https://github.com/airbytehq/airbyte/pull/30254) | Fix cursorless incremental streams | -| 4.0.0 | 2023-08-15 | [29330](https://github.com/airbytehq/airbyte/pull/29330) | Implement incremental syncs based on date of update | -| 3.17.4 | 2023-08-15 | [29425](https://github.com/airbytehq/airbyte/pull/29425) | Revert 3.17.3 | -| 3.17.3 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Revert 3.17.2 and fix atm_fee property | -| 3.17.2 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Fix stream schemas, remove custom 403 error handling | -| 3.17.1 | 2023-08-01 | [28887](https://github.com/airbytehq/airbyte/pull/28887) | Fix `Invoices` schema | -| 3.17.0 | 2023-07-28 | [26127](https://github.com/airbytehq/airbyte/pull/26127) | Add `Prices` stream | -| 3.16.0 | 2023-07-27 | [28776](https://github.com/airbytehq/airbyte/pull/28776) | Add new fields to stream schemas | -| 3.15.0 | 2023-07-09 | [28709](https://github.com/airbytehq/airbyte/pull/28709) | Remove duplicate streams | -| 3.14.0 | 2023-07-09 | [27217](https://github.com/airbytehq/airbyte/pull/27217) | Add `ShippingRates` stream | -| 3.13.0 | 2023-07-18 | [28466](https://github.com/airbytehq/airbyte/pull/28466) | Pin source API version | -| 3.12.0 | 2023-05-20 | [26208](https://github.com/airbytehq/airbyte/pull/26208) | Add new stream `Persons` | -| 3.11.0 | 2023-06-26 | [27734](https://github.com/airbytehq/airbyte/pull/27734) | License Update: Elv2 stream | -| 3.10.0 | 2023-06-22 | [27132](https://github.com/airbytehq/airbyte/pull/27132) | Add `CreditNotes` stream | 
-| 3.9.1 | 2023-06-20 | [27522](https://github.com/airbytehq/airbyte/pull/27522) | Fix formatting | -| 3.9.0 | 2023-06-19 | [27362](https://github.com/airbytehq/airbyte/pull/27362) | Add new Streams: Transfer Reversals, Setup Attempts, Usage Records, Transactions | -| 3.8.0 | 2023-06-12 | [27238](https://github.com/airbytehq/airbyte/pull/27238) | Add `Topups` stream; Add `Files` stream; Add `FileLinks` stream | -| 3.7.0 | 2023-06-06 | [27083](https://github.com/airbytehq/airbyte/pull/27083) | Add new Streams: Authorizations, Cardholders, Cards, Payment Methods, Reviews | -| 3.6.0 | 2023-05-24 | [25893](https://github.com/airbytehq/airbyte/pull/25893) | Add `ApplicationFeesRefunds` stream with parent `ApplicationFees` | -| 3.5.0 | 2023-05-20 | [22859](https://github.com/airbytehq/airbyte/pull/22859) | Add stream `Early Fraud Warnings` | -| 3.4.3 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | Fix Airbyte date-time data-types | -| 3.4.2 | 2023-05-04 | [25795](https://github.com/airbytehq/airbyte/pull/25795) | Added `CDK TypeTransformer` to guarantee declared JSON Schema data-types | -| 3.4.1 | 2023-04-24 | [23389](https://github.com/airbytehq/airbyte/pull/23389) | Add `customer_tax_ids` to `Invoices` | -| 3.4.0 | 2023-03-20 | [23963](https://github.com/airbytehq/airbyte/pull/23963) | Add `SetupIntents` stream | -| 3.3.0 | 2023-04-12 | [25136](https://github.com/airbytehq/airbyte/pull/25136) | Add stream `Accounts` | -| 3.2.0 | 2023-04-10 | [23624](https://github.com/airbytehq/airbyte/pull/23624) | Add new stream `Subscription Schedule` | -| 3.1.0 | 2023-03-10 | [19906](https://github.com/airbytehq/airbyte/pull/19906) | Expand `tiers` when syncing `Plans` streams | -| 3.0.5 | 2023-03-25 | [22866](https://github.com/airbytehq/airbyte/pull/22866) | Specified date formatting in specification | -| 3.0.4 | 2023-03-24 | [24471](https://github.com/airbytehq/airbyte/pull/24471) | Fix stream slices for single sliced streams | -| 3.0.3 | 2023-03-17 | [24179](https://github.com/airbytehq/airbyte/pull/24179) | Get customer's attributes safely | -| 3.0.2 | 2023-03-13 | [24051](https://github.com/airbytehq/airbyte/pull/24051) | Cache `customers` stream; Do not request transactions of customers with zero balance. | -| 3.0.1 | 2023-02-22 | [22898](https://github.com/airbytehq/airbyte/pull/22898) | Add missing column to Subscriptions stream | -| 3.0.0 | 2023-02-21 | [23295](https://github.com/airbytehq/airbyte/pull/23295) | Fix invoice schema | -| 2.0.0 | 2023-02-14 | [22312](https://github.com/airbytehq/airbyte/pull/22312) | Another fix of `Invoices` stream schema + Remove http urls from openapi_spec.json | -| 1.0.2 | 2023-02-09 | [22659](https://github.com/airbytehq/airbyte/pull/22659) | Set `AvailabilityStrategy` for all streams | -| 1.0.1 | 2023-01-27 | [22042](https://github.com/airbytehq/airbyte/pull/22042) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 1.0.0 | 2023-01-25 | [21858](https://github.com/airbytehq/airbyte/pull/21858) | Update the `Subscriptions` and `Invoices` stream schemas | -| 0.1.40 | 2022-10-20 | [18228](https://github.com/airbytehq/airbyte/pull/18228) | Update the `PaymentIntents` stream schema | -| 0.1.39 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. 
| -| 0.1.38 | 2022-09-09 | [16537](https://github.com/airbytehq/airbyte/pull/16537) | Fix `redeem_by` field type for `customers` stream | -| 0.1.37 | 2022-08-16 | [15686](https://github.com/airbytehq/airbyte/pull/15686) | Fix the bug when the stream couldn't be fetched due to limited permission set, if so - it should be skipped | -| 0.1.36 | 2022-08-04 | [15292](https://github.com/airbytehq/airbyte/pull/15292) | Implement slicing | -| 0.1.35 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from spec and schema | -| 0.1.34 | 2022-07-01 | [14357](https://github.com/airbytehq/airbyte/pull/14357) | Add external account streams - | -| 0.1.33 | 2022-06-06 | [13449](https://github.com/airbytehq/airbyte/pull/13449) | Add semi-incremental support for CheckoutSessions and CheckoutSessionsLineItems streams, fixed big in StripeSubStream, added unittests, updated docs | -| 0.1.32 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | -| 0.1.31 | 2022-04-20 | [12230](https://github.com/airbytehq/airbyte/pull/12230) | Update connector to use a `spec.yaml` | -| 0.1.30 | 2022-03-21 | [11286](https://github.com/airbytehq/airbyte/pull/11286) | Minor corrections to documentation and connector specification | -| 0.1.29 | 2022-03-08 | [10359](https://github.com/airbytehq/airbyte/pull/10359) | Improved performance for streams with substreams: invoice_line_items, subscription_items, bank_accounts | -| 0.1.28 | 2022-02-08 | [10165](https://github.com/airbytehq/airbyte/pull/10165) | Improve 404 handling for `CheckoutSessionsLineItems` stream | -| 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | -| 0.1.26 | 2021-12-21 | [8992](https://github.com/airbytehq/airbyte/pull/8992) | Fix type `events.request` in schema | -| 0.1.25 | 2021-11-25 | [8250](https://github.com/airbytehq/airbyte/pull/8250) | Rearrange setup fields | -| 0.1.24 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Include tax data in `checkout_sessions_line_items` stream | -| 0.1.23 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Correct `payment_intents` schema | -| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | -| 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | -| 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback_window_days parameter | -| 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | -| 0.1.18 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Fix coupons and subscriptions stream schemas by removing incorrect timestamp formatting | -| 0.1.17 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Add `PaymentIntents` stream | -| 0.1.16 | 2021-07-28 | [4980](https://github.com/airbytehq/airbyte/pull/4980) | Remove Updated field from schemas | -| 0.1.15 | 2021-07-21 | [4878](https://github.com/airbytehq/airbyte/pull/4878) | Fix incorrect percent_off and discounts data filed types | -| 0.1.14 | 2021-07-09 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions \(including expired and canceled\) | -| 0.1.13 | 2021-07-03 | 
[4528](https://github.com/airbytehq/airbyte/pull/4528) | Remove regex for acc validation |
-| 0.1.12 | 2021-06-08 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support |
-| 0.1.11 | 2021-05-30 | [3744](https://github.com/airbytehq/airbyte/pull/3744) | Fix types in schema |
-| 0.1.10 | 2021-05-28 | [3728](https://github.com/airbytehq/airbyte/pull/3728) | Update data types to be number instead of int |
-| 0.1.9 | 2021-05-13 | [3367](https://github.com/airbytehq/airbyte/pull/3367) | Add acceptance tests for connected accounts |
-| 0.1.8 | 2021-05-11 | [3566](https://github.com/airbytehq/airbyte/pull/3368) | Bump CDK connectors |
-
- 
\ No newline at end of file
+| Version | Date | Pull Request | Subject |
+|:--------|:-----------|:----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 5.0.1 | 2023-11-17 | [32638](https://github.com/airbytehq/airbyte/pull/32638/) | Availability strategy: check availability of both endpoints (if applicable) - common API + events API |
+| 5.0.0 | 2023-11-16 | [32286](https://github.com/airbytehq/airbyte/pull/32286/) | Fix multiple issues regarding usage of the incremental sync mode for the `Refunds`, `CheckoutSessions`, `CheckoutSessionsLineItems` streams. Fix schemas for the streams: `Invoices`, `Subscriptions`, `SubscriptionSchedule` |
+| 4.5.4 | 2023-11-16 | [32284](https://github.com/airbytehq/airbyte/pull/32284/) | Enable client-side rate limiting |
+| 4.5.3 | 2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent |
+| 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues |
+| 4.5.1 | 2023-11-01 | [32056](https://github.com/airbytehq/airbyte/pull/32056/) | Use CDK version 0.52.8 |
+| 4.5.0 | 2023-10-25 | [31327](https://github.com/airbytehq/airbyte/pull/31327/) | Use concurrent CDK when running in full-refresh |
+| 4.4.2 | 2023-10-24 | [31764](https://github.com/airbytehq/airbyte/pull/31764) | Base image migration: remove Dockerfile and use the python-connector-base image |
+| 4.4.1 | 2023-10-18 | [31553](https://github.com/airbytehq/airbyte/pull/31553) | Adjusted `Setup Attempts` and extended `Checkout Sessions` stream schemas |
+| 4.4.0 | 2023-10-04 | [31046](https://github.com/airbytehq/airbyte/pull/31046) | Added margins field to invoice_line_items stream. 
| +| 4.3.1 | 2023-09-27 | [30800](https://github.com/airbytehq/airbyte/pull/30800) | Handle permission issues a non breaking | +| 4.3.0 | 2023-09-26 | [30752](https://github.com/airbytehq/airbyte/pull/30752) | Do not sync upcoming invoices, extend stream schemas | +| 4.2.0 | 2023-09-21 | [30660](https://github.com/airbytehq/airbyte/pull/30660) | Fix updated state for the incremental syncs | +| 4.1.1 | 2023-09-15 | [30494](https://github.com/airbytehq/airbyte/pull/30494) | Fix datatype of invoices.lines property | +| 4.1.0 | 2023-08-29 | [29950](https://github.com/airbytehq/airbyte/pull/29950) | Implement incremental deletes, add suggested streams | +| 4.0.1 | 2023-09-07 | [30254](https://github.com/airbytehq/airbyte/pull/30254) | Fix cursorless incremental streams | +| 4.0.0 | 2023-08-15 | [29330](https://github.com/airbytehq/airbyte/pull/29330) | Implement incremental syncs based on date of update | +| 3.17.4 | 2023-08-15 | [29425](https://github.com/airbytehq/airbyte/pull/29425) | Revert 3.17.3 | +| 3.17.3 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Revert 3.17.2 and fix atm_fee property | +| 3.17.2 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Fix stream schemas, remove custom 403 error handling | +| 3.17.1 | 2023-08-01 | [28887](https://github.com/airbytehq/airbyte/pull/28887) | Fix `Invoices` schema | +| 3.17.0 | 2023-07-28 | [26127](https://github.com/airbytehq/airbyte/pull/26127) | Add `Prices` stream | +| 3.16.0 | 2023-07-27 | [28776](https://github.com/airbytehq/airbyte/pull/28776) | Add new fields to stream schemas | +| 3.15.0 | 2023-07-09 | [28709](https://github.com/airbytehq/airbyte/pull/28709) | Remove duplicate streams | +| 3.14.0 | 2023-07-09 | [27217](https://github.com/airbytehq/airbyte/pull/27217) | Add `ShippingRates` stream | +| 3.13.0 | 2023-07-18 | [28466](https://github.com/airbytehq/airbyte/pull/28466) | Pin source API version | +| 3.12.0 | 2023-05-20 | [26208](https://github.com/airbytehq/airbyte/pull/26208) | Add new stream `Persons` | +| 3.11.0 | 2023-06-26 | [27734](https://github.com/airbytehq/airbyte/pull/27734) | License Update: Elv2 stream | +| 3.10.0 | 2023-06-22 | [27132](https://github.com/airbytehq/airbyte/pull/27132) | Add `CreditNotes` stream | +| 3.9.1 | 2023-06-20 | [27522](https://github.com/airbytehq/airbyte/pull/27522) | Fix formatting | +| 3.9.0 | 2023-06-19 | [27362](https://github.com/airbytehq/airbyte/pull/27362) | Add new Streams: Transfer Reversals, Setup Attempts, Usage Records, Transactions | +| 3.8.0 | 2023-06-12 | [27238](https://github.com/airbytehq/airbyte/pull/27238) | Add `Topups` stream; Add `Files` stream; Add `FileLinks` stream | +| 3.7.0 | 2023-06-06 | [27083](https://github.com/airbytehq/airbyte/pull/27083) | Add new Streams: Authorizations, Cardholders, Cards, Payment Methods, Reviews | +| 3.6.0 | 2023-05-24 | [25893](https://github.com/airbytehq/airbyte/pull/25893) | Add `ApplicationFeesRefunds` stream with parent `ApplicationFees` | +| 3.5.0 | 2023-05-20 | [22859](https://github.com/airbytehq/airbyte/pull/22859) | Add stream `Early Fraud Warnings` | +| 3.4.3 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | Fix Airbyte date-time data-types | +| 3.4.2 | 2023-05-04 | [25795](https://github.com/airbytehq/airbyte/pull/25795) | Added `CDK TypeTransformer` to guarantee declared JSON Schema data-types | +| 3.4.1 | 2023-04-24 | [23389](https://github.com/airbytehq/airbyte/pull/23389) | Add `customer_tax_ids` to `Invoices` | +| 3.4.0 | 2023-03-20 | 
[23963](https://github.com/airbytehq/airbyte/pull/23963) | Add `SetupIntents` stream | +| 3.3.0 | 2023-04-12 | [25136](https://github.com/airbytehq/airbyte/pull/25136) | Add stream `Accounts` | +| 3.2.0 | 2023-04-10 | [23624](https://github.com/airbytehq/airbyte/pull/23624) | Add new stream `Subscription Schedule` | +| 3.1.0 | 2023-03-10 | [19906](https://github.com/airbytehq/airbyte/pull/19906) | Expand `tiers` when syncing `Plans` streams | +| 3.0.5 | 2023-03-25 | [22866](https://github.com/airbytehq/airbyte/pull/22866) | Specified date formatting in specification | +| 3.0.4 | 2023-03-24 | [24471](https://github.com/airbytehq/airbyte/pull/24471) | Fix stream slices for single sliced streams | +| 3.0.3 | 2023-03-17 | [24179](https://github.com/airbytehq/airbyte/pull/24179) | Get customer's attributes safely | +| 3.0.2 | 2023-03-13 | [24051](https://github.com/airbytehq/airbyte/pull/24051) | Cache `customers` stream; Do not request transactions of customers with zero balance. | +| 3.0.1 | 2023-02-22 | [22898](https://github.com/airbytehq/airbyte/pull/22898) | Add missing column to Subscriptions stream | +| 3.0.0 | 2023-02-21 | [23295](https://github.com/airbytehq/airbyte/pull/23295) | Fix invoice schema | +| 2.0.0 | 2023-02-14 | [22312](https://github.com/airbytehq/airbyte/pull/22312) | Another fix of `Invoices` stream schema + Remove http urls from openapi_spec.json | +| 1.0.2 | 2023-02-09 | [22659](https://github.com/airbytehq/airbyte/pull/22659) | Set `AvailabilityStrategy` for all streams | +| 1.0.1 | 2023-01-27 | [22042](https://github.com/airbytehq/airbyte/pull/22042) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 1.0.0 | 2023-01-25 | [21858](https://github.com/airbytehq/airbyte/pull/21858) | Update the `Subscriptions` and `Invoices` stream schemas | +| 0.1.40 | 2022-10-20 | [18228](https://github.com/airbytehq/airbyte/pull/18228) | Update the `PaymentIntents` stream schema | +| 0.1.39 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. 
| +| 0.1.38 | 2022-09-09 | [16537](https://github.com/airbytehq/airbyte/pull/16537) | Fix `redeem_by` field type for `customers` stream | +| 0.1.37 | 2022-08-16 | [15686](https://github.com/airbytehq/airbyte/pull/15686) | Fix the bug when the stream couldn't be fetched due to limited permission set, if so - it should be skipped | +| 0.1.36 | 2022-08-04 | [15292](https://github.com/airbytehq/airbyte/pull/15292) | Implement slicing | +| 0.1.35 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from spec and schema | +| 0.1.34 | 2022-07-01 | [14357](https://github.com/airbytehq/airbyte/pull/14357) | Add external account streams - | +| 0.1.33 | 2022-06-06 | [13449](https://github.com/airbytehq/airbyte/pull/13449) | Add semi-incremental support for CheckoutSessions and CheckoutSessionsLineItems streams, fixed big in StripeSubStream, added unittests, updated docs | +| 0.1.32 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | +| 0.1.31 | 2022-04-20 | [12230](https://github.com/airbytehq/airbyte/pull/12230) | Update connector to use a `spec.yaml` | +| 0.1.30 | 2022-03-21 | [11286](https://github.com/airbytehq/airbyte/pull/11286) | Minor corrections to documentation and connector specification | +| 0.1.29 | 2022-03-08 | [10359](https://github.com/airbytehq/airbyte/pull/10359) | Improved performance for streams with substreams: invoice_line_items, subscription_items, bank_accounts | +| 0.1.28 | 2022-02-08 | [10165](https://github.com/airbytehq/airbyte/pull/10165) | Improve 404 handling for `CheckoutSessionsLineItems` stream | +| 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | +| 0.1.26 | 2021-12-21 | [8992](https://github.com/airbytehq/airbyte/pull/8992) | Fix type `events.request` in schema | +| 0.1.25 | 2021-11-25 | [8250](https://github.com/airbytehq/airbyte/pull/8250) | Rearrange setup fields | +| 0.1.24 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Include tax data in `checkout_sessions_line_items` stream | +| 0.1.23 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Correct `payment_intents` schema | +| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | +| 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | +| 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback_window_days parameter | +| 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | +| 0.1.18 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Fix coupons and subscriptions stream schemas by removing incorrect timestamp formatting | +| 0.1.17 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Add `PaymentIntents` stream | +| 0.1.16 | 2021-07-28 | [4980](https://github.com/airbytehq/airbyte/pull/4980) | Remove Updated field from schemas | +| 0.1.15 | 2021-07-21 | [4878](https://github.com/airbytehq/airbyte/pull/4878) | Fix incorrect percent_off and discounts data filed types | +| 0.1.14 | 2021-07-09 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions \(including expired and canceled\) | +| 0.1.13 | 2021-07-03 | 
[4528](https://github.com/airbytehq/airbyte/pull/4528) | Remove regex for acc validation | +| 0.1.12 | 2021-06-08 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | +| 0.1.11 | 2021-05-30 | [3744](https://github.com/airbytehq/airbyte/pull/3744) | Fix types in schema | +| 0.1.10 | 2021-05-28 | [3728](https://github.com/airbytehq/airbyte/pull/3728) | Update data types to be number instead of int | +| 0.1.9 | 2021-05-13 | [3367](https://github.com/airbytehq/airbyte/pull/3367) | Add acceptance tests for connected accounts | +| 0.1.8 | 2021-05-11 | [3566](https://github.com/airbytehq/airbyte/pull/3368) | Bump CDK connectors | + + diff --git a/docs/operator-guides/security.md b/docs/operating-airbyte/security.md similarity index 97% rename from docs/operator-guides/security.md rename to docs/operating-airbyte/security.md index a887e8bd5b91..7f1b10973bd6 100644 --- a/docs/operator-guides/security.md +++ b/docs/operating-airbyte/security.md @@ -1,4 +1,4 @@ -# Airbyte Security +# Security Airbyte is committed to keeping your data safe by following industry-standard practices for securing physical deployments, setting access policies, and leveraging the security features of leading Cloud providers. @@ -142,7 +142,7 @@ Airbyte Cloud allows you to log in to the platform using your email and password ### Access Control -Airbyte Cloud supports [user management](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace#add-users-to-your-workspace) but doesn’t support role-based access control (RBAC) yet. +Airbyte Cloud supports [user management](/using-airbyte/workspaces.md#add-users-to-your-workspace) but doesn’t support role-based access control (RBAC) yet. ### Compliance diff --git a/docs/operator-guides/browsing-output-logs.md b/docs/operator-guides/browsing-output-logs.md index 456965c21904..19de2cdcb6b6 100644 --- a/docs/operator-guides/browsing-output-logs.md +++ b/docs/operator-guides/browsing-output-logs.md @@ -1,29 +1,49 @@ -# Browsing Output Logs +# Browsing Logs ## Overview -This tutorial will describe how to explore Airbyte Workspace folders. +Airbyte records the full logs as a part of each sync. These logs can be used to understand the underlying operations Airbyte performs to read data from the source and write to the destination as a part of the [Airbyte Protocol](/understanding-airbyte/airbyte-protocol.md). The logs includes many details, including any errors that can be helpful when troubleshooting sync errors. -This is useful if you need to browse the docker volumes where extra output files of Airbyte server and workers are stored since they may not be accessible through the UI. +:::info +When using Airbyte Open Source, you can also access additional logs outside of the UI. This is useful if you need to browse the Docker volumes where extra output files of Airbyte server and workers are stored. +::: + +To find the logs for a connection, navigate to a connection's `Job History` tab to see the latest syncs. + +## View the logs in the UI +To open the logs in the UI, select the three grey dots next to a sync and select `View logs`. This will open our full screen in-app log viewer. + +:::tip +If you are troubleshooting a sync error, you can search for `Error`, `Exception`, or `Fail` to find common errors. +::: -## Exploring the Logs folders +The in-app log viewer will only search for instances of the search term within that attempt. To search across all attempts, download the logs locally. 
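+
+For example, once the logs are downloaded you can search every attempt at once from a terminal. The sketch below assumes the downloaded log files were saved to a local `./airbyte-logs` folder; adjust the path and the search terms to your setup.
+
+```bash
+# Case-insensitive search for common failure markers across all downloaded attempt logs,
+# printing the file name and line number for each match.
+grep -inE "error|exception|fail" ./airbyte-logs/*.txt
+```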
-When running a Sync in Airbyte, you have the option to look at the logs in the UI as shown next. +## Link to a sync job +To help others quickly find your job, copy the link to the logs to your clipboard, select the three grey dots next to a sync and select `Copy link to job`. -### Identifying Workspace IDs +You can also access the link to a sync job from the in-app log viewer. + +## Download the logs +To download a copy of the logs locally, select the three grey dots next to a sync and select `Download logs`. + +You can also access the download log button from the in-app log viewer. + +:::note +If a sync was completed across multiple attempts, downloading the logs will union all the logs for all attempts for that job. +::: -In the screenshot below, you can notice the highlighted blue boxes are showing the id numbers that were used for the selected "Attempt" for this sync job. +## Exploring Local Logs -In this case, the job was running in `/tmp/workspace/9/2/` folder since the tab of the third attempt is being selected in the UI \(first attempt would be `/tmp/workspace/9/0/`\). + -![](../.gitbook/assets/explore_logs.png) +### Establish the folder directory -The highlighted button in the red circle on the right would allow you to download the logs.log file. -However, there are actually more files being recorded in the same workspace folder... Thus, we might want to dive deeper to explore these folders and gain a better understanding of what is being run by Airbyte. +In the UI, you can find the Attempt ID within the sync job. Most jobs complete in the first attempt, so your folder directory will look like `/tmp/workspace/9/0`. If your sync job completed over multiple attempts, note the number of the attempt you're interested in. For example, for the third attempt, it will look like `/tmp/workspace/9/2/`. ### Understanding the Docker run commands -Scrolling down a bit more, we can also read the different docker commands being used internally are starting with: +We can also see that the different Docker commands used internally start with: ```text docker run --rm -i -v airbyte_workspace:/data -v /tmp/airbyte_local:/local -w /data/9/2 --network host ... @@ -35,7 +55,7 @@ Following [Docker Volume documentation](https://docs.docker.com/storage/volumes/ ### Opening a Unix shell prompt to browse the Docker volume -For example, we can run any docker container/image to browse the content of this named volume by mounting it similarly, let's use the [busybox](https://hub.docker.com/_/busybox) image. +For example, we can run any docker container/image to browse the content of this named volume by mounting it similarly. In the example below, the [busybox](https://hub.docker.com/_/busybox) image is used.
```text docker run -it --rm --volume airbyte_workspace:/data busybox @@ -50,13 +70,15 @@ ls /data/9/2/ Example Output: ```text -catalog.json normalize tap_config.json -logs.log singer_rendered_catalog.json target_config.json +catalog.json +tap_config.json +logs.log +target_config.json ``` ### Browsing from the host shell -Or, if you don't want to transfer to a shell prompt inside the docker image, you can simply run Shell commands using docker commands as a proxy like this: +Or, if you don't want to transfer to a shell prompt inside the docker image, you can run Shell commands using docker commands as a proxy: ```bash docker run -it --rm --volume airbyte_workspace:/data busybox ls /data/9/2 @@ -81,7 +103,7 @@ docker run -it --rm --volume airbyte_workspace:/data busybox cat /data/9/2/catal Example Output: ```text -{"streams":[{"stream":{"name":"exchange_rate","json_schema":{"type":"object","properties":{"CHF":{"type":"number"},"HRK":{"type":"number"},"date":{"type":"string"},"MXN":{"type":"number"},"ZAR":{"type":"number"},"INR":{"type":"number"},"CNY":{"type":"number"},"THB":{"type":"number"},"AUD":{"type":"number"},"ILS":{"type":"number"},"KRW":{"type":"number"},"JPY":{"type":"number"},"PLN":{"type":"number"},"GBP":{"type":"number"},"IDR":{"type":"number"},"HUF":{"type":"number"},"PHP":{"type":"number"},"TRY":{"type":"number"},"RUB":{"type":"number"},"HKD":{"type":"number"},"ISK":{"type":"number"},"EUR":{"type":"number"},"DKK":{"type":"number"},"CAD":{"type":"number"},"MYR":{"type":"number"},"USD":{"type":"number"},"BGN":{"type":"number"},"NOK":{"type":"number"},"RON":{"type":"number"},"SGD":{"type":"number"},"CZK":{"type":"number"},"SEK":{"type":"number"},"NZD":{"type":"number"},"BRL":{"type":"number"}}},"supported_sync_modes":["full_refresh"],"default_cursor_field":[]},"sync_mode":"full_refresh","cursor_field":[]}]} +{"streams":[{"stream":{"name":"exchange_rate","json_schema":{"type":"object","properties":{"CHF":{"type":"number"},"HRK":{"type":"number"},"date":{"type":"string"},"MXN":{"type":"number"},"ZAR":{"type":"number"},"INR":{"type":"number"},"CNY":{"type":"number"},"THB":{"type":"number"},"NZD":{"type":"number"},"BRL":{"type":"number"}}},"supported_sync_modes":["full_refresh"],"default_cursor_field":[]},"sync_mode":"full_refresh","cursor_field":[]}]} ``` ### Extract catalog.json file from docker volume diff --git a/docs/operator-guides/configuring-sync-notifications.md b/docs/operator-guides/configuring-sync-notifications.md deleted file mode 100644 index 6418aa2ffab5..000000000000 --- a/docs/operator-guides/configuring-sync-notifications.md +++ /dev/null @@ -1,55 +0,0 @@ -# Configuring Sync Notifications - -## Overview - -You can set up Airbyte to notify you when syncs have **failed** or **succeeded**. This is achieved through a webhook, a URL that you can input into other applications to get real time data from Airbyte. - -## Set up Slack Notifications on Sync Status - -If you're more of a visual learner, just head over to [this video](https://www.youtube.com/watch?v=NjYm8F-KiFc&ab_channel=Airbyte) to learn how to do this. Otherwise, keep reading! - -**Set up the bot.** - -Navigate to https://api.slack.com/apps/. Hit `Create an App`. - -![](../.gitbook/assets/notifications_create_slack_app.png) - -Then click `From scratch`. Enter your App Name (e.g. Airbyte Sync Notifications) and pick your desired Slack workspace. - -**Set up the webhook URL.** - -Now on the left sidebar, click on `Incoming Webhooks`. 
- -![](../.gitbook/assets/notifications_incoming_webhooks.png) - -Click the slider button in the top right to turn the feature on. Then click `Add New Webhook to Workspace`. - -![](../.gitbook/assets/notifications_add_new_webhook.png) - -Pick the channel that you want to receive Airbyte notifications in (ideally a dedicated one), and click `Allow` to give it permissions to access the channel. You should see the bot show up in the selected channel now. - -Now you should see an active webhook right above the `Add New Webhook to Workspace` button. - -![](../.gitbook/assets/notifications_webhook_url.png) - -Click `Copy.` - -**Add the webhook to Airbyte.** - -Assuming you have a [running instance of Airbyte](../deploying-airbyte/README.md), we can navigate to the UI. Click on Settings and then click on `Notifications`. - -![](../.gitbook/assets/notifications_airbyte_settings.png) - -Simply paste the copied webhook URL in `Connection status Webhook URL` and you're ready to go! On this page, you can click one or both of the sliders to decide whether you want notifications on sync successes, failures, or both. Make sure to click `Save changes` before you leave. - -Your Webhook URL should look something like this: - -![](../.gitbook/assets/notifications_airbyte_notification_settings.png) - -**Test it out.** - -From the settings page, you can click `Test` to send a test message to the channel. Or, just run a sync now and try it out! If all goes well, you should receive a notification in your selected channel that looks like this: - -![](../.gitbook/assets/notifications_slack_message.png) - -You're done! diff --git a/docs/operator-guides/reset.md b/docs/operator-guides/reset.md index ff7dc4d06124..3fba28aa45a3 100644 --- a/docs/operator-guides/reset.md +++ b/docs/operator-guides/reset.md @@ -1,20 +1,25 @@ # Resetting Your Data -The reset button gives you a blank slate, of sorts, to perform a fresh new sync. This can be useful if you are just testing Airbyte or don't necessarily require the data replicated to your destination to be saved permanently. +Resetting your data allows you to drop all previously synced data so that any ensuing sync can start fresh. This is useful if you don't require the data replicated to your destination to be saved permanently or are just testing Airbyte. -![](../.gitbook/assets/reset_your_data_1.png) +Airbyte allows you to reset all streams in the connection, some, or only a single stream (when the connector supports per-stream operations). -As outlined above, you can click on the `Reset your data` button to give you that clean slate. Just as a heads up, here is what it does and doesn't do: +A sync will automatically start after a completed reset, which commonly backfills all historical data. -The reset button **DOES**: +## Performing a Reset +To perform a reset, select `Reset your data` in the UI on a connection's status or job history tabs. You will also be prompted to reset affected streams if you edit any stream settings to ensure data continues to sync accurately. -* Delete all records in your destination tables -* Delete all records in your destination file +Similarly to a sync job, a reset can be completed as successful, failed, or cancelled. To resolve a failed reset, you should manually drop the tables in the destination so that Airbyte can continue syncing accurately into the destination.
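If you do need to drop those tables by hand, the exact commands depend on your destination. The snippet below is only an illustrative sketch for a Postgres destination; the connection string, schema, and table name are placeholders for your own setup:

```bash
# Hypothetical example: drop the table a failed reset left behind so the next
# sync can rebuild it cleanly. Adjust the names for your destination.
psql "postgresql://airbyte:password@localhost:5432/analytics" \
  -c 'DROP TABLE IF EXISTS public.exchange_rate;'
```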
-The reset button **DOES NOT**: +## Reset behavior +When a reset is successfully completed, all the records are deleted from your destination tables (and files, if using local JSON or local CSV as the destination). -* Delete the destination tables -* Delete a destination file if using the LocalCSV or LocalJSON Destinations +:::info +If you are using destinations that are on the [Destinations v2](/release_notes/upgrading_to_destinations_v2.md) framework, only raw tables will be cleared of their data. Final tables will retain all records from the last sync. +::: -Because of this, if you have any orphaned tables or files that are no longer being synced to, they will have to be cleaned up later, as Airbyte will not clean them up for you. +A reset **DOES NOT** delete any destination tables when using a data warehouse, data lake, or database. The schema is retained but will not contain any rows. +:::tip +If you have any orphaned tables or files that are no longer being synced to, they should be cleaned up separately, as Airbyte will not clean them up for you. This can occur when the `Destination Namespace` or `Stream Prefix` connection configuration is changed for an existing connection. +::: diff --git a/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md b/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md index a204b2a2f49b..1f0175b392d8 100644 --- a/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md +++ b/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md @@ -18,7 +18,7 @@ After replication of data from a source connector \(Extract\) to a destination c ## Public Git repository -In the connection settings page, I can add new Transformations steps to apply after [normalization](../../understanding-airbyte/basic-normalization.md). For example, I want to run my custom dbt project jaffle_shop, whenever my sync is done replicating and normalizing my data. +In the connection settings page, I can add new Transformations steps to apply after [normalization](../../using-airbyte/core-concepts/basic-normalization.md). For example, I want to run my custom dbt project jaffle_shop, whenever my sync is done replicating and normalizing my data. You can find the jaffle shop test repository by clicking [here](https://github.com/dbt-labs/jaffle_shop). diff --git a/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md b/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md index 3f6c9357d2c1..4e29e15fe167 100644 --- a/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md +++ b/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md @@ -16,7 +16,7 @@ At its core, Airbyte is geared to handle the EL \(Extract Load\) steps of an ELT However, this is actually producing a table in the destination with a JSON blob column... For the typical analytics use case, you probably want this json blob normalized so that each field is its own column. -So, after EL, comes the T \(transformation\) and the first T step that Airbyte actually applies on top of the extracted data is called "Normalization". You can find more information about it [here](../../understanding-airbyte/basic-normalization.md). +So, after EL, comes the T \(transformation\) and the first T step that Airbyte actually applies on top of the extracted data is called "Normalization".
You can find more information about it [here](../../using-airbyte/core-concepts/basic-normalization.md). Airbyte runs this step before handing the final data over to other tools that will manage further transformation down the line. diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 5c5197441b85..4d2dafd2991f 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -1,5 +1,12 @@ # Upgrading Airbyte +:::info + +If you run on [Airbyte Cloud](https://cloud.airbyte.com/signup) you'll always run on the newest +Airbyte version automatically. This documentation only applies to users deploying our self-managed +version. +::: + ## Overview This tutorial will describe how to determine if you need to run this upgrade process, and if you do, how to do so. This process does require temporarily turning off Airbyte. diff --git a/docs/operator-guides/using-custom-connectors.md b/docs/operator-guides/using-custom-connectors.md index 4516f19ff987..04be26cf889e 100644 --- a/docs/operator-guides/using-custom-connectors.md +++ b/docs/operator-guides/using-custom-connectors.md @@ -1,15 +1,17 @@ # Using custom connectors -If our connector catalog does not fulfill your needs, you can build your own Airbyte connectors. -There are two approaches you can take while jumping on connector development project: -1. You want to build a connector for an **external** source or destination (public API, off-the-shelf DBMS, data warehouses, etc.). In this scenario, your connector development will probably benefit the community. The right way is to open a PR on our repo to add your connector to our catalog. You will then benefit from an Airbyte team review and potential future improvements and maintenance from the community. -2. You want to build a connector for an **internal** source or destination (private API) specific to your organization. This connector has no good reason to be exposed to the community. - -This guide focuses on the second approach and assumes the following: -* You followed our other guides and tutorials about connector developments. -* You finished your connector development, running it locally on an Airbyte development instance. + +:::info +This guide walks through the setup of a Docker-based custom connector. To understand how to use our low-code connector builder, read our guide [here](/connector-development/connector-builder-ui/overview.md). +::: + +If our connector catalog does not fulfill your needs, you can build your own Airbyte connectors! You can either use our [low-code connector builder](/connector-development/connector-builder-ui/overview.md) or upload a Docker-based custom connector. + +This page walks through the process to upload a **Docker-based custom connector**. This is an ideal route for connectors that have an **internal** use case like a private API with a specific fit for your organization. This guide for using Docker-based custom connectors assumes the following: +* You followed our other guides and tutorials about [connector development](/connector-development/connector-builder-ui/overview.md) +* You finished your connector development and have it running locally on an Airbyte development instance. * You want to deploy this connector to a production Airbyte instance running on a VM with docker-compose or on a Kubernetes cluster. 
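At a high level, getting a Docker-based custom connector into a production instance is a build, tag, and push workflow against your private registry, followed by registering the image in the Airbyte UI. The commands below are only a hypothetical sketch: the registry host, repository, image name, and tag are placeholders, and your registry's authentication step will differ:

```bash
# Build the connector image from your local connector project.
docker build . -t source-internal-api:0.1.0

# Tag it for your private registry (host and repository are placeholders).
docker tag source-internal-api:0.1.0 registry.example.com/airbyte-connectors/source-internal-api:0.1.0

# Push it so your production Airbyte instance can pull it.
docker push registry.example.com/airbyte-connectors/source-internal-api:0.1.0
```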
-If you prefer video tutorials, [we recorded a demo about uploading connectors images to a GCP Artifact Registry](https://www.youtube.com/watch?v=4YF20PODv30&ab_channel=Airbyte). +If you prefer video tutorials, we recorded a demo on how to upload [connectors images to a GCP Artifact Registry](https://www.youtube.com/watch?v=4YF20PODv30&ab_channel=Airbyte). ## 1. Create a private Docker registry Airbyte needs to pull its Docker images from a remote Docker registry to consume a connector. @@ -70,42 +72,21 @@ If you want Airbyte to pull images from another private Docker registry, you wil You should run all the above commands from your local/CI environment, where your connector source code is available. -## 4. Use your custom connector in Airbyte +## 4. Use your custom Docker connector in Airbyte At this step, you should have: * A private Docker registry hosting your custom connector image. * Authenticated your Airbyte instance to your private Docker registry. You can pull your connector image from your private registry to validate the previous steps. On your Airbyte instance: run `docker pull :` if you are using our `docker-compose` deployment, or start a pod that is using the connector image. -### 1. Click on Settings -![Step 1 screenshot](https://images.tango.us/public/screenshot_bf5c3e27-19a3-4cc0-bc40-90c80afdbcba?crop=focalpoint&fit=crop&fp-x=0.0211&fp-y=0.9320&fp-z=2.9521&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 2. Click on Sources (or Destinations) -![Step 2 screenshot](https://images.tango.us/public/screenshot_d956e987-424d-4f76-ad39-f6d6172f6acc?crop=focalpoint&fit=crop&fp-x=0.0855&fp-y=0.1083&fp-z=2.7473&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 3. Click on + New connector -![Step 3 screenshot](https://images.tango.us/public/screenshot_52248202-6351-496d-bc8f-892c43cf7cf8?crop=focalpoint&fit=crop&fp-x=0.8912&fp-y=0.0833&fp-z=3.0763&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 4. Fill the name of your custom connector -![Step 4 screenshot](https://images.tango.us/public/screenshot_809a22c8-ff38-4b10-8292-bce7364f111c?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.4145&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 5. Fill the Docker image name of your custom connector -![Step 5 screenshot](https://images.tango.us/public/screenshot_ed91d789-9fc7-4758-a6f0-50bf2f04f248?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.4924&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 6. Fill the Docker Tag of your custom connector image -![Step 6 screenshot](https://images.tango.us/public/screenshot_5b6bff70-5703-4dac-b359-95b9ab8f8ce1?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.5703&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +1. Click on `Settings` in the left-hand sidebar. Navigate to `Sources` or `Destinations` depending on your connector. Click on `Add a new Docker connector`. +2. Name your custom connector in `Connector display name`. This is just the display name used for your workspace. -### 7. 
Fill the URL to your connector documentation -This is a required field at the moment, but you can fill with any value if you do not have online documentation for your connector. -This documentation will be linked in the connector setting page. -![Step 7 screenshot](https://images.tango.us/public/screenshot_007e6465-619f-4553-8d65-9af2f5ad76bc?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.6482&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +3. Fill in the Docker `Docker full image name` and `Docker image tag`. +4. (Optional) Add a link to connector's documentation in `Connector documentation URL` +You can optionally fill this with any value if you do not have online documentation for your connector. +This documentation will be linked in your connector setting's page. -### 8. Click on Add -![Step 8 screenshot](https://images.tango.us/public/screenshot_c097183f-1687-469f-852d-f66f743e8c10?crop=focalpoint&fit=crop&fp-x=0.5968&fp-y=0.7010&fp-z=3.0725&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +5. `Add` the connector to save the configuration. You can now select your new connector when setting up a new connection! \ No newline at end of file diff --git a/docs/project-overview/README.md b/docs/project-overview/README.md deleted file mode 100644 index a427d02b0519..000000000000 --- a/docs/project-overview/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Project Overview - diff --git a/docs/project-overview/code-of-conduct.md b/docs/project-overview/code-of-conduct.md deleted file mode 100644 index 9eacce28a212..000000000000 --- a/docs/project-overview/code-of-conduct.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -description: Our Community Code of Conduct ---- - -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others’ private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
- -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [conduct@airbyte.io](mailto:conduct@airbyte.io). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html) - diff --git a/docs/project-overview/product-support-levels.md b/docs/project-overview/product-support-levels.md deleted file mode 100644 index 47e533d90f40..000000000000 --- a/docs/project-overview/product-support-levels.md +++ /dev/null @@ -1,39 +0,0 @@ -# Connector Support Levels - -The following table describes the support levels of Airbyte connectors. - -| | Certified | Custom | Community | -| --------------------------------- | -------------------------- | -------------------------- | ---------------------- | -| **Availability** | Available to all users | Available to all users | Available to all users | -| **Support: Cloud** | Supported* | Supported** | No Support | -| **Support: Powered by Airbyte** | Supported* | Supported** | No Support | -| **Support: Self-Managed Enterprise** | Supported* | Supported** | No Support | -| **Support: Community (OSS)** | Slack Support only | Slack Support only | No Support | -| **Who builds them?** | Either the community or the Airbyte team. | Anyone can build custom connectors. We recommend using our [Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview) or [Low-code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview). | Typically they are built by the community. The Airbyte team may upgrade them to Certified at any time. | -| **Who maintains them?** | The Airbyte team | Users | Users | -| **Production Readiness** | Guaranteed by Airbyte | Not guaranteed | Not guaranteed | - -\*For Certified connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. Otherwise, please use our support portal and we will address your issues as soon as possible. 
- -\*\*For Custom connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. This support is provided with best efforts, and maintenance/upgrades are owned by the customer. - -## Certified - -A **Certified** connector is actively maintained and supported by the Airbyte team and maintains a high quality bar. It is production ready. - -### What you should know about Certified connectors: - -- Certified connectors are available to all users. -- These connectors have been tested and vetted in order to be certified and are production ready. -- Certified connectors should go through minimal breaking change but in the event an upgrade is needed users will be given an adequate upgrade window. - -## Community - -A **Community** connector is maintained by the Airbyte community until it becomes Certified. Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. - -### What you should know about Community connectors: - -- Community connectors are available to all users. -- Community connectors may be upgraded to Certified at any time, and we will notify users of these upgrades via our Slack Community and in our Connector Catalog. -- Community connectors might not be feature-complete (features planned for release are under development or not prioritized) and may include backward-incompatible/breaking API changes with no or short notice. -- Community connectors have no Support SLAs. diff --git a/docs/project-overview/slack-code-of-conduct.md b/docs/project-overview/slack-code-of-conduct.md deleted file mode 100644 index c88da4c1adb5..000000000000 --- a/docs/project-overview/slack-code-of-conduct.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -description: Be nice to one another. ---- - -# Slack Code of Conduct - -Airbyte's Slack community is growing incredibly fast. We're home to over 1500 data professionals and are growing at an awesome pace. We are proud of our community, and have provided these guidelines to support new members in maintaining the wholesome spirit we have developed here. We appreciate your continued commitment to making this a community we are all excited to be a part of. - -## Rule 1: Be respectful. - -Our desire is for everyone to have a positive, fulfilling experience in Airbyte Slack, and we sincerely appreciate your help in making this happen. -All of the guidelines we provide below are important, but there’s a reason respect is the first rule. We take it seriously, and while the occasional breach of etiquette around Slack is forgivable, we cannot condone disrespectful behavior. - -## Rule 2: Use the most relevant channels. - -We deliberately use topic-specific Slack channels so members of the community can opt-in on various types of conversations. Our members take care to post their messages in the most relevant channel, and you’ll often see reminders about the best place to post a message (respectfully written, of course!). If you're looking for help directly from the Community Assistance Team or other Airbyte employees, please stick to posting in the airbyte-help channel, so we know you're asking us specifically! - -## Rule 3: Don’t double-post. - -Please be considerate of our community members’ time. 
We know your question is important, but please keep in mind that Airbyte Slack is not a customer service platform but a community of volunteers who will help you as they are able around their own work schedule. You have access to all the history, so it’s easy to check if your question has already been asked. - -## Rule 4: Check question for clarity and thoughtfulness. - -Airbyte Slack is a community of volunteers. Our members enjoy helping others; they are knowledgeable, gracious, and willing to give their time and expertise for free. Putting some effort into a well-researched and thoughtful post shows consideration for their time and will gain more responses. - -## Rule 5: Keep it public. - -This is a public forum; please do not contact individual members of this community without their express permission, regardless of whether you are trying to recruit someone, sell a product, or solicit help. - -## Rule 6: No soliciting! - -The purpose of the Airbyte Slack community is to provide a forum for data practitioners to discuss their work and share their ideas and learnings. It is not intended as a place to generate leads for vendors or recruiters, and may not be used as such. - -If you’re a vendor, you may advertise your product in #shameless-plugs. Advertising your product anywhere else is strictly against the rules. - -## Rule 7: Don't spam tags, or use @here or @channel. - -Using the @here and @channel keywords in a post will not help, as they are disabled in Slack for everyone excluding admins. Nonetheless, if you use them we will remind you with a link to this rule, to help you better understand the way Airbyte Slack operates. - -Do not tag specific individuals for help on your questions. If someone chooses to respond to your question, they will do so. You will find that our community of volunteers is generally very responsive and amazingly helpful! - -## Rule 8: Use threads for discussion. - -The simplest way to keep conversations on track in Slack is to use threads. The Airbyte Slack community relies heavily on threads, and if you break from this convention, rest assured one of our community members will respectfully inform you quickly! - -_If you see a message or receive a direct message that violates any of these rules, please contact an Airbyte team member and we will take the appropriate moderation action immediately. We have zero tolerance for intentional rule-breaking and hate speech._ - diff --git a/docs/quickstart/deploy-airbyte.md b/docs/quickstart/deploy-airbyte.md deleted file mode 100644 index 4df34e9aa05a..000000000000 --- a/docs/quickstart/deploy-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Deploy Airbyte - -Deploying Airbyte Open-Source just takes two steps. - -1. Install Docker on your workstation \(see [instructions](https://www.docker.com/products/docker-desktop)\). Make sure you're on the latest version of `docker-compose`. -2. Run the following commands in your terminal: - -```bash -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -./run-ab-platform.sh -``` - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000](http://localhost:8000)! You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy airbyte to your servers, **be sure to change these** in your `.env` file. 
- -Alternatively, if you have an Airbyte Cloud invite, just follow [these steps.](../deploying-airbyte/on-cloud.md) - -If you need direct access to our team for any kind of assistance, don't hesitate to [talk to our team](https://airbyte.com/talk-to-sales-premium-support) to discuss about our premium support offers. - -## FAQ - -If you have any questions about the Airbyte Open-Source setup and deployment process, head over to our [Getting Started FAQ](https://github.com/airbytehq/airbyte/discussions/categories/questions) on our Airbyte Forum that answers the following questions and more: - -- How long does it take to set up Airbyte? -- Where can I see my data once I've run a sync? -- Can I set a start time for my sync? - -If there are any questions that we couldn't answer here, we'd love to help you get started. [Join our Slack](https://airbytehq.slack.com/ssb/redirect) and feel free to ask your questions in the \#getting-started channel. diff --git a/docs/quickstart/getting-started.md b/docs/quickstart/getting-started.md deleted file mode 100644 index afb0e3408522..000000000000 --- a/docs/quickstart/getting-started.md +++ /dev/null @@ -1,105 +0,0 @@ -# Getting Started - -## Goal - -During this getting started tutorial, we are going to replicate currencies closing price into a JSON file. - -## Start Airbyte - -First of all, make sure you have Docker and Docker Compose installed. Then run the following commands: - -```text -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -./run-ab-platform.sh -``` - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000/](http://localhost:8000/). - -## Set up your preferences - -You should see an onboarding page. Enter your email if you want updates about Airbyte and continue. - -![](../.gitbook/assets/airbyte_get-started.png) - -## Set up your first connection - -### Create a source - -The source we are creating will pull data from an external API. It will replicate the closing price of currencies compared to USD since the specified start date. - -To set it up, just follow the instructions on the screenshot below. - -:::info - -You might have to wait ~30 seconds before the fields show up because it is the first time you're using Airbyte. - -::: - -![](../.gitbook/assets/demo_source.png) - -### Create a destination - -The destination we are creating is a simple JSON line file, meaning that it will contain one JSON object per line. Each objects will represent data extracted from the source. - -The resulting files will be located in `/tmp/airbyte_local/json_data` - -:::caution - -Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a MacOS, as /tmp has a symlink that points to /private. It will not work otherwise). You allow it with "File sharing" in `Settings -> Resources -> File sharing -> add the one or two above folder` and hit the "Apply & restart" button. - -::: - -To set it up, just follow the instructions on the screenshot below. - -:::info - -You might have to wait ~30 seconds before the fields show up because it is the first time you're using Airbyte. - -::: - -![](../.gitbook/assets/demo_destination.png) - -### Create connection - -When we create the connection, we can select which data stream we want to replicate. We can also select if we want an incremental replication. The replication will run at the specified sync frequency. - -To set it up, just follow the instructions on the screenshot below. 
- -![](../.gitbook/assets/demo_connection.png) - -## Check the logs of your first sync - -After you've completed the onboarding, you will be redirected to the source list and will see the source you just added. Click on it to find more information about it. You will now see all the destinations connected to that source. Click on it and you will see the sync history. - -From there, you can look at the logs, download them, force a sync and adjust the configuration of your connection. - -![](../.gitbook/assets/demo_history.png) - -## Check the data of your first sync - -Now let's verify that this worked: - -```bash -cat /tmp/airbyte_local/json_data/_airbyte_raw_exchange_rate.jsonl -``` - -You should see one line for each day that was replicated. - -If you have [`jq`](https://stedolan.github.io/jq/) installed, let's look at the evolution of `EUR`. - -```bash -cat /tmp/airbyte_local/test_json/_airbyte_raw_exchange_rate.jsonl | -jq -c '.data | {date: .date, EUR: .EUR }' -``` - -And there you have it. You've pulled data from an API directly into a file and all of the actual configuration for this replication only took place in the UI. - -## That's it! - -This is just the beginning of using Airbyte. We support a large collection of sources and destinations. You can even contribute your own. - -If you have any questions at all, please reach out to us on [Slack](https://slack.airbyte.io/). We’re still in alpha, so if you see any rough edges or want to request a connector you need, please create an issue on our [Github](https://github.com/airbytehq/airbyte) or leave a thumbs up on an existing issue. - -Thank you and we hope you enjoy using Airbyte. - diff --git a/docs/readme.md b/docs/readme.md index cbf550c2a7a6..708a6a790430 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -1,21 +1,25 @@ +--- +displayed_sidebar: docs +--- + # Welcome to Airbyte Docs Whether you are an Airbyte user or contributor, we have docs for you! ## For Airbyte Cloud users -Browse the [connector catalog](https://docs.airbyte.com/integrations/) to find the connector you want. In case the connector is not yet supported on Airbyte Cloud, consider using [Airbyte Open Source](#for-airbyte-open-source-users). +Browse the [connector catalog](/integrations/) to find the connector you want. In case the connector is not yet supported on Airbyte Cloud, consider using [Airbyte Open Source](#for-airbyte-open-source-users). -Next, check out the [step-by-step tutorial](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud) to sign up for Airbyte Cloud, understand Airbyte [concepts](https://docs.airbyte.com/cloud/core-concepts), and run your first sync. Then learn how to [use your Airbyte Cloud account](https://docs.airbyte.com/category/using-airbyte-cloud). +Next, check out the [step-by-step tutorial](/using-airbyte/getting-started) to sign up for Airbyte Cloud, understand Airbyte [concepts](/using-airbyte/core-concepts), and run your first sync. ## For Airbyte Open Source users -Browse the [connector catalog](https://docs.airbyte.com/integrations/) to find the connector you want. If the connector is not yet supported on Airbyte Open Source, [build your own connector](https://docs.airbyte.com/connector-development/). +Browse the [connector catalog](/integrations/) to find the connector you want. If the connector is not yet supported on Airbyte Open Source, [build your own connector](/connector-development/). -Next, check out the [Airbyte Open Source QuickStart](https://docs.airbyte.com/quickstart/deploy-airbyte). 
Then learn how to [deploy](https://docs.airbyte.com/deploying-airbyte/local-deployment) and [manage](https://docs.airbyte.com/operator-guides/upgrading-airbyte) Airbyte Open Source in your cloud infrastructure. +Next, check out the [Airbyte Open Source QuickStart](/quickstart/deploy-airbyte). Then learn how to [deploy](/deploying-airbyte/local-deployment) and [manage](/operator-guides/upgrading-airbyte) Airbyte Open Source in your cloud infrastructure. ## For Airbyte contributors -To contribute to Airbyte code, connectors, and documentation, refer to our [Contributing Guide](https://docs.airbyte.com/contributing-to-airbyte/). +To contribute to Airbyte code, connectors, and documentation, refer to our [Contributing Guide](/contributing-to-airbyte/). [![GitHub stars](https://img.shields.io/github/stars/airbytehq/airbyte?style=social&label=Star&maxAge=2592000)](https://GitHub.com/airbytehq/airbyte/stargazers/) [![License](https://img.shields.io/static/v1?label=license&message=MIT&color=brightgreen)](https://github.com/airbytehq/airbyte/tree/a9b1c6c0420550ad5069aca66c295223e0d05e27/LICENSE/README.md) [![License](https://img.shields.io/static/v1?label=license&message=ELv2&color=brightgreen)](https://github.com/airbytehq/airbyte/tree/a9b1c6c0420550ad5069aca66c295223e0d05e27/LICENSE/README.md) diff --git a/docs/release_notes/july_2022.md b/docs/release_notes/july_2022.md index 0c6cbc35e004..c3a4c8240b2b 100644 --- a/docs/release_notes/july_2022.md +++ b/docs/release_notes/july_2022.md @@ -19,7 +19,7 @@ This page includes new features and improvements to the Airbyte Cloud and Airbyt * Airbyte is currently developing a low-code connector builder, which allows you to easily create new source and destination connectors in your workspace. [#14402](https://github.com/airbytehq/airbyte/pull/14402) [#14317](https://github.com/airbytehq/airbyte/pull/14317) [#14288](https://github.com/airbytehq/airbyte/pull/14288) [#14004](https://github.com/airbytehq/airbyte/pull/14004) -* Added [documentation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace#single-workspace-vs-multiple-workspaces) about the benefits and considerations of having a single workspace vs. multiple workspaces in Airbyte Cloud. [#14608](https://github.com/airbytehq/airbyte/pull/14608) +* Added [documentation](/using-airbyte/workspaces.md#single-workspace-vs-multiple-workspaces) about the benefits and considerations of having a single workspace vs. multiple workspaces in Airbyte Cloud. [#14608](https://github.com/airbytehq/airbyte/pull/14608) ### Improvements * Improved platform security by using Docker images from the latest version of OpenJDK (openjdk:19-slim-bullseye). [#14971](https://github.com/airbytehq/airbyte/pull/14971) diff --git a/docs/release_notes/upgrading_to_destinations_v2.md b/docs/release_notes/upgrading_to_destinations_v2.md index 0d5f70c6bed4..e48eea50f611 100644 --- a/docs/release_notes/upgrading_to_destinations_v2.md +++ b/docs/release_notes/upgrading_to_destinations_v2.md @@ -13,7 +13,7 @@ Airbyte Destinations V2 provides you with: - Internal Airbyte tables in the `airbyte_internal` schema: Airbyte will now generate all raw tables in the `airbyte_internal` schema. We no longer clutter your destination schema with raw data tables. - Incremental delivery for large syncs: Data will be incrementally delivered to your final tables. No more waiting hours to see the first rows in your destination table. 
-To see more details and examples on the contents of the Destinations V2 release, see this [guide](understanding-airbyte/typing-deduping.md). The remainder of this page will walk you through upgrading connectors from legacy normalization to Destinations V2. +To see more details and examples on the contents of the Destinations V2 release, see this [guide](../using-airbyte/core-concepts/typing-deduping.md). The remainder of this page will walk you through upgrading connectors from legacy normalization to Destinations V2. Destinations V2 were in preview for Snowflake and BigQuery during August 2023, and launched on August 29th, 2023. Other destinations will be transitioned to Destinations V2 on or before November 1st, 2023. diff --git a/docs/snowflake-native-apps/facebook-marketing.md b/docs/snowflake-native-apps/facebook-marketing.md index a24a38b37bc1..1b4a458e2e20 100644 --- a/docs/snowflake-native-apps/facebook-marketing.md +++ b/docs/snowflake-native-apps/facebook-marketing.md @@ -3,7 +3,7 @@ The Facebook Marketing Connector by Airbyte is a Snowflake Native Application that allows you to extract data from your Facebook Marketing account and load records into a Snowflake database of your choice. :::info -The Snowflake Native Apps platform is new and rapidly evolving. The Facebook Marketing Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](../understanding-airbyte/connections/full-refresh-overwrite.md) without deduplication is supported. +The Snowflake Native Apps platform is new and rapidly evolving. The Facebook Marketing Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md) without deduplication is supported. ::: # Getting started diff --git a/docs/snowflake-native-apps/linkedin-ads.md b/docs/snowflake-native-apps/linkedin-ads.md index af43f7157cc5..bd34a7ffa565 100644 --- a/docs/snowflake-native-apps/linkedin-ads.md +++ b/docs/snowflake-native-apps/linkedin-ads.md @@ -3,7 +3,7 @@ The LinkedIn Ads Connector by Airbyte is a Snowflake Native Application that allows you to extract data from your LinkedIn Ads account and load records into a Snowflake database of your choice. :::info -The Snowflake Native Apps platform is new and rapidly evolving. The LinkedIn Ads Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](../understanding-airbyte/connections/full-refresh-overwrite.md) without deduplication is supported. +The Snowflake Native Apps platform is new and rapidly evolving. The LinkedIn Ads Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md) without deduplication is supported. ::: # Getting started diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index b9a5d7d12472..000000000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,59 +0,0 @@ -# Troubleshooting & FAQ - -Welcome to the Airbyte troubleshooting guide! 
Like any platform, you may experience issues when using Airbyte. This guide is designed to help you diagnose and resolve any problems you may encounter while using Airbyte. By following the troubleshooting steps outlined in this guide, you can quickly and effectively identify the root cause of the issue and take steps to resolve it. We recommend checking this guide whenever you encounter an issue with Airbyte to help ensure a smooth and uninterrupted experience with our platform. Let's dive in! - -Step 1: Check the logs. The logs provide detailed information about what's happening behind the scenes, and they can help pinpoint the root cause of the problem. - -Step 2: Check the documentation. Our documentation covers a wide range of topics, including common issues and their solutions, troubleshooting tips, and best practices. - -Step 3: Reach out to the community. Our community forum is a great place to ask for help, share your experiences, and learn from others who have faced similar issues. - -Step 4: Open a Github ticket. If you're still unable to resolve the issue after reaching out to the community, it's time to open a support ticket. Our support team is here to help you with any issues you're facing with Airbyte. - -Airbyte is an open source project with a vibrant community that fosters collaboration and mutual support. To ensure accessible troubleshooting guidance, Airbyte offers multiple platforms for users to ask and discuss issues, including the Airbyte Github, Airbyte Community Slack (which is over 10,000 users), and the Airbyte Forum. In addition, Airbyte hosts daily office hours that include topic demonstrations and dedicated space for issue discussion in Zoom meetings. In addition to these community resources, Airbyte also offers premium support packages for users who require additional assistance beyond what is provided by the community. - -## OSS Premium Support -Open source [premium support packages](https://airbyte.com/talk-to-sales-premium-support) are a great option for who use Airbyte OSS and need additional assistance beyond what is provided by the community. These packages typically include access to a dedicated support team that can provide assistance with installation, configuration, troubleshooting, and other technical issues. Premium support packages also often include faster response times, guaranteed issue resolution, and access to updates and patches. By opting for a premium support package, users can enjoy the benefits of open source software while also receiving the peace of mind they need to keep their systems running smoothly. - -Premier Support comes with: - -* 1-business-day SLA for your Severity 0 and 1 -* 2-business-day SLA for your Severity 2 and 3 -* 1-week Pull Request review SLA for first comment -If you need better SLA times, we can definitely discuss this, don't hesitate to [talk to our team](https://airbyte.com/talk-to-sales) about it. You can also see more details about it in our pricing page. - -## Office Hour -Airbyte provides a [Daily Office Hour](https://airbyte.com/daily-office-hour) to discuss issues. -It is a 45 minute meeting, the first 20 minutes are reserved to a weekly topic presentation about Airbyte concepts and the others 25 minutes are for general questions. The schedule is: -* Monday, Wednesday and Fridays: 1 PM PST/PDT -* Tuesday and Thursday: 4 PM CEST - - -## Github Issues -Whenever you face an issue using a connector or with the platform you're welcome to report opening a Github issue. 
-https://github.com/airbytehq/airbyte - - -## Airbyte Slack -You can access Airbyte Slack [here](https://slack.airbyte.com/). - -**Before posting on a channel this please first check if a similar question was already answered.** - -**The existing categories**: -* `#help-connections-issues`: for any questions or issues on your connections -* `#help-infrastructure-deployment`: for any questions or issues on your deployment and infrastructure -* `#help-connector-development`: for any questions about on the CDKs and issues while building a custom connector -* `#help-api-cli-orchestration`: for any questions or issues about the API, CLI, any scheduling effort. -* `#help-contributions`: for any questions about contributing to Airbyte’s codebase - -## Airbyte Forum -We are driving our community support from our [forum](https://github.com/airbytehq/airbyte/discussions). - -**Before posting on this forum please first check if a similar question was already answered.** - -**The existing categories**: -* 🙏 Questions: Ask the community for help on your question. As a reminder, the Airbyte team won’t provide help here, as our support is part of our Airbyte Cloud and Airbyte Enterprise offers. -* 💡 Ideas: Share ideas for new features, improvements, or feedback. -* 🙌 Show & Tell: Share projects, tutorials, videos, and articles you are working on. -* 🫶 Kind words: Show off something you love about Airbyte -* 🐙 General: For anything that doesn’t fit in the above categories diff --git a/docs/understanding-airbyte/airbyte-protocol.md b/docs/understanding-airbyte/airbyte-protocol.md index 17c742722882..e436b24eada6 100644 --- a/docs/understanding-airbyte/airbyte-protocol.md +++ b/docs/understanding-airbyte/airbyte-protocol.md @@ -143,7 +143,7 @@ The `discover` method detects and describes the _structure_ of the data in the d 1. `config` - A configuration JSON object that has been validated using `ConnectorSpecification#connectionSpecification` (see [ActorSpecification](#actor-specification) for information on `connectionSpecification`). 2. `configured catalog` - A `ConfiguredAirbyteCatalog` is built on top of the `catalog` returned by `discover`. The `ConfiguredAirbyteCatalog` specifies HOW the data in the catalog should be replicated. The catalog is documented in the [Catalog Section](#catalog). -3. `state` - An JSON object that represents a checkpoint in the replication. This object is only ever written or read by the source, so it is a JSON blob with whatever information is necessary to keep track of how much of the data source has already been read (learn more in the [State & Checkpointing](#state--checkpointing) Section). +3. `state` - A JSON object that represents a checkpoint in the replication. This object is only ever written or read by the source, so it is a JSON blob with whatever information is necessary to keep track of how much of the data source has already been read (learn more in the [State & Checkpointing](#state--checkpointing) Section). #### Output: @@ -333,7 +333,7 @@ Technical systems often group their underlying data into namespaces with each na An example of a namespace is the RDBMS's `schema` concept. An API namespace might be used for multiple accounts (e.g. `company_a` vs `company_b`, each having a "users" and "purchases" stream). Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organization. -The `AirbyteStream` represents this concept through an optional field called `namespace`. 
Additional documentation on Namespaces can be found [here](namespaces.md). +The `AirbyteStream` represents this concept through an optional field called `namespace`. Additional documentation on Namespaces can be found [here](/using-airbyte/core-concepts/namespaces.md). ### Cursor diff --git a/docs/understanding-airbyte/beginners-guide-to-catalog.md b/docs/understanding-airbyte/beginners-guide-to-catalog.md index ff5451e15c5d..1953b1681c82 100644 --- a/docs/understanding-airbyte/beginners-guide-to-catalog.md +++ b/docs/understanding-airbyte/beginners-guide-to-catalog.md @@ -16,7 +16,7 @@ This article will illustrate how to use `AirbyteCatalog` via a series of example * [Dynamic Streams Example](#dynamic-streams-example) * [Nested Schema Example](#nested-schema-example) -In order to understand in depth how to configure incremental data replication, head over to the [incremental replication docs](connections/incremental-append.md). +In order to understand in depth how to configure incremental data replication, head over to the [incremental replication docs](/using-airbyte/core-concepts/sync-modes/incremental-append.md). ## Database Example @@ -92,7 +92,7 @@ The catalog is structured as a list of `AirbyteStream`. In the case of a databas Let's walk through what each field in a stream means. * `name` - The name of the stream. -* `supported_sync_modes` - This field lists the type of data replication that this source supports. The possible values in this array include `FULL_REFRESH` \([docs](connections/full-refresh-overwrite.md)\) and `INCREMENTAL` \([docs](connections/incremental-append.md)\). +* `supported_sync_modes` - This field lists the type of data replication that this source supports. The possible values in this array include `FULL_REFRESH` \([docs](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md)\) and `INCREMENTAL` \([docs](/using-airbyte/core-concepts/sync-modes/incremental-append.md)\). * `source_defined_cursor` - If the stream supports `INCREMENTAL` replication, then this field signals whether the source can figure out how to detect new records on its own or not. * `json_schema` - This field is a [JsonSchema](https://json-schema.org/understanding-json-schema) object that describes the structure of the data. Notice that each key in the `properties` object corresponds to a column name in our database table. @@ -137,7 +137,7 @@ Let's walk through each field in the `ConfiguredAirbyteStream`: * `sync_mode` - This field must be one of the values that was in `supported_sync_modes` in the `AirbyteStream` - Configures which sync mode will be used when data is replicated. * `stream` - Hopefully this one looks familiar! This field contains an `AirbyteStream`. It should be _identical_ to the one we saw in the `AirbyteCatalog`. -* `cursor_field` - When `sync_mode` is `INCREMENTAL` and `source_defined_cursor = false`, this field configures which field in the stream will be used to determine if a record should be replicated or not. Read more about this concept in our [documentation of incremental replication](connections/incremental-append.md). +* `cursor_field` - When `sync_mode` is `INCREMENTAL` and `source_defined_cursor = false`, this field configures which field in the stream will be used to determine if a record should be replicated or not. Read more about this concept in our [documentation of incremental replication](/using-airbyte/core-concepts/sync-modes/incremental-append.md). 
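As a quick way to see these fields side by side, you can inspect a configured catalog file with `jq`. This is only an illustrative sketch: the file path is a placeholder, and it assumes a configured catalog shaped like the examples in this guide (a top-level `streams` array whose entries carry `stream`, `sync_mode`, and `cursor_field`):

```bash
# Hypothetical sketch: print each configured stream's name, sync mode, and cursor field.
jq -c '.streams[] | {name: .stream.name, sync_mode: .sync_mode, cursor_field: .cursor_field}' configured_catalog.json
```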
### Summary of the Postgres Example diff --git a/docs/understanding-airbyte/connections/README.md b/docs/understanding-airbyte/connections/README.md deleted file mode 100644 index 5e6c449152b7..000000000000 --- a/docs/understanding-airbyte/connections/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Connections and Sync Modes - -A connection is a configuration for syncing data between a source and a destination. To setup a connection, a user must configure things such as: - -- Sync schedule: when to trigger a sync of the data. -- Destination [Namespace](../namespaces.md) and stream names: where the data will end up being written. -- A catalog selection: which [streams and fields](../airbyte-protocol.md#catalog) to replicate from the source -- Sync mode: how streams should be replicated \(read and write\): -- Optional transformations: how to convert Airbyte protocol messages \(raw JSON blob\) data into some other data representations. - -## Sync schedules - -Sync schedules are explained below. For information about catalog selections, see [AirbyteCatalog & ConfiguredAirbyteCatalog](../airbyte-protocol.md#catalog). - -Syncs will be triggered by either: - -- A manual request \(i.e: clicking the "Sync Now" button in the UI\) -- A schedule - -When a scheduled connection is first created, a sync is executed as soon as possible. After that, a sync is run once the time since the last sync \(whether it was triggered manually or due to a schedule\) has exceeded the schedule interval. For example, consider the following illustrative scenario: - -- **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours. -- **October 1st, 2:01pm**: sync job runs -- **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered. -- **October 2nd, 5pm**: The user manually triggers a sync from the UI -- **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run -- **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run - -## Destination namespace - -The location of where a connection replication will store data is referenced as the destination namespace. The destination connectors should create and write records \(for both raw and normalized tables\) in the specified namespace which should be configurable in the UI via the Namespace Configuration field \(or NamespaceDefinition in the API\). You can read more about configuring namespaces [here](../namespaces.md). - -## Destination stream name - -### Prefix stream name - -Stream names refer to table names in a typical RDBMS. But it can also be the name of an API endpoint, etc. Similarly to the namespace, stream names can be configured to diverge from their names in the source with a "prefix" field. The prefix is prepended to the source stream name in the destination. - -## Stream-specific customization - -All the customization of namespace and stream names described above will be equally applied to all streams selected for replication in a catalog per connection. If you need more granular customization, stream by stream, for example, or with different logic rules, then you could follow the tutorial on [customizing transformations with dbt](../../operator-guides/transformation-and-normalization/transformations-with-dbt.md). - -## Sync modes - -A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. To minimize confusion, a mode's behavior is reflected in its name. 
The easiest way to understand Airbyte's sync modes is to understand how the modes are named. - -1. The first part of the name denotes how the source connector reads data from the source: - 1. Incremental: Read records added to the source since the last sync job. \(The first sync using Incremental is equivalent to a Full Refresh\) - - Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. - - Method 2: Using change data capture. Only supported by some sources. See [CDC](../cdc.md) for more info. - 2. Full Refresh: Read everything in the source. -2. The second part of the sync mode name denotes how the destination connector writes data. This is not affected by how the source connector produced the data: - 1. Overwrite: Overwrite by first deleting existing data in the destination. - 2. Append: Write by adding data to existing tables in the destination. - 3. Deduped History: Write by first adding data to existing tables in the destination to keep a history of changes. The final table is produced by de-duplicating the intermediate ones using a primary key. - -A sync mode is therefore, a combination of a source and destination mode together. The UI exposes the following options, whenever both source and destination connectors are capable to support it for the corresponding stream: - -- [Full Refresh Overwrite](full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. -- [Full Refresh Append](full-refresh-append.md): Sync the whole stream and append data in destination. -- [Incremental Append](incremental-append.md): Sync new records from stream and append data in destination. -- [Incremental Append + Deduped](incremental-append-deduped.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. - -## Optional operations - -### Typing and Deduping - -As described by the [Airbyte Protocol from the Airbyte Specifications](../airbyte-protocol.md), replication is composed of source connectors that are transmitting data in a JSON format. It is then written as such by the destination connectors. On top of this replication, Airbyte's database and datawarehous destinations can provide converstions from the raw JSON data into type-cast relational columns. Learn more [here](/understanding-airbyte/typing-deduping). - -:::note - -Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. - -::: - -### Custom sync operations - -Further operations can be included in a sync on top of Airbyte basic normalization \(or even to replace it completely\). See [operations](../operations.md) for more details. diff --git a/docs/understanding-airbyte/namespaces.md b/docs/understanding-airbyte/namespaces.md deleted file mode 100644 index d5deac5d12fc..000000000000 --- a/docs/understanding-airbyte/namespaces.md +++ /dev/null @@ -1,122 +0,0 @@ -# Namespaces - -## High-Level Overview - -:::info - -The high-level overview contains all the information you need to use Namespaces when pulling from APIs. Information past that can be read for advanced or educational purposes. - -::: - -When looking through our connector docs, you'll notice that some sources and destinations support "Namespaces." 
These allow you to organize and separate your data into groups in the destination if the destination supports it. In most cases, namespaces are schemas in the database you're replicating to. If your desired destination doesn't support it, you can ignore this feature. - -Note that this is the location that both your normalized and raw data will get written to. Your raw data will show up with the prefix `_airbyte_raw_` in the namespace you define. If you don't enable basic normalization, you will only receive the raw tables. - -If only your destination supports namespaces, you have two simple options. **This is the most likely case**, as all HTTP APIs currently don't support Namespaces. - -1. Mirror Destination Settings - Replicate to the default namespace in the destination, which will differ based on your destination. -2. Custom Format - Create a "Custom Format" to rename the namespace that your data will be replicated into. - -If both your desired source and destination support namespaces, you're likely using a more advanced use case with a database as a source, so continue reading. - -## What is a Namespace? - -Technical systems often group their underlying data into namespaces with each namespace's data isolated from another namespace. This isolation allows for better organisation and flexibility, leading to better usability. - -An example of a namespace is the RDMS's `schema` concept. Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organisation. - -## Syncing - -The Airbyte Protocol supports namespaces and allows Sources to define namespaces, and Destinations to write to various namespaces. - -If the Source does not support namespaces, the data will be replicated into the Destination's default namespace. For databases, the default namespace is the schema provided in the destination configuration. - -If the Destination does not support namespaces, the [namespace field](https://github.com/airbytehq/airbyte/blob/master/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml#L64) is ignored. - -## Destination namespace configuration - -As part of the [connections sync settings](connections/), it is possible to configure the namespace used by: 1. destination connectors: to store the `_airbyte_raw_*` tables. 2. basic normalization: to store the final normalized tables. - -Note that custom transformation outputs are not affected by the namespace settings from Airbyte: It is up to the configuration of the custom dbt project, and how it is written to handle its [custom schemas](https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas). The default target schema for dbt in this case, will always be the destination namespace. - -Available options for namespace configurations are: - -### - Mirror source structure - -Some sources \(such as databases based on JDBC for example\) are providing namespace information from which a stream has been extracted. Whenever a source is able to fill this field in the catalog.json file, the destination will try to reproduce exactly the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will fall back to the "Destination Connector settings". - -### - Destination connector settings - -All stream will be replicated and store in the default namespace defined on the destination settings page. 
In the destinations, namespace refers to: - -| Destination Connector | Namespace setting | -| :--- | :--- | -| BigQuery | dataset | -| MSSQL | schema | -| MySql | database | -| Oracle DB | schema | -| Postgres | schema | -| Redshift | schema | -| Snowflake | schema | -| S3 | path prefix | - -### - Custom format - -When replicating multiple sources into the same destination, conflicts on tables being overwritten by syncs can occur. - -For example, a Github source can be replicated into a "github" schema. But if we have multiple connections to different GitHub repositories \(similar in multi-tenant scenarios\): - -* we'd probably wish to keep the same table names \(to keep consistent queries downstream\) -* but store them in different namespaces \(to avoid mixing data from different "tenants"\) - -To solve this, we can either: - -* use a specific namespace for each connection, thus this option of custom format. -* or, use prefix to stream names as described below. - -Note that we can use a template format string using variables that will be resolved during replication as follow: - -* `${SOURCE_NAMESPACE}`: will be replaced by the namespace provided by the source if available - -### Examples - -The following table summarises how this works. We assume an example of replication configurations between a Postgres Source and Snowflake Destination \(with settings of schema = "my\_schema"\): - -| Namespace Configuration | Source Namespace | Source Table Name | Destination Namespace | Destination Table Name | -| :--- | :--- | :--- | :--- | :--- | -| Mirror source structure | public | my\_table | public | my\_table | -| Mirror source structure | | my\_table | my\_schema | my\_table | -| Destination connector settings | public | my\_table | my\_schema | my\_table | -| Destination connector settings | | my\_table | my\_schema | my\_table | -| Custom format = "custom" | public | my\_table | custom | my\_table | -| Custom format = "${SOURCE\_NAMESPACE}" | public | my\_table | public | my\_table | -| Custom format = "my\_${SOURCE\_NAMESPACE}\_schema" | public | my\_table | my\_public\_schema | my\_table | -| Custom format = " " | public | my\_table | my\_schema | my\_table | - -## Requirements - -* Both Source and Destination connectors need to support namespaces. -* Relevant Source and Destination connectors need to be at least version `0.3.0` or later. -* Airbyte version `0.21.0-alpha` or later. - -## Current Support - -### Sources - -* MSSQL -* MYSQL -* Oracle DB -* Postgres -* Redshift - -### Destination - -* BigQuery -* MSSQL -* MySql -* Oracle DB -* Postgres -* Redshift -* Snowflake -* S3 - diff --git a/docs/understanding-airbyte/operations.md b/docs/understanding-airbyte/operations.md index f3839499e39b..b21a087651b3 100644 --- a/docs/understanding-airbyte/operations.md +++ b/docs/understanding-airbyte/operations.md @@ -1,6 +1,6 @@ # Operations -Airbyte [connections](connections/) support configuring additional transformations that execute after the sync. Useful applications could be: +Airbyte [connections](/using-airbyte/core-concepts/sync-modes/) support configuring additional transformations that execute after the sync. Useful applications could be: * Customized normalization to better fit the requirements of your own business context. * Business transformations from a technical data representation into a more logical and business oriented data structure. 
This can facilitate usage by end-users, non-technical operators, and executives looking to generate Business Intelligence dashboards and reports. diff --git a/docs/understanding-airbyte/tech-stack.md b/docs/understanding-airbyte/tech-stack.md index ba69157075e6..c829f8b7a81b 100644 --- a/docs/understanding-airbyte/tech-stack.md +++ b/docs/understanding-airbyte/tech-stack.md @@ -3,7 +3,7 @@ ## Airbyte Core Backend * [Java 17](https://jdk.java.net/archive/) -* Framework: [Jersey](https://eclipse-ee4j.github.io/jersey/) +* Framework: [Micronaut](https://micronaut.io/) * API: [OAS3](https://www.openapis.org/) * Databases: [PostgreSQL](https://www.postgresql.org/) * Unit & E2E testing: [JUnit 5](https://junit.org/junit5) @@ -18,7 +18,7 @@ Connectors can be written in any language. However the most common languages are ## **Frontend** -* [Node.js 16](https://nodejs.org/en/) +* [Node.js](https://nodejs.org/en/) * [TypeScript](https://www.typescriptlang.org/) * Web Framework/Library: [React](https://reactjs.org/) @@ -27,7 +27,7 @@ Connectors can be written in any language. However the most common languages are * CI/CD: [GitHub Actions](https://github.com/features/actions) * Containerization: [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/) * Linter \(Frontend\): [ESLint](https://eslint.org/) -* Formatter \(Frontend\): [Prettier](https://prettier.io/) +* Formatter \(Frontend & Backend\): [Prettier](https://prettier.io/) * Formatter \(Backend\): [Spotless](https://github.com/diffplug/spotless) ## FAQ diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/using-airbyte/core-concepts/basic-normalization.md similarity index 91% rename from docs/understanding-airbyte/basic-normalization.md rename to docs/using-airbyte/core-concepts/basic-normalization.md index e51f4eb1a1ac..b76d4759de54 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/using-airbyte/core-concepts/basic-normalization.md @@ -2,7 +2,7 @@ :::danger -Basic normalization is being removed in favor of [Typing and Deduping](/understanding-airbyte/typing-deduping), as part of [Destinations V2](/release_notes/upgrading_to_destinations_v2). This pages remains as a guide for legacy connectors. +Basic normalization is being removed in favor of [Typing and Deduping](typing-deduping.md), as part of [Destinations V2](/release_notes/upgrading_to_destinations_v2). This page remains as a guide for legacy connectors. ::: @@ -14,10 +14,23 @@ The high-level overview contains all the information you need to use Basic Norma ::: -When you run your first Airbyte sync without the basic normalization, you'll notice that your data gets written to your destination as one data column with a JSON blob that contains all of your data. This is the `_airbyte_raw_` table that you may have seen before. Why do we create this table? A core tenet of ELT philosophy is that data should be untouched as it moves through the E and L stages so that the raw data is always accessible. If an unmodified version of the data exists in the destination, it can be retransformed without needing to sync data again. +For every connection, you can choose between two options: + +- Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination.
_Note: Not all destinations support normalization._ +- Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` + +When basic normalization is enabled, Airbyte transforms data after the sync in a step called `Basic Normalization`, which structures data from the source into a format appropriate for consumption in the destination. For example, when writing data from a nested, dynamically typed source like a JSON API to a relational destination like Postgres, normalization is the process which un-nests JSON from the source into a relational table format which uses the appropriate column types in the destination. + +Without basic normalization, your data will be written to your destination as one data column with a JSON blob that contains all of your data. This is the `_airbyte_raw_` table that you may have seen before. Why do we create this table? A core tenet of ELT philosophy is that data should be untouched as it moves through the E and L stages so that the raw data is always accessible. If an unmodified version of the data exists in the destination, it can be retransformed without needing to sync data again. If you have Basic Normalization enabled, Airbyte automatically uses this JSON blob to create a schema and tables with your data in mind, converting it to the format of your destination. This runs after your sync and may take a long time if you have a large amount of data synced. If you don't enable Basic Normalization, you'll have to transform the JSON data from that column yourself. +:::note + +Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. + +::: + ## Example Basic Normalization uses a fixed set of rules to map a json object from a source to the types and format that are native to the destination. For example if a source emits data that looks like this: @@ -78,7 +91,7 @@ Additional metadata columns can be added on some tables depending on the usage: - On de-duplicated (and SCD) tables: - `_airbyte_unique_key`: hash of primary keys used to de-duplicate the final table. -The [normalization rules](basic-normalization.md#Rules) are _not_ configurable. They are designed to pick a reasonable set of defaults to hit the 80/20 rule of data normalization. We respect that normalization is a detail-oriented problem and that with a fixed set of rules, we cannot normalize your data in such a way that covers all use cases. If this feature does not meet your normalization needs, we always put the full json blob in destination as well, so that you can parse that object however best meets your use case. We will be adding more advanced normalization functionality shortly. Airbyte is focused on the EL of ELT. If you need a really featureful tool for the transformations then, we suggest trying out dbt. +The [normalization rules](#Rules) are _not_ configurable. They are designed to pick a reasonable set of defaults to hit the 80/20 rule of data normalization. We respect that normalization is a detail-oriented problem and that with a fixed set of rules, we cannot normalize your data in such a way that covers all use cases. If this feature does not meet your normalization needs, we always put the full json blob in destination as well, so that you can parse that object however best meets your use case. We will be adding more advanced normalization functionality shortly. Airbyte is focused on the EL of ELT. 
If you need a really featureful tool for the transformations then, we suggest trying out dbt. Airbyte places the json blob version of your data in a table called `_airbyte_raw_`. If basic normalization is turned on, it will place a separate copy of the data in a table called ``. Under the hood, Airbyte is using dbt, which means that the data only ingresses into the data store one time. The normalization happens as a query within the datastore. This implementation avoids extra network time and costs. @@ -94,7 +107,7 @@ Airbyte runs this step before handing the final data over to other tools that wi To summarize, we can represent the ELT process in the diagram below. These are steps that happens between your "Source Database or API" and the final "Replicated Tables" with examples of implementation underneath: -![](../.gitbook/assets/connecting-EL-with-T-4.png) +![](../../.gitbook/assets/connecting-EL-with-T-4.png) In Airbyte, the current normalization option is implemented using a dbt Transformer composed of: @@ -103,14 +116,14 @@ In Airbyte, the current normalization option is implemented using a dbt Transfor ## Destinations that Support Basic Normalization -- [BigQuery](../integrations/destinations/bigquery.md) -- [MS Server SQL](../integrations/destinations/mssql.md) -- [MySQL](../integrations/destinations/mysql.md) +- [BigQuery](../../integrations/destinations/bigquery.md) +- [MS Server SQL](../../integrations/destinations/mssql.md) +- [MySQL](../../integrations/destinations/mysql.md) - The server must support the `WITH` keyword. - Require MySQL >= 8.0, or MariaDB >= 10.2.1. -- [Postgres](../integrations/destinations/postgres.md) -- [Redshift](../integrations/destinations/redshift.md) -- [Snowflake](../integrations/destinations/snowflake.md) +- [Postgres](../../integrations/destinations/postgres.md) +- [Redshift](../../integrations/destinations/redshift.md) +- [Snowflake](../../integrations/destinations/snowflake.md) Basic Normalization can be configured when you're creating the connection between your Connection Setup and after in the Transformation Tab. Select the option: **Normalized tabular data**. @@ -131,8 +144,8 @@ Airbyte uses the types described in the catalog to determine the correct type fo | `bit` | boolean | | | `boolean` | boolean | | | `string` with format label `date-time` | timestamp with timezone | | -| `array` | new table | see [nesting](basic-normalization.md#Nesting) | -| `object` | new table | see [nesting](basic-normalization.md#Nesting) | +| `array` | new table | see [nesting](#Nesting) | +| `object` | new table | see [nesting](#Nesting) | ### Nesting @@ -326,11 +339,11 @@ As mentioned in the overview: To enable basic normalization \(which is optional\), you can toggle it on or disable it in the "Normalization and Transformation" section when setting up your connection: -![](../.gitbook/assets/basic-normalization-configuration.png) +![](../../.gitbook/assets/basic-normalization-configuration.png) ## Incremental runs -When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](connections/full-refresh-append.md), [incremental append](connections/incremental-append.md) or [incremental deduped history](connections/incremental-append-deduped.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. 
+When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](./sync-modes/full-refresh-append.md), [incremental append](./sync-modes/incremental-append.md) or [incremental deduped history](./sync-modes/incremental-append-deduped.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. Normalization will then try to build the normalized tables incrementally as the rows in the raw tables that have been created or updated since the last time dbt ran. As such, on each dbt run, the models get built incrementally. This limits the amount of data that needs to be transformed, vastly reducing the runtime of the transformations. This improves warehouse performance and reduces compute costs. Because normalization can be either run incrementally and, or, in full refresh, a technical column `_airbyte_normalized_at` can serve to track when was the last time a record has been transformed and written by normalization. This may greatly diverge from the `_airbyte_emitted_at` value as the normalized tables could be totally re-built at a latter time from the data stored in the `_airbyte_raw` tables. @@ -342,15 +355,15 @@ Normalization produces tables that are partitioned, clustered, sorted or indexed In general, normalization needs to do lookup on the last emitted_at column to know if a record is freshly produced and need to be incrementally processed or not. But in certain models, such as SCD tables for example, we also need to retrieve older data to update their type 2 SCD end_date and active_row flags, thus a different partitioning scheme is used to optimize that use case. -On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](connections/incremental-append-deduped.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). +On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](./sync-modes/incremental-append-deduped.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). ## Extending Basic Normalization Note that all the choices made by Normalization as described in this documentation page in terms of naming (and more) could be overridden by your own custom choices. To do so, you can follow the following tutorials: -- to build a [custom SQL view](../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions -- to export, edit and run [custom dbt normalization](../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself -- or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). 
+- to build a [custom SQL view](../../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions +- to export, edit and run [custom dbt normalization](../../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself +- or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). ## CHANGELOG diff --git a/docs/using-airbyte/core-concepts/namespaces.md b/docs/using-airbyte/core-concepts/namespaces.md new file mode 100644 index 000000000000..31e092e0d862 --- /dev/null +++ b/docs/using-airbyte/core-concepts/namespaces.md @@ -0,0 +1,98 @@ +# Namespaces + +## High-Level Overview + +Namespaces are used to generally organize data, separate tests and production data, and enforce permissions. In most cases, namespaces are schemas in the database you're replicating to. + +As a part of connection setup, you select where in the destination you want to write your data. Note: The default configuration is **Destination default**. + +| Destination Namespace | Description | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------- | +| Destination default | All streams will be replicated to the single default namespace defined by the Destination. | +| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. | +| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details. | + +Most of our destinations support this feature. To learn if your connector supports this, head to the individual connector page to learn more. If your desired destination doesn't support it, you can ignore this feature. + +## What is a Namespace? + +Systems often group their underlying data into namespaces with each namespace's data isolated from another namespace. This isolation allows for better organisation and flexibility, leading to better usability. + +An example of a namespace is the RDBMS's `schema` concept. Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organisation. + +In a source, the namespace is the location from where the data is replicated to the destination. In a destination, the namespace is the location where the replicated data is stored in the destination. + +Airbyte supports namespaces and allows Sources to define namespaces, and Destinations to write to various namespaces. In Airbyte, the following options are available and are set on each individual connection. + +### Destination default + +All streams will be replicated and stored in the default namespace defined on the destination settings page, which is typically defined when the destination was set up.
Depending on your destination, the namespace refers to: + +| Destination Connector | Namespace setting | +| :--- | :--- | +| BigQuery | dataset | +| MSSQL | schema | +| MySql | database | +| Oracle DB | schema | +| Postgres | schema | +| Redshift | schema | +| Snowflake | schema | +| S3 | path prefix | + +:::tip +If you prefer to replicate multiple sources into the same namespace, use the `Stream Prefix` configuration to differentiate data from these sources to ensure no streams collide when writing to the destination. +::: + +### Mirror source structure + +Some sources \(such as databases based on JDBC\) provide namespace information from which a stream has been extracted. Whenever a source is able to fill this field in the catalog.json file, the destination will try to write to exactly the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will fall back to the "Destination default". Most APIs do not provide namespace information. + +### Custom format + +When replicating multiple sources into the same destination, you may create table conflicts where tables are overwritten by different syncs. This is where using a custom namespace will ensure data is synced accurately. + +For example, a Github source can be replicated into a `github` schema. However, you may have multiple connections writing from different GitHub repositories \(common in multi-tenant scenarios\). + +:::tip +To keep the same table names, Airbyte recommends writing the connections to unique namespaces to avoid mixing data from the different GitHub repositories. +::: + +You can enter plain text (most common) or additionally add a dynamic parameter `${SOURCE_NAMESPACE}`, which uses the namespace provided by the source if available. + +### Examples + +The following table summarises how this works. In this example, we're looking at the replication configuration between a Postgres Source and Snowflake Destination \(with settings of schema = "my\_schema"\): + +| Namespace Configuration | Source Namespace | Source Table Name | Destination Namespace | Destination Table Name | +| :--- | :--- | :--- | :--- | :--- | +| Destination default | public | my\_table | my\_schema | my\_table | +| Destination default | | my\_table | my\_schema | my\_table | +| Mirror source structure | public | my\_table | public | my\_table | +| Mirror source structure | | my\_table | my\_schema | my\_table | +| Custom format = "custom" | public | my\_table | custom | my\_table | +| Custom format = "${SOURCE\_NAMESPACE}" | public | my\_table | public | my\_table | +| Custom format = "my\_${SOURCE\_NAMESPACE}\_schema" | public | my\_table | my\_public\_schema | my\_table | +| Custom format = " " | public | my\_table | my\_schema | my\_table | + +## Syncing Details + +If the Source does not support namespaces, the data will be replicated into the Destination's default namespace. For databases, the default namespace is the schema provided in the destination configuration. + +If the Destination does not support namespaces, any preference set in the connection is ignored. + +## Using Namespaces with Basic Normalization + +As part of the connections sync settings, it is possible to configure the namespace used by: 1. destination connectors: to store the `_airbyte_raw_*` tables. 2. basic normalization: to store the final normalized tables. + +:::info +When basic normalization is enabled, this is the location that both your normalized and raw data will get written to. 
Your raw data will show up with the prefix `_airbyte_raw_` in the namespace you define. If you don't enable basic normalization, you will only receive the raw tables. +::: + +Note that custom transformation outputs are not affected by the namespace settings from Airbyte: It is up to the configuration of the custom dbt project, and how it is written to handle its [custom schemas](https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas). The default target schema for dbt in this case will always be the destination namespace. + +## Requirements + +* Both Source and Destination connectors need to support namespaces. +* Relevant Source and Destination connectors need to be at least version `0.3.0` or later. +* Airbyte version `0.21.0-alpha` or later. + diff --git a/docs/using-airbyte/core-concepts/readme.md b/docs/using-airbyte/core-concepts/readme.md new file mode 100644 index 000000000000..9d8e495a62d5 --- /dev/null +++ b/docs/using-airbyte/core-concepts/readme.md @@ -0,0 +1,108 @@ +# Core Concepts + +Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, and how the data is written to the destination. + +This page describes the concepts you need to know to use Airbyte. + +## Source + +A source is an API, file, database, or data warehouse that you want to ingest data from. + +## Destination + +A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your ingested data. + +## Connector + +An Airbyte component which pulls data from a source or pushes data to a destination. + +## Connection + +A connection is an automated data pipeline that replicates data from a source to a destination. Setting up a connection enables configuration of the following parameters: + +| Concept | Description | +|---------------------|---------------------------------------------------------------------------------------------------------------------| +| Replication Frequency | When should a data sync be triggered? | +| Destination Namespace and Stream Prefix | Where should the replicated data be written? | +| Sync Mode | How should the streams be replicated (read and written)? | +| Schema Propagation | How should Airbyte handle schema drift in sources? | +| Catalog Selection | What data should be replicated from the source to the destination? | + +## Stream + +A stream is a group of related records. + +Examples of streams: + +- A table in a relational database +- A resource or API endpoint for a REST API +- The records from a directory containing many files in a filesystem + +## Field + +A field is an attribute of a record in a stream. + +Examples of fields: + +- A column in the table in a relational database +- A field in an API response + +## Sync Schedules + +There are three options for scheduling a sync to run: +- Scheduled (e.g. every 24 hours, every 2 hours) +- [CRON schedule](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) +- Manual \(i.e: clicking the "Sync Now" button in the UI or through the API\) + +For more details, see our [Sync Schedules documentation](sync-schedules.md). + +## Destination Namespace + +A namespace defines where the data will be written in your destination. You can use the namespace to group streams in a source or destination. In a relational database system, this is typically known as a schema.
+ +For more details, see our [Namespace documentation](namespaces.md). + +## Sync Mode + +A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes depending on what you want to accomplish. + +Read more about each [sync mode](sync-modes) and how they differ. + +## Typing and Deduping + +Typing and deduping ensures the data emitted from sources is written into the correct type-cast relational columns and only contains unique records. Typing and deduping is only relevant for the following relational database & warehouse destinations: + +- Snowflake +- BigQuery + +:::info +Typing and Deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. We are retaining documentation about normalization to support legacy destinations. +::: + +For more details, see our [Typing & Deduping documentation](/understanding-airbyte/typing-deduping). + +## Basic Normalization + +Basic Normalization transforms data after a sync to denest columns into their own tables. Note that normalization is only available for the following relational database & warehouse destinations: + +- Redshift +- Postgres +- Oracle +- MySQL +- MSSQL + +For more details, see our [Basic Normalization documentation](/using-airbyte/core-concepts/basic-normalization.md). + +## Custom Transformations + +Airbyte integrates natively with dbt to allow you to use dbt for post-sync transformations. This is useful if you would like to trigger dbt models after a sync successfully completes. + +For more details, see our [dbt integration documentation](/cloud/managing-airbyte-cloud/dbt-cloud-integration.md). + +## Workspace + +A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. + +## Glossary of Terms + +You can find an extended list of [Airbyte specific terms](https://glossary.airbyte.com/term/airbyte-glossary-of-terms/), [data engineering concepts](https://glossary.airbyte.com/term/data-engineering-concepts) or many [other data related terms](https://glossary.airbyte.com/). diff --git a/docs/using-airbyte/core-concepts/sync-modes/README.md b/docs/using-airbyte/core-concepts/sync-modes/README.md new file mode 100644 index 000000000000..a561506a1f73 --- /dev/null +++ b/docs/using-airbyte/core-concepts/sync-modes/README.md @@ -0,0 +1,20 @@ +# Sync Modes + +A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. To minimize confusion, a mode's behavior is reflected in its name. The easiest way to understand Airbyte's sync modes is to understand how the modes are named. + +1. The first part of the name denotes how the source connector reads data from the source: + 1. Incremental: Read records added to the source since the last sync job. \(The first sync using Incremental is equivalent to a Full Refresh\) + - Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. + - Method 2: Using change data capture. Only supported by some sources. See [CDC](../../../understanding-airbyte/cdc.md) for more info. + 2. Full Refresh: Read everything in the source. +2. The second part of the sync mode name denotes how the destination connector writes data.
This is not affected by how the source connector produced the data: + 1. Overwrite: Overwrite by first deleting existing data in the destination. + 2. Append: Write by adding data to existing tables in the destination. + 3. Deduped History: Write by first adding data to existing tables in the destination to keep a history of changes. The final table is produced by de-duplicating the intermediate ones using a primary key. + +A sync mode is a combination of a source and destination mode together. The UI exposes the following options, whenever both source and destination connectors are capable of supporting it for the corresponding stream: + +- [Incremental Append + Deduped](./incremental-append-deduped.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. +- [Full Refresh Overwrite](./full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. +- [Full Refresh Append](./full-refresh-append.md): Sync the whole stream and append data in destination. +- [Incremental Append](./incremental-append.md): Sync new records from stream and append data in destination. diff --git a/docs/understanding-airbyte/connections/full-refresh-append.md b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md similarity index 92% rename from docs/understanding-airbyte/connections/full-refresh-append.md rename to docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md index b7343fc1c07b..ccdd7951bbe5 100644 --- a/docs/understanding-airbyte/connections/full-refresh-append.md +++ b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md @@ -2,7 +2,7 @@ ## Overview -The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available data requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](incremental-append.md), which does not sync data that has already been synced before. +The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available data requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](./incremental-append.md), which does not sync data that has already been synced before. In the **Append** variant, new syncs will take all data from the sync and append it to the destination table. Therefore, if syncing similar information multiple times, every sync will create duplicates of already existing data. diff --git a/docs/understanding-airbyte/connections/full-refresh-overwrite.md b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md similarity index 91% rename from docs/understanding-airbyte/connections/full-refresh-overwrite.md rename to docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md index 44d4ff5f6699..6de7d266c9ce 100644 --- a/docs/understanding-airbyte/connections/full-refresh-overwrite.md +++ b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md @@ -2,7 +2,7 @@ ## Overview -The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available information requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](incremental-append.md), which does not sync data that has already been synced before.
+The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available information requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](./incremental-append.md), which does not sync data that has already been synced before. In the **Overwrite** variant, new syncs will destroy all data in the existing destination table and then pull the new data in. Therefore, data that has been removed from the source after an old sync will be deleted in the destination table. diff --git a/docs/understanding-airbyte/connections/incremental-append-deduped.md b/docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md similarity index 89% rename from docs/understanding-airbyte/connections/incremental-append-deduped.md rename to docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md index 86e8ee92ee75..6fa0272fda6e 100644 --- a/docs/understanding-airbyte/connections/incremental-append-deduped.md +++ b/docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md @@ -69,19 +69,19 @@ In the final de-duplicated table: ## Source-Defined Cursor -Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. +Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. -![](../../.gitbook/assets/incremental_source_defined.png) +![](../../../.gitbook/assets/incremental_source_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## User-Defined Cursor -Some sources cannot define the cursor without user input. For example, in the [postgres source](../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. +Some sources cannot define the cursor without user input. For example, in the [postgres source](../../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. -![](../../.gitbook/assets/incremental_user_defined.png) +![](../../../.gitbook/assets/incremental_user_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\).
## Source-Defined Primary key @@ -91,7 +91,7 @@ Some sources are able to determine the primary key that they use without any use Some sources cannot define the cursor without user input or the user may want to specify their own primary key on the destination that is different from the source definitions. In these cases, select the column in the sync settings dropdown that should be used as the `primary key` or `composite primary keys`. -![](../../.gitbook/assets/primary_key_user_defined.png) +![](../../../.gitbook/assets/primary_key_user_defined.png) In this example, we selected both the `campaigns.id` and `campaigns.name` as the composite primary key of our `campaigns` table. @@ -118,4 +118,4 @@ select * from table where cursor_field > 'last_sync_max_cursor_field_value' **Note**: -Previous versions of Airbyte destinations supported SCD tables, which would sore every entry seen for a record. This was removed with Destinations V2 and [Typing and Deduplication](/understanding-airbyte/typing-deduping.md). +Previous versions of Airbyte destinations supported SCD tables, which would store every entry seen for a record. This was removed with Destinations V2 and [Typing and Deduplication](../typing-deduping.md). diff --git a/docs/understanding-airbyte/connections/incremental-append.md b/docs/using-airbyte/core-concepts/sync-modes/incremental-append.md similarity index 88% rename from docs/understanding-airbyte/connections/incremental-append.md rename to docs/using-airbyte/core-concepts/sync-modes/incremental-append.md index c380d2226912..c9facb4711f3 100644 --- a/docs/understanding-airbyte/connections/incremental-append.md +++ b/docs/using-airbyte/core-concepts/sync-modes/incremental-append.md @@ -2,7 +2,7 @@ ## Overview -Airbyte supports syncing data in **Incremental Append** mode i.e: syncing only replicate _new_ or _modified_ data. This prevents re-fetching data that you have already replicated from a source. If the sync is running for the first time, it is equivalent to a [Full Refresh](full-refresh-append.md) since all data will be considered as _new_. +Airbyte supports syncing data in **Incremental Append** mode, i.e. syncing only _new_ or _modified_ data. This prevents re-fetching data that you have already replicated from a source. If the sync is running for the first time, it is equivalent to a [Full Refresh](./full-refresh-append.md) since all data will be considered as _new_. In this flavor of incremental, records in the warehouse destination will never be deleted or mutated. A copy of each new or updated record is _appended_ to the data in the warehouse. This means you can find multiple copies of the same record in the destination warehouse. We provide an "at least once" guarantee of replicating each record that is present when the sync runs. @@ -62,25 +62,25 @@ The output we expect to see in the warehouse is as follows: ## Source-Defined Cursor -Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. +Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced.
In these cases, simply select the incremental option in the UI. -![](../../.gitbook/assets/incremental_source_defined.png) +![](../../../.gitbook/assets/incremental_source_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## User-Defined Cursor -Some sources cannot define the cursor without user input. For example, in the [postgres source](../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. +Some sources cannot define the cursor without user input. For example, in the [postgres source](../../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. -![](../../.gitbook/assets/incremental_user_defined.png) +![](../../../.gitbook/assets/incremental_user_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## Getting the Latest Snapshot of data As demonstrated in the examples above, with **Incremental Append,** a record which was updated in the source will be appended to the destination rather than updated in-place. This means that if data in the source uses a primary key \(e.g: `user_id` in the `users` table\), then the destination will end up having multiple records with the same primary key value. -However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Append + Deduped](incremental-append-deduped.md) instead. +However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Append + Deduped](./incremental-append-deduped.md) instead. Note that in **Incremental Append**, the size of the data in your warehouse increases monotonically since an updated record in the source is appended to the destination rather than updated in-place. @@ -122,7 +122,7 @@ At the end of the second incremental sync, the data warehouse would still contai Similarly, if multiple modifications are made during the same day to the same records. If the frequency of the sync is not granular enough \(for example, set for every 24h\), then intermediate modifications to the data are not going to be detected and emitted. Only the state of data at the time the sync runs will be reflected in the destination. -Those concerns could be solved by using a different incremental approach based on binary logs, Write-Ahead-Logs \(WAL\), or also called [Change Data Capture \(CDC\)](../cdc.md). +Those concerns could be solved by using a different incremental approach based on binary logs, Write-Ahead-Logs \(WAL\), also called [Change Data Capture \(CDC\)](../../../understanding-airbyte/cdc.md).
The current behavior of **Incremental** is not able to handle source schema changes yet, for example, when a column is added, renamed or deleted from an existing table etc. It is recommended to trigger a [Full refresh - Overwrite](full-refresh-overwrite.md) to correctly replicate the data to the destination with the new schema changes. diff --git a/docs/using-airbyte/core-concepts/sync-schedules.md b/docs/using-airbyte/core-concepts/sync-schedules.md new file mode 100644 index 000000000000..a0d6c22fbee9 --- /dev/null +++ b/docs/using-airbyte/core-concepts/sync-schedules.md @@ -0,0 +1,39 @@ +# Sync Schedules + +For each connection, you can select between three options that allow a sync to run. The three options for `Replication Frequency` are: + +- Scheduled (e.g. every 24 hours, every 2 hours) +- Cron scheduling +- Manual + +## Sync Limitations + +* Only one sync per connection can run at a time. +* If a sync is scheduled to run before the previous sync finishes, the scheduled sync will start after the completion of the previous sync. +* Syncs can run at most every 60 minutes. Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour. + +## Scheduled syncs +When a scheduled connection is first created, a sync is executed immediately after creation. After that, a sync is run once the time since the last sync \(whether it was triggered manually or due to a schedule\) has exceeded the schedule interval. For example: + +- **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours. +- **October 1st, 2:01pm**: sync job runs +- **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered. +- **October 2nd, 5pm**: The user manually triggers a sync from the UI +- **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run +- **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run + +## Cron Scheduling +If you prefer more flexibility in scheduling your sync, you can also use CRON scheduling to set a precise time of day or month. + +Airbyte uses the CRON scheduler from [Quartz](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). We recommend reading their [documentation](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) to learn more about how to write a cron expression. + +When setting up the cron expression, you will also be asked to choose a time zone the sync will run in. + +:::note +For Scheduled or cron scheduled syncs, Airbyte guarantees syncs will initiate with a schedule accuracy of +/- 30 minutes. +::: + +## Manual Syncs +When the connection is set to replicate with `Manual` frequency, the sync will not automatically run. + +It can be triggered by clicking the "Sync Now" button at any time through the UI or be triggered through the API. \ No newline at end of file diff --git a/docs/understanding-airbyte/typing-deduping.md b/docs/using-airbyte/core-concepts/typing-deduping.md similarity index 87% rename from docs/understanding-airbyte/typing-deduping.md rename to docs/using-airbyte/core-concepts/typing-deduping.md index f66e6a3c59ba..1cd029e47a03 100644 --- a/docs/understanding-airbyte/typing-deduping.md +++ b/docs/using-airbyte/core-concepts/typing-deduping.md @@ -1,6 +1,6 @@ # Typing and Deduping -This page refers to new functionality added by [Destinations V2](/release_notes/upgrading_to_destinations_v2/).
Typing and deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. Please check each destination to learn if Typing and deduping is supported. +This page refers to new functionality added by [Destinations V2](/release_notes/upgrading_to_destinations_v2/). Typing and deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. Please check each destination to learn if Typing and Deduping is supported. ## What is Destinations V2? @@ -11,6 +11,12 @@ This page refers to new functionality added by [Destinations V2](/release_notes/ - Internal Airbyte tables in the `airbyte_internal` schema: Airbyte will now generate all raw tables in the `airbyte_internal` schema. We no longer clutter your desired schema with raw data tables. - Incremental delivery for large syncs: Data will be incrementally delivered to your final tables when possible. No more waiting hours to see the first rows in your destination table. +:::note + +Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. + +::: + ## `_airbyte_meta` Errors "Per-row error handling" is a new paradigm for Airbyte which provides greater flexibility for our users. Airbyte now separates `data-moving problems` from `data-content problems`. Prior to Destinations V2, both types of errors were handled the same way: by failing the sync. Now, a failing sync means that Airbyte could not _move_ all of your data. You can query the `_airbyte_meta` column to see which rows failed for _content_ reasons, and why. This is a more flexible approach, as you can now decide how to handle rows with errors on a case-by-case basis. @@ -34,7 +40,7 @@ Depending on your use-case, it may still be valuable to consider rows with error ## Destinations V2 Example -Consider the following [source schema](https://docs.airbyte.com/integrations/sources/faker) for stream `users`: +Consider the following [source schema](/integrations/sources/faker) for stream `users`: ```json { @@ -58,7 +64,7 @@ The data from one stream will now be mapped to one table in your schema as below | Failed typing that didn’t break other rows ⟶ | yyy-yyy-yyy | 2022-01-01 12:00:00 | { errors: {[“fish” is not a valid integer for column “age”]} | 2 | evan | NULL | { city: “Menlo Park”, zip: “94002” } | | Not-yet-typed ⟶ | | | | | | | | -In legacy normalization, columns of [Airbyte type](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#the-types) `Object` in the Destination were "unnested" into separate tables. In this example, with Destinations V2, the previously unnested `public.users_address` table with columns `city` and `zip` will no longer be generated. +In legacy normalization, columns of [Airbyte type](/understanding-airbyte/supported-data-types/#the-types) `Object` in the Destination were "unnested" into separate tables. In this example, with Destinations V2, the previously unnested `public.users_address` table with columns `city` and `zip` will no longer be generated. 
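Because `_airbyte_meta` (described above) records per-row errors, you can separate rows that loaded cleanly from rows that failed typing after a sync. The sketch below is a hypothetical illustration assuming the rows have already been read back from the destination as Python dicts; it is not tied to any particular warehouse's SQL, and the error wording simply mirrors the example table above.

```python
# Hypothetical sketch: split rows by whether their _airbyte_meta column
# contains per-row (content) errors.
import json

rows = [
    {"id": "xxx", "first_name": "sarah", "age": 39,
     "_airbyte_meta": json.dumps({"errors": []})},
    {"id": "yyy", "first_name": "evan", "age": None,
     "_airbyte_meta": json.dumps({"errors": ['"fish" is not a valid integer for column "age"']})},
]

clean, errored = [], []
for row in rows:
    meta = json.loads(row["_airbyte_meta"])
    (errored if meta.get("errors") else clean).append(row)

print(len(clean), "clean rows;", len(errored), "rows with content errors")
for row in errored:
    print(row["id"], "->", json.loads(row["_airbyte_meta"])["errors"])
```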
#### Destination Table Name: _airbyte.raw_public_users_ (`airbyte.{namespace}_{stream}`) @@ -70,4 +76,4 @@ In legacy normalization, columns of [Airbyte type](https://docs.airbyte.com/unde You also now see the following changes in Airbyte-provided columns: -![Airbyte Destinations V2 Column Changes](../release_notes/assets/updated_table_columns.png) +![Airbyte Destinations V2 Column Changes](../../release_notes/assets/updated_table_columns.png) diff --git a/docs/quickstart/add-a-destination.md b/docs/using-airbyte/getting-started/add-a-destination.md similarity index 81% rename from docs/quickstart/add-a-destination.md rename to docs/using-airbyte/getting-started/add-a-destination.md index 594acd02cf9e..cc473d8384f3 100644 --- a/docs/quickstart/add-a-destination.md +++ b/docs/using-airbyte/getting-started/add-a-destination.md @@ -1,20 +1,20 @@ # Add a Destination -Destinations are the data warehouses, data lakes, databases and analytics tools where you will load the data from your chosen source(s). The steps to setting up your first destination are very similar to those for [setting up a source](https://docs.airbyte.com/quickstart/add-a-source). +Destinations are the data warehouses, data lakes, databases and analytics tools where you will load the data from your chosen source(s). The steps for setting up your first destination are very similar to those for [setting up a source](./add-a-source). Once you've logged in to your Airbyte Open Source deployment, click on the **Destinations** tab in the navigation bar found on the left side of the dashboard. This will take you to the list of available destinations. -![Destination List](../.gitbook/assets/add-a-destination/getting-started-destination-list.png) +![Destination List](../../.gitbook/assets/add-a-destination/getting-started-destination-list.png) You can use the provided search bar at the top of the page, or scroll down the list to find the destination you want to replicate data from. :::tip -You can filter the list of destinations by support level. Airbyte connectors are categorized in two support levels, Certified and Community. See our [Product Support Levels](https://docs.airbyte.com/project-overview/product-support-levels) page for more information on this topic. +You can filter the list of destinations by support level. Airbyte connectors are categorized into two support levels, Certified and Community. See our [Connector Support Levels](/integrations/connector-support-levels.md) page for more information on this topic. ::: As an example, we'll be setting up a simple JSON file that will be saved on our local system as the destination. Select **Local JSON** from the list of destinations. This will take you to the destination setup page. -![Destination Page](../.gitbook/assets/add-a-destination/getting-started-destination-page.png) +![Destination Page](../../.gitbook/assets/add-a-destination/getting-started-destination-page.png) The left half of the page contains a set of fields that you will have to fill out. In the **Destination name** field, you can enter a name of your choosing to help you identify this instance of the connector. By default, this will be set to the name of the destination (i.e., `Local JSON`). @@ -26,4 +26,4 @@ Each destination will have its own set of required fields to configure during se Some destinations will also have an **Optional Fields** tab located beneath the required fields. You can open this tab to view and configure any additional optional parameters that exist for the source.
These fields generally grant you more fine-grained control over your data replication, but you can safely ignore them. ::: -Once you've filled out the required fields, select **Set up destination**. A connection check will run to verify that a successful connection can be established. Now you're ready to [set up your first connection](https://docs.airbyte.com/quickstart/set-up-a-connection)! +Once you've filled out the required fields, select **Set up destination**. A connection check will run to verify that a successful connection can be established. Now you're ready to [set up your first connection](./set-up-a-connection)! diff --git a/docs/quickstart/add-a-source.md b/docs/using-airbyte/getting-started/add-a-source.md similarity index 86% rename from docs/quickstart/add-a-source.md rename to docs/using-airbyte/getting-started/add-a-source.md index 633d9a1d8b77..e5f59b2f7517 100644 --- a/docs/quickstart/add-a-source.md +++ b/docs/using-airbyte/getting-started/add-a-source.md @@ -2,11 +2,11 @@ Setting up a new source in Airbyte is a quick and simple process! When viewing the Airbyte UI, you'll see the main navigation bar on the left side of your screen. Click the **Sources** tab to bring up a list of all available sources. -![](../.gitbook/assets/add-a-source/getting-started-source-list.png) +![](../../.gitbook/assets/add-a-source/getting-started-source-list.png) You can use the provided search bar, or simply scroll down the list to find the source you want to replicate data from. Let's use Google Sheets as an example. Clicking on the **Google Sheets** card will bring us to its setup page. -![](../.gitbook/assets/add-a-source/getting-started-source-page.png) +![](../../.gitbook/assets/add-a-source/getting-started-source-page.png) The left half of the page contains a set of fields that you will have to fill out. In the **Source name** field, you can enter a name of your choosing to help you identify this instance of the connector. By default, this will be set to the name of the source (ie, `Google Sheets`). @@ -18,5 +18,5 @@ Some sources will also have an **Optional Fields** tab. You can open this tab to Once you've filled out all the required fields, click on the **Set up source** button and Airbyte will run a check to verify the connection. Happy replicating! -Can't find the connectors that you want? Try your hand at easily building one yourself using our [Connector Builder!](../connector-development/connector-builder-ui/overview.md) +Can't find the connectors that you want? Try your hand at easily building one yourself using our [Connector Builder!](../../connector-development/connector-builder-ui/overview.md) diff --git a/docs/using-airbyte/getting-started/readme.md b/docs/using-airbyte/getting-started/readme.md new file mode 100644 index 000000000000..ab860999e2fb --- /dev/null +++ b/docs/using-airbyte/getting-started/readme.md @@ -0,0 +1,32 @@ +# Getting Started + +Getting started with Airbyte takes only a few steps! This page guides you through the initial steps to get started, and you'll learn how to set up your first connection on the following pages. + +You have two options to run Airbyte: use **Airbyte Cloud** (recommended) or **self-host Airbyte** in your infrastructure. + +## Sign Up for Airbyte Cloud + +To use Airbyte Cloud, [sign up](https://cloud.airbyte.io/signup) with your email address, Google login, or GitHub login.
Upon signing up, you'll be taken to your workspace, which lets you collaborate with team members and share resources across your team under a shared billing account. + +Airbyte Cloud offers a 14-day free trial that begins after your first successful sync. For more details on our pricing model, see our [pricing page](https://www.airbyte.com/pricing). + +To start setting up a data pipeline, see how to [set up a source](./add-a-source.md). + +:::info +Depending on your data residency, you may need to [allowlist IP addresses](/operating-airbyte/security.md#network-security-1) to enable access to Airbyte. +::: + +## Deploy Airbyte (Open Source) + +To use Airbyte Open Source, you can use one of the following options to deploy it on your infrastructure. + +- [Local Deployment](/deploying-airbyte/local-deployment.md) (recommended when trying out Airbyte) +- [On AWS EC2](/deploying-airbyte/on-aws-ec2.md) +- [On Azure VM Cloud Shell](/deploying-airbyte/on-azure-vm-cloud-shell.md) +- [On Digital Ocean Droplet](/deploying-airbyte/on-digitalocean-droplet.md) +- [On GCP](/deploying-airbyte/on-gcp-compute-engine.md) +- [On Kubernetes](/deploying-airbyte/on-kubernetes-via-helm.md) +- [On OCI VM](/deploying-airbyte/on-oci-vm.md) +- [On Restack](/deploying-airbyte/on-restack.md) +- [On Plural](/deploying-airbyte/on-plural.md) +- [On AWS ECS](/deploying-airbyte/on-aws-ecs.md) (Spoiler alert: it doesn't work) diff --git a/docs/quickstart/set-up-a-connection.md b/docs/using-airbyte/getting-started/set-up-a-connection.md similarity index 63% rename from docs/quickstart/set-up-a-connection.md rename to docs/using-airbyte/getting-started/set-up-a-connection.md index c9144ec08c43..7948eeeda06a 100644 --- a/docs/quickstart/set-up-a-connection.md +++ b/docs/using-airbyte/getting-started/set-up-a-connection.md @@ -1,38 +1,44 @@ # Set up a Connection -Now that you've learned how to [deploy Airbyte locally](https://docs.airbyte.com/quickstart/deploy-airbyte) and set up your first [source](https://docs.airbyte.com/quickstart/add-a-source) and [destination](https://docs.airbyte.com/quickstart/add-a-destination), it's time to finish the job by creating your very first connection! +Now that you've learned how to set up your first [source](./add-a-source) and [destination](./add-a-destination), it's time to finish the job by creating your very first connection! On the left side of your main Airbyte dashboard, select **Connections**. You will be prompted to choose which source and destination to use for this connection. As an example, we'll use the **Google Sheets** source and **Local JSON** destination. ## Configure the connection -Once you've chosen your source and destination, you'll be able to configure the connection. You can refer to [this page](https://docs.airbyte.com/cloud/managing-airbyte-cloud/configuring-connections) for more information on each available configuration. For this demo, we'll simply set the **Replication frequency** to a 24 hour interval and leave the other fields at their default values. +Once you've chosen your source and destination, you'll be able to configure the connection. You can refer to [this page](/cloud/managing-airbyte-cloud/configuring-connections.md) for more information on each available configuration. For this demo, we'll simply set the **Replication frequency** to a 24 hour interval and leave the other fields at their default values.
-![Connection config](../.gitbook/assets/set-up-a-connection/getting-started-connection-config.png) +![Connection config](../../.gitbook/assets/set-up-a-connection/getting-started-connection-config.png) -Next, you can toggle which streams you want to replicate, as well as setting up the desired sync mode for each stream. For more information on the nature of each sync mode supported by Airbyte, see [this page](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes). +:::note +By default, data will sync to the default namespace defined in the destination. To ensure your data is synced to the correct place, see our examples for [Destination Namespace](/using-airbyte/core-concepts/namespaces.md). +::: + +Next, you can toggle which streams you want to replicate, as well as set up the desired sync mode for each stream. For more information on the nature of each sync mode supported by Airbyte, see [this page](/using-airbyte/core-concepts/sync-modes). Our test data consists of a single stream cleverly named `Test Data`, which we've enabled and set to `Full Refresh - Overwrite` sync mode. -![Stream config](../.gitbook/assets/set-up-a-connection/getting-started-connection-streams.png) +![Stream config](../../.gitbook/assets/set-up-a-connection/getting-started-connection-streams.png) Click **Set up connection** to complete your first connection. Your first sync is about to begin! -## Connector Dashboard +## Connection Overview -Once you've finished setting up the connection, you will be automatically redirected to a dashboard containing all the tools you need to keep track of your connection. +Once you've finished setting up the connection, you will be automatically redirected to a connection overview containing all the tools you need to keep track of your connection. -![Connection dashboard](../.gitbook/assets/set-up-a-connection/getting-started-connection-success.png) +![Connection dashboard](../../.gitbook/assets/set-up-a-connection/getting-started-connection-success.png) Here's a basic overview of the tabs and their use: -1. The **Status** tab shows you an overview of your connector's sync schedule and health. +1. The **Status** tab shows you an overview of your connector's sync health. 2. The **Job History** tab allows you to check the logs for each sync. If you encounter any errors or unexpected behaviors during a sync, checking the logs is always a good first step to finding the cause and solution. 3. The **Replication** tab allows you to modify the configurations you chose during the connection setup. 4. The **Settings** tab contains additional settings, and the option to delete the connection if you no longer wish to use it. ### Check the data from your first sync +Once the first sync has completed, you can verify that the data arrived by checking your destination. + If you followed along and created your own connection using a `Local JSON` destination, you can use this command to check the file's contents to make sure the replication worked as intended (be sure to replace YOUR_PATH with the path you chose in your destination setup, and YOUR_STREAM_NAME with the name of an actual stream you replicated): ```bash @@ -42,12 +48,12 @@ cat /tmp/airbyte_local/YOUR_PATH/_airbyte_raw_YOUR_STREAM_NAME.jsonl You should see a list of JSON objects, each containing a unique `airbyte_ab_id`, an `emitted_at` timestamp, and `airbyte_data` containing the extracted record.
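If you prefer a small script over `cat`, the sketch below reads the same Local JSON output with Python. The YOUR_PATH and YOUR_STREAM_NAME placeholders are the same ones used above; the exact raw column names may vary by destination version, so the script simply prints whatever keys it finds.

```python
# Inspect the first record written by the Local JSON destination.
import json
from pathlib import Path

path = Path("/tmp/airbyte_local/YOUR_PATH/_airbyte_raw_YOUR_STREAM_NAME.jsonl")
with path.open() as f:
    for line in f:
        record = json.loads(line)
        # each line is one JSON object: an id, an emitted_at timestamp,
        # and the extracted record itself
        print(sorted(record.keys()))
        print(record)
        break  # only look at the first record
```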
:::tip -If you are using Airbyte on Windows with WSL2 and Docker, refer to [this guide](https://docs.airbyte.com/operator-guides/locating-files-local-destination) to locate the replicated folder and file. +If you are using Airbyte on Windows with WSL2 and Docker, refer to [this guide](/integrations/locating-files-local-destination.md) to locate the replicated folder and file. ::: ## What's next? -Congratulations on successfully setting up your first connection using Airbyte Open Source! We hope that this will be just the first step on your journey with us. We support a large, ever-growing [catalog of sources and destinations](https://docs.airbyte.com/integrations/), and you can even [contribute your own](https://docs.airbyte.com/connector-development/). +Congratulations on successfully setting up your first connection using Airbyte Open Source! We hope that this will be just the first step on your journey with us. We support a large, ever-growing [catalog of sources and destinations](/integrations/), and you can even [contribute your own](/connector-development/). If you have any questions at all, please reach out to us on [Slack](https://slack.airbyte.io/). If you would like to see a missing feature or connector added, please create an issue on our [Github](https://github.com/airbytehq/airbyte). Our community's participation is invaluable in helping us grow and improve every day, and we always welcome your feedback. diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md b/docs/using-airbyte/workspaces.md similarity index 84% rename from docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md rename to docs/using-airbyte/workspaces.md index 1db3697191a5..710242ca4728 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md +++ b/docs/using-airbyte/workspaces.md @@ -1,6 +1,8 @@ # Manage your workspace -An Airbyte Cloud workspace allows you to collaborate with other users and manage connections under a shared billing account. +A workspace in Airbyte allows you to collaborate with other users and manage connections together. On Airbyte Cloud, the workspace also shares a billing account across its users. + + :::info Airbyte [credits](https://airbyte.com/pricing) are assigned per workspace and cannot be transferred between workspaces. @@ -10,7 +12,7 @@ Airbyte [credits](https://airbyte.com/pricing) are assigned per workspace and ca To add a user to your workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to **Settings** via the side navigation in Airbyte. 2. Click **Access Management**. @@ -28,7 +30,7 @@ To add a user to your workspace: To remove a user from your workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to **Settings** via the side navigation in Airbyte. 2. Click **Access Management**. @@ -40,7 +42,7 @@ To remove a user from your workspace: To rename a workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to **Settings** via the side navigation in Airbyte. 2. Click **General Settings**. @@ -52,7 +54,7 @@ To rename a workspace: To delete a workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to **Settings** via the side navigation in Airbyte. 2. Click **General Settings**. @@ -78,8 +80,6 @@ You can use one or multiple workspaces with Airbyte Cloud, which gives you flexi To switch between workspaces: -1.
On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click the current workspace name under the Airbyte logo in the navigation bar. - -2. Click **View all workspaces**. +1. Click the current workspace name under the Airbyte logo in the navigation bar. -3. Click the name of the workspace you want to switch to. +2. Search for the workspace or click the name of the workspace you want to switch to. diff --git a/docusaurus/redirects.yml b/docusaurus/redirects.yml index b69386db8c1d..080a84f1dd4c 100644 --- a/docusaurus/redirects.yml +++ b/docusaurus/redirects.yml @@ -1,28 +1,16 @@ # A list of URLs that should be redirected to new pathes -- from: /airbyte-pro - to: /airbyte-enterprise - from: /upgrading-airbyte to: /operator-guides/upgrading-airbyte - from: /catalog to: /understanding-airbyte/airbyte-protocol - from: /integrations/sources/appstore-singer to: /integrations/sources/appstore -- from: - - /project-overview/security - - /operator-guides/securing-airbyte - to: /operator-guides/security - from: /connector-development/config-based/ to: /connector-development/config-based/low-code-cdk-overview - from: /project-overview/changelog to: /category/release-notes - from: /connector-development/config-based/understanding-the-yaml-file/stream-slicers/ to: /connector-development/config-based/understanding-the-yaml-file/partition-router -- from: /cloud/managing-airbyte-cloud - to: /category/using-airbyte-cloud -- from: /category/managing-airbyte-cloud - to: /category/using-airbyte-cloud -- from: /category/airbyte-open-source-quick-start - to: /category/getting-started - from: /cloud/dbt-cloud-integration to: /cloud/managing-airbyte-cloud/dbt-cloud-integration - from: /cloud/managing-airbyte-cloud/review-sync-summary @@ -31,5 +19,74 @@ to: /cloud/managing-airbyte-cloud/manage-connection-state - from: /cloud/managing-airbyte-cloud/edit-stream-configuration to: /cloud/managing-airbyte-cloud/configuring-connections -- from: /project-overview/product-release-stages - to: /project-overview/product-support-levels +# November 2023 documentation restructure: +- from: + - /project-overview/product-support-levels + - /project-overview/product-release-stages + to: /integrations/connector-support-levels +- from: + - /project-overview/code-of-conduct + - /project-overview/slack-code-of-conduct + to: /community/code-of-conduct +- from: /project-overview/licenses/ + to: /developer-guides/licenses/ +- from: /project-overview/licenses/license-faq + to: /developer-guides/licenses/license-faq +- from: /project-overview/licenses/elv2-license + to: /developer-guides/licenses/elv2-license +- from: /project-overview/licenses/mit-license + to: /developer-guides/licenses/mit-license +- from: /project-overview/licenses/examples + to: /developer-guides/licenses/examples +- from: + - /enterprise-setup/self-managed/ + - /airbyte-pro + - /airbyte-enterprise + to: /enterprise-setup/ +- from: /enterprise-setup/self-managed/implementation-guide + to: /enterprise-setup/implementation-guide +- from: /enterprise-setup/self-managed/sso + to: /enterprise-setup/sso +- from: + - /project-overview/security + - /operator-guides/securing-airbyte + - /operator-guides/security + to: /operating-airbyte/security +- from: + - /cloud/getting-started-with-airbyte-cloud + - /quickstart/deploy-airbyte + - /category/getting-started + - /category/airbyte-open-source-quick-start + to: /using-airbyte/getting-started/ +- from: /quickstart/add-a-source + to: /using-airbyte/getting-started/add-a-source +- from: /quickstart/add-a-destination + to: 
/using-airbyte/getting-started/add-a-destination +- from: /quickstart/set-up-a-connection + to: /using-airbyte/getting-started/set-up-a-connection +- from: /cloud/core-concepts + to: /using-airbyte/core-concepts/ +- from: /understanding-airbyte/namespaces + to: /using-airbyte/core-concepts/namespaces +- from: /understanding-airbyte/connections/ + to: /using-airbyte/core-concepts/sync-modes/ +- from: /understanding-airbyte/connections/full-refresh-overwrite + to: /using-airbyte/core-concepts/sync-modes/full-refresh-overwrite +- from: /understanding-airbyte/connections/full-refresh-append + to: /using-airbyte/core-concepts/sync-modes/full-refresh-append +- from: /understanding-airbyte/connections/incremental-append + to: /using-airbyte/core-concepts/sync-modes/incremental-append +- from: /understanding-airbyte/connections/incremental-append-deduped + to: /using-airbyte/core-concepts/sync-modes/incremental-append-deduped +- from: /understanding-airbyte/basic-normalization + to: /using-airbyte/core-concepts/basic-normalization +- from: /understanding-airbyte/typing-deduping + to: /using-airbyte/core-concepts/typing-deduping +- from: + - /troubleshooting + - /operator-guides/contact-support + to: /community/getting-support +- from: /cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace + to: /using-airbyte/workspaces +- from: /operator-guides/locating-files-local-destination + to: /integrations/locating-files-local-destination diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 2b915c7b4bb4..e42c14f55492 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -319,49 +319,6 @@ const contributeToAirbyte = { ], }; -const airbyteCloud = [ - { - type: "doc", - label: "Getting Started", - id: "cloud/getting-started-with-airbyte-cloud", - }, - "cloud/core-concepts", - { - type: "category", - label: "Using Airbyte Cloud", - link: { - type: "generated-index", - }, - items: [ - "cloud/managing-airbyte-cloud/configuring-connections", - "cloud/managing-airbyte-cloud/review-connection-status", - "cloud/managing-airbyte-cloud/review-sync-history", - "cloud/managing-airbyte-cloud/manage-schema-changes", - "cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications", - "cloud/managing-airbyte-cloud/manage-data-residency", - "cloud/managing-airbyte-cloud/dbt-cloud-integration", - "cloud/managing-airbyte-cloud/manage-credits", - "cloud/managing-airbyte-cloud/manage-connection-state", - "cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace", - "cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits", - ], - }, -]; - -const ossGettingStarted = { - type: "category", - label: "Getting Started", - link: { - type: "generated-index", - }, - items: [ - "quickstart/deploy-airbyte", - "quickstart/add-a-source", - "quickstart/add-a-destination", - "quickstart/set-up-a-connection", - ], -}; - const deployAirbyte = { type: "category", label: "Deploy Airbyte", @@ -379,7 +336,11 @@ const deployAirbyte = { label: "On AWS EC2", id: "deploying-airbyte/on-aws-ec2", }, - + { + type: "doc", + label: "On AWS ECS", + id: "deploying-airbyte/on-aws-ecs", + }, { type: "doc", label: "On Azure", @@ -418,40 +379,6 @@ const deployAirbyte = { ], }; -const operatorGuide = { - type: "category", - label: "Manage Airbyte", - link: { - type: "generated-index", - }, - items: [ - "operator-guides/upgrading-airbyte", - "operator-guides/reset", - "operator-guides/configuring-airbyte-db", - "operator-guides/configuring-connector-resources", - "operator-guides/browsing-output-logs", - 
"operator-guides/using-the-airflow-airbyte-operator", - "operator-guides/using-prefect-task", - "operator-guides/using-dagster-integration", - "operator-guides/using-kestra-plugin", - "operator-guides/locating-files-local-destination", - "operator-guides/collecting-metrics", - { - type: "category", - label: "Transformations and Normalization", - items: [ - "operator-guides/transformation-and-normalization/transformations-with-sql", - "operator-guides/transformation-and-normalization/transformations-with-dbt", - "operator-guides/transformation-and-normalization/transformations-with-airbyte", - ], - }, - "operator-guides/configuring-airbyte", - "operator-guides/using-custom-connectors", - "operator-guides/scaling-airbyte", - "operator-guides/configuring-sync-notifications", - ], -}; - const understandingAirbyte = { type: "category", label: "Understand Airbyte", @@ -459,68 +386,154 @@ const understandingAirbyte = { "understanding-airbyte/beginners-guide-to-catalog", "understanding-airbyte/airbyte-protocol", "understanding-airbyte/airbyte-protocol-docker", - "understanding-airbyte/basic-normalization", - "understanding-airbyte/typing-deduping", - { - type: "category", - label: "Connections and Sync Modes", - items: [ - { - type: "doc", - label: "Connections Overview", - id: "understanding-airbyte/connections/README", - }, - "understanding-airbyte/connections/full-refresh-overwrite", - "understanding-airbyte/connections/full-refresh-append", - "understanding-airbyte/connections/incremental-append", - "understanding-airbyte/connections/incremental-append-deduped", - ], - }, "understanding-airbyte/operations", "understanding-airbyte/high-level-view", "understanding-airbyte/jobs", "understanding-airbyte/tech-stack", "understanding-airbyte/cdc", - "understanding-airbyte/namespaces", "understanding-airbyte/supported-data-types", "understanding-airbyte/json-avro-conversion", "understanding-airbyte/database-data-catalog", ], }; -const security = { - type: "doc", - id: "operator-guides/security", -}; - -const support = { - type: "doc", - id: "operator-guides/contact-support", -}; - module.exports = { - mySidebar: [ - { - type: "doc", - label: "Start here", - id: "readme", - }, + docs: [ sectionHeader("Airbyte Connectors"), connectorCatalog, buildAConnector, - sectionHeader("Airbyte Cloud"), - ...airbyteCloud, - sectionHeader("Airbyte Open Source (OSS)"), - ossGettingStarted, + "integrations/connector-support-levels", + sectionHeader("Using Airbyte"), + { + type: "category", + label: "Getting Started", + link: { + type: "doc", + id: "using-airbyte/getting-started/readme", + }, + items: [ + "using-airbyte/getting-started/add-a-source", + "using-airbyte/getting-started/add-a-destination", + "using-airbyte/getting-started/set-up-a-connection", + ], + }, + { + type: "category", + label: "Core Concepts", + link: { + type: "doc", + id: "using-airbyte/core-concepts/readme" + }, + items: [ + "using-airbyte/core-concepts/sync-schedules", + "using-airbyte/core-concepts/namespaces", + { + type: "category", + label: "Sync Modes", + link: { + type: "doc", + id: "using-airbyte/core-concepts/sync-modes/README" + }, + items: [ + "using-airbyte/core-concepts/sync-modes/incremental-append-deduped", + "using-airbyte/core-concepts/sync-modes/incremental-append", + "using-airbyte/core-concepts/sync-modes/full-refresh-append", + "using-airbyte/core-concepts/sync-modes/full-refresh-overwrite", + ], + }, + "using-airbyte/core-concepts/typing-deduping", + "using-airbyte/core-concepts/basic-normalization", + ], + }, + { + type: 
"category", + label: "Configuring Connections", + link: { + type: "doc", + id: "cloud/managing-airbyte-cloud/configuring-connections" + }, + items: [ + "cloud/managing-airbyte-cloud/manage-schema-changes", + "cloud/managing-airbyte-cloud/manage-data-residency", + "cloud/managing-airbyte-cloud/manage-connection-state", + { + type: "category", + label: "Transformations", + items: [ + "cloud/managing-airbyte-cloud/dbt-cloud-integration", + "operator-guides/transformation-and-normalization/transformations-with-sql", + "operator-guides/transformation-and-normalization/transformations-with-dbt", + "operator-guides/transformation-and-normalization/transformations-with-airbyte", + ] + }, + ] + }, + { + type: "category", + label: "Managing Syncs", + items: [ + "cloud/managing-airbyte-cloud/review-connection-status", + "cloud/managing-airbyte-cloud/review-sync-history", + "operator-guides/browsing-output-logs", + "operator-guides/reset", + ], + }, + { + type: "category", + label: "Workspace Management", + items: [ + "using-airbyte/workspaces", + "cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications", + "cloud/managing-airbyte-cloud/manage-credits", + "operator-guides/using-custom-connectors", + ] + }, + sectionHeader("Operating Airbyte"), deployAirbyte, - operatorGuide, { - type: "doc", - id: "troubleshooting", + type: "category", + label: "Airbyte Enterprise", + link: { + type: "doc", + id: "enterprise-setup/README", + }, + items: [ + "enterprise-setup/implementation-guide", + "enterprise-setup/sso", + ] + }, + "operator-guides/upgrading-airbyte", + { + type: "category", + label: "Configuring Airbyte", + link: { + type: "doc", + id: "operator-guides/configuring-airbyte", + }, + items: [ + "operator-guides/configuring-airbyte-db", + "operator-guides/configuring-connector-resources", + ] }, { - type: "doc", - id: "airbyte-enterprise", + type: "category", + label: "Airbyte at Scale", + items: [ + "operator-guides/collecting-metrics", + "operator-guides/scaling-airbyte", + "cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits", + ] + }, + "operating-airbyte/security", + { + type: "category", + label: "Integrating with Airbyte", + items: [ + "operator-guides/using-the-airflow-airbyte-operator", + "operator-guides/using-prefect-task", + "operator-guides/using-dagster-integration", + "operator-guides/using-kestra-plugin", + ], }, sectionHeader("Developer Guides"), { @@ -537,42 +550,29 @@ module.exports = { }, understandingAirbyte, contributeToAirbyte, - sectionHeader("Resources"), - support, - security, { type: "category", - label: "Project Overview", + label: "Licenses", + link: { + type: "doc", + id: "developer-guides/licenses/README", + }, items: [ - { - type: "link", - label: "Roadmap", - href: "https://go.airbyte.com/roadmap", - }, - "project-overview/product-support-levels", - "project-overview/slack-code-of-conduct", - "project-overview/code-of-conduct", - { - type: "link", - label: "Airbyte Repository", - href: "https://github.com/airbytehq/airbyte", - }, - { - type: "category", - label: "Licenses", - link: { - type: "doc", - id: "project-overview/licenses/README", - }, - items: [ - "project-overview/licenses/license-faq", - "project-overview/licenses/elv2-license", - "project-overview/licenses/mit-license", - "project-overview/licenses/examples", - ], - }, + "developer-guides/licenses/license-faq", + "developer-guides/licenses/elv2-license", + "developer-guides/licenses/mit-license", + "developer-guides/licenses/examples", ], }, + sectionHeader("Community"), + 
"community/getting-support", + "community/code-of-conduct", + sectionHeader("Product Updates"), + { + type: "link", + label: "Roadmap", + href: "https://go.airbyte.com/roadmap", + }, { type: "category", label: "Release Notes", diff --git a/docusaurus/src/components/ConnectorRegistry.jsx b/docusaurus/src/components/ConnectorRegistry.jsx index 3b81708e3192..d3548c350d34 100644 --- a/docusaurus/src/components/ConnectorRegistry.jsx +++ b/docusaurus/src/components/ConnectorRegistry.jsx @@ -1,6 +1,8 @@ import React from "react"; import { useEffect, useState } from "react"; +import styles from "./ConnectorRegistry.module.css"; + const registry_url = "https://connectors.airbyte.com/files/generated_reports/connector_registry_report.json"; @@ -46,7 +48,6 @@ export default function ConnectorRegistry({ type }) { Connector Name - Icon Links Support Level OSS @@ -64,14 +65,12 @@ export default function ConnectorRegistry({ type }) { return ( - +
    + {connector.iconUrl_oss && ( + + )} {connector.name_oss} - - - - {connector.iconUrl_oss ? ( - - ) : null} +
    {/* min width to prevent wrapping */} diff --git a/docusaurus/src/components/ConnectorRegistry.module.css b/docusaurus/src/components/ConnectorRegistry.module.css new file mode 100644 index 000000000000..e3d085db4932 --- /dev/null +++ b/docusaurus/src/components/ConnectorRegistry.module.css @@ -0,0 +1,6 @@ +.connectorName { + display: flex; + align-items: center; + gap: 4px; + font-weight: bold; +} diff --git a/docusaurus/src/css/custom.css b/docusaurus/src/css/custom.css index 56563f0b9d24..ba56dadcae02 100644 --- a/docusaurus/src/css/custom.css +++ b/docusaurus/src/css/custom.css @@ -124,11 +124,19 @@ html[data-theme="dark"] .docusaurus-highlight-code-line { font-weight: 700; font-size: 0.8em; padding: 0.4em 0 0.4em 0.4em; - margin-top: 1.1em; color: var(--docsearch-text-color); background-color: var(--ifm-hover-overlay); } +.navbar__category:not(:first-child) { + margin-top: 1.1em; +} + +/* Hide the breadcrumbs if they have only the house as an entry (i.e. on the start page) */ +.breadcrumbs:has(li:first-child:last-child) { + display: none; +} + .cloudStatusLink { display: flex; gap: 4px; diff --git a/docusaurus/src/scripts/cloudStatus.js b/docusaurus/src/scripts/cloudStatus.js index fa1844409227..e3428ac94ed3 100644 --- a/docusaurus/src/scripts/cloudStatus.js +++ b/docusaurus/src/scripts/cloudStatus.js @@ -9,12 +9,12 @@ if (ExecutionEnvironment.canUseDOM) { .then((summary) => { const status = summary.page.status; const el = document.querySelector(".cloudStatusLink"); - el.classList.forEach((className) => { + el?.classList.forEach((className) => { if (className.startsWith("status-")) { el.classList.remove(className); } }); - el.classList.add(`status-${status.toLowerCase()}`) + el?.classList.add(`status-${status.toLowerCase()}`) }); }