From 6253c3f365fcf83a38d649b2aa487c3563d6a482 Mon Sep 17 00:00:00 2001 From: Augustin Date: Thu, 16 Nov 2023 11:04:30 +0100 Subject: [PATCH 01/57] airbyte-ci: use new self hosted runners (#32537) --- .github/workflows/airbyte-ci-tests.yml | 2 +- .github/workflows/cat-tests.yml | 2 +- .github/workflows/connectors_nightly_build.yml | 8 ++++---- .github/workflows/connectors_tests.yml | 4 ++-- .github/workflows/connectors_weekly_build.yml | 8 ++++---- .github/workflows/format_check.yml | 2 +- .github/workflows/format_fix.yml | 2 +- .../metadata_service_deploy_orchestrator_dagger.yml | 2 +- .github/workflows/publish_connectors.yml | 4 ++-- .github/workflows/publish_pypi.yml | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/airbyte-ci-tests.yml b/.github/workflows/airbyte-ci-tests.yml index 6069ea917a70..f11f6947b469 100644 --- a/.github/workflows/airbyte-ci-tests.yml +++ b/.github/workflows/airbyte-ci-tests.yml @@ -14,7 +14,7 @@ on: jobs: run-airbyte-ci-tests: name: Run Airbyte CI tests - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-test-large-dagger-0-6-4" steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/cat-tests.yml b/.github/workflows/cat-tests.yml index 97de1c1fda89..e341407dd8f1 100644 --- a/.github/workflows/cat-tests.yml +++ b/.github/workflows/cat-tests.yml @@ -16,7 +16,7 @@ on: jobs: run-cat-unit-tests: name: Run CAT unit tests - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-test-large-dagger-0-6-4" steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index c7f7eb7dddf5..6b9d5d6ce5fa 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -8,19 +8,19 @@ on: inputs: runs-on: type: string - default: conn-nightly-xlarge-runner + default: ci-runner-connector-nightly-xlarge-dagger-0-6-4 required: true test-connectors-options: default: --concurrency=5 --support-level=certified required: true -run-name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" +run-name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" jobs: test_connectors: - name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" + name: "Test connectors: ${{ inputs.test-connectors-options || 'nightly build for Certified connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" timeout-minutes: 720 # 12 hours - runs-on: ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_tests.yml b/.github/workflows/connectors_tests.yml index 610e4fc94ad1..c371735c35bb 100644 --- a/.github/workflows/connectors_tests.yml +++ b/.github/workflows/connectors_tests.yml @@ -19,7 +19,7 @@ on: default: "--modified" runner: description: "The runner to use for this job" - default: "conn-prod-xlarge-runner" + default: "ci-runner-connector-test-large-dagger-0-6-4" pull_request: types: - opened @@ 
-29,7 +29,7 @@ jobs: connectors_ci: name: Connectors CI timeout-minutes: 1440 # 24 hours - runs-on: ${{ inputs.runner || 'conn-prod-xlarge-runner'}} + runs-on: ${{ inputs.runner || 'ci-runner-connector-test-large-dagger-0-6-4'}} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/connectors_weekly_build.yml b/.github/workflows/connectors_weekly_build.yml index ccf1f0b52199..aa96a832b9b8 100644 --- a/.github/workflows/connectors_weekly_build.yml +++ b/.github/workflows/connectors_weekly_build.yml @@ -8,19 +8,19 @@ on: inputs: runs-on: type: string - default: conn-nightly-xlarge-runner + default: ci-runner-connector-nightly-xlarge-dagger-0-6-4 required: true test-connectors-options: default: --concurrency=3 --support-level=community required: true -run-name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" +run-name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" jobs: test_connectors: - name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }}" + name: "Test connectors: ${{ inputs.test-connectors-options || 'weekly build for Community connectors' }} - on ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }}" timeout-minutes: 8640 # 6 days - runs-on: ${{ inputs.runs-on || 'conn-nightly-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-nightly-xlarge-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/format_check.yml b/.github/workflows/format_check.yml index 290eb5662fd9..97530ad424e7 100644 --- a/.github/workflows/format_check.yml +++ b/.github/workflows/format_check.yml @@ -10,7 +10,7 @@ on: - master jobs: format-check: - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-format-medium-dagger-0-6-4" name: "Check for formatting errors on ${{ github.head_ref }}" timeout-minutes: 40 steps: diff --git a/.github/workflows/format_fix.yml b/.github/workflows/format_fix.yml index b1b2c31562aa..d689a8891063 100644 --- a/.github/workflows/format_fix.yml +++ b/.github/workflows/format_fix.yml @@ -10,7 +10,7 @@ on: pull_request: jobs: format-fix: - runs-on: "conn-prod-xlarge-runner" + runs-on: "ci-runner-connector-format-medium-dagger-0-6-4" name: "Apply All Formatting Rules" timeout-minutes: 40 steps: diff --git a/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml b/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml index 0da841726893..9550a5b1635c 100644 --- a/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml +++ b/.github/workflows/metadata_service_deploy_orchestrator_dagger.yml @@ -10,7 +10,7 @@ on: jobs: connector_metadata_service_deploy_orchestrator: name: Connector metadata service deploy orchestrator - runs-on: medium-runner + runs-on: ci-runner-connector-test-large-dagger-0-6-4 steps: - name: Checkout Airbyte uses: actions/checkout@v2 diff --git a/.github/workflows/publish_connectors.yml b/.github/workflows/publish_connectors.yml index 5fdc8dfcde60..ded1f9fb11ae 100644 --- a/.github/workflows/publish_connectors.yml +++ b/.github/workflows/publish_connectors.yml @@ -16,12 +16,12 @@ on: default: "--pre-release" runs-on: type: string - default: 
conn-prod-xlarge-runner + default: ci-runner-connector-publish-large-dagger-0-6-4 required: true jobs: publish_connectors: name: Publish connectors - runs-on: ${{ inputs.runs-on || 'conn-prod-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-publish-large-dagger-0-6-4' }} steps: - name: Checkout Airbyte uses: actions/checkout@v3 diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index 477db6af6d6a..05deefcb7845 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -5,12 +5,12 @@ on: inputs: runs-on: type: string - default: conn-prod-xlarge-runner + default: ci-runner-connector-publish-large-dagger-0-6-4 required: true jobs: no-op: name: No-op - runs-on: ${{ inputs.runs-on || 'conn-prod-xlarge-runner' }} + runs-on: ${{ inputs.runs-on || 'ci-runner-connector-publish-large-dagger-0-6-4' }} steps: - run: echo 'hi!' From 316069e66d358cff18b8cc993f8a8c4a16cd4a13 Mon Sep 17 00:00:00 2001 From: Augustin Date: Thu, 16 Nov 2023 12:28:04 +0100 Subject: [PATCH 02/57] airbyte-ci bin release: trigger on all PRs (#32590) --- .github/workflows/airbyte-ci-release.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/airbyte-ci-release.yml b/.github/workflows/airbyte-ci-release.yml index 1d42e96e7d3b..aad8f7629559 100644 --- a/.github/workflows/airbyte-ci-release.yml +++ b/.github/workflows/airbyte-ci-release.yml @@ -6,8 +6,6 @@ concurrency: on: push: - branches: - - master paths: - "airbyte-ci/connectors/pipelines/**" workflow_dispatch: From fc1b08c65b7c43359ae6f89de6432cad6bea229b Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:35:24 +0100 Subject: [PATCH 03/57] =?UTF-8?q?=F0=9F=9A=A8=F0=9F=9A=A8=20Source=20Amazo?= =?UTF-8?q?n=20Seller=20Partner:=20remove=20brand=20analytics=20reports=20?= =?UTF-8?q?from=20cloud=20(#32355)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: artem1205 Co-authored-by: Anton Karpets --- .../acceptance-test-config.yml | 10 ----- ...ba_fulfillment_current_inventory_data.json | 28 -------------- ...ulfillment_inventory_adjustments_data.json | 31 ---------------- ...a_fulfillment_inventory_receipts_data.json | 27 -------------- ...ba_fulfillment_inventory_summary_data.json | 28 -------------- ...ba_fulfillment_monthly_inventory_data.json | 29 --------------- .../integration_tests/sample_state.json | 15 -------- .../metadata.yaml | 7 +++- ...BA_FULFILLMENT_CURRENT_INVENTORY_DATA.json | 16 -------- ...ULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json | 19 ---------- ...A_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json | 15 -------- ...BA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json | 16 -------- ...BA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json | 17 --------- .../source_amazon_seller_partner/source.py | 37 +++++++++---------- .../source_amazon_seller_partner/streams.py | 20 ---------- .../amazon-seller-partner-migrations.md | 21 +++++++++++ .../sources/amazon-seller-partner.md | 19 +++++----- 17 files changed, 53 insertions(+), 302 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json delete mode 100644 
airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json create mode 100644 docs/integrations/sources/amazon-seller-partner-migrations.md diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml index 1ea5e6f7eb52..47cd75672448 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/acceptance-test-config.yml @@ -55,8 +55,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_FLAT_FILE_RETURNS_DATA_BY_RETURN_DATE bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA - bypass_reason: "no records" - name: GET_VENDOR_SALES_REPORT bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT @@ -67,8 +65,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_V2_SETTLEMENT_REPORT_DATA_FLAT_FILE bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA - bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT bypass_reason: "no records" - name: GET_AFN_INVENTORY_DATA @@ -83,8 +79,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA - bypass_reason: "no records" - name: GET_MERCHANT_LISTINGS_DATA_BACK_COMPAT bypass_reason: "no records" - name: GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT @@ -97,8 +91,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_FBA_SNS_PERFORMANCE_DATA bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA - bypass_reason: "no records" - name: GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA bypass_reason: "no records" - name: GET_FBA_INVENTORY_PLANNING_DATA @@ -113,8 +105,6 @@ acceptance_tests: bypass_reason: "no records" - name: GET_STRANDED_INVENTORY_UI_DATA bypass_reason: "no records" - - name: GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA - bypass_reason: "no records" - name: GET_XML_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL bypass_reason: "no records" - name: ListFinancialEvents diff --git 
a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json deleted file mode 100644 index 376c90214355..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_current_inventory_data.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA", - "json_schema": { - "title": "FBA Daily Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json deleted file mode 100644 index eb241d9e7e3e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_adjustments_data.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA", - "json_schema": { - "title": "FBA Inventory Adjustments Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "adjusted-date": { "type": ["null", "string"] }, - "transaction-item-id": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "reason": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] }, - "reconciled": { "type": ["null", "string"] }, - "unreconciled": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json deleted file mode 100644 index 92575cfb052e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_receipts_data.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA", - "json_schema": { - "title": "FBA Received Inventory 
Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "received-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fba-shipment-id": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json deleted file mode 100644 index b38e9b2849ef..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_inventory_summary_data.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA", - "json_schema": { - "title": "FBA Inventory Event Detail Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "transaction-type": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json deleted file mode 100644 index c695f887c910..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/configured_catalog_get_fba_fulfillment_monthly_inventory_data.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "streams": [ - { - "stream": { - "name": "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA", - "json_schema": { - "title": "FBA Monthly Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "month": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "average-quantity": { "type": ["null", "string"] }, - "end-quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } - }, - "supported_sync_modes": ["full_refresh"] - }, - "sync_mode": "full_refresh", - "destination_sync_mode": "overwrite" - } - ] -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json 
b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json index bebd0fa00a49..a58a834e17d3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/sample_state.json @@ -41,24 +41,9 @@ "GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, - "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, "GET_FBA_FULFILLMENT_CUSTOMER_SHIPMENT_PROMOTION_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, - "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, - "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA": { - "createdTime": "2021-07-01T00:00:00Z" - }, "GET_FBA_MYI_UNSUPPRESSED_INVENTORY_DATA": { "createdTime": "2021-07-01T00:00:00Z" }, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml index 83c764ab41f1..2bad20c6b106 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 - dockerImageTag: 1.6.2 + dockerImageTag: 2.0.0 dockerRepository: airbyte/source-amazon-seller-partner documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner githubIssueLabel: source-amazon-seller-partner @@ -20,6 +20,11 @@ data: oss: enabled: true releaseStage: alpha + releases: + breakingChanges: + 2.0.0: + message: "Deprecated FBA reports will be removed permanently from Cloud and Brand Analytics Reports will be removed temporarily. 
Updates on Brand Analytics Reports can be tracked here: [#32353](https://github.com/airbytehq/airbyte/issues/32353)" + upgradeDeadline: "2023-11-29" supportLevel: community tags: - language:python diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json deleted file mode 100644 index 401cbf484380..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "title": "FBA Daily Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json deleted file mode 100644 index 916f932cc057..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "title": "FBA Inventory Adjustments Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "adjusted-date": { "type": ["null", "string"] }, - "transaction-item-id": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "reason": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] }, - "reconciled": { "type": ["null", "string"] }, - "unreconciled": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json deleted file mode 100644 index 3d23369d51e1..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "title": "FBA Received Inventory Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "received-date": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "fba-shipment-id": { "type": 
["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json deleted file mode 100644 index 1ddf4fceca59..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "title": "FBA Inventory Event Detail Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "snapshot-date": { "type": ["null", "string"] }, - "transaction-type": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "quantity": { "type": ["null", "string"] }, - "disposition": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json deleted file mode 100644 index 796985c5210e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/schemas/GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "title": "FBA Monthly Inventory History Report", - "description": "", - "type": "object", - "$schema": "http://json-schema.org/draft-07/schema#", - "properties": { - "month": { "type": ["null", "string"] }, - "fnsku": { "type": ["null", "string"] }, - "sku": { "type": ["null", "string"] }, - "product-name": { "type": ["null", "string"] }, - "average-quantity": { "type": ["null", "string"] }, - "end-quantity": { "type": ["null", "string"] }, - "fulfillment-center-id": { "type": ["null", "string"] }, - "detailed-disposition": { "type": ["null", "string"] }, - "country": { "type": ["null", "string"] } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index 2ca326c5f47f..5a565c21ad71 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# - +from os import getenv from typing import Any, List, Mapping, Tuple from airbyte_cdk.logger import AirbyteLogger @@ -20,12 +20,7 @@ FbaAfnInventoryReports, FbaCustomerReturnsReports, FbaEstimatedFbaFeesTxtReport, - FbaFulfillmentCurrentInventoryReport, FbaFulfillmentCustomerShipmentPromotionReport, - FbaFulfillmentInventoryAdjustReport, - FbaFulfillmentInventoryReceiptsReport, - FbaFulfillmentInventorySummaryReport, - FbaFulfillmentMonthlyInventoryReport, FbaInventoryPlaningReport, FbaMyiUnsuppressedInventoryReport, FbaOrdersReports, @@ -127,7 +122,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: :param config: A Mapping of the user input configuration as defined in the connector spec. """ stream_kwargs = self._get_stream_kwargs(config) - return [ + streams = [ FbaCustomerReturnsReports(**stream_kwargs), FbaAfnInventoryReports(**stream_kwargs), FbaAfnInventoryByCountryReports(**stream_kwargs), @@ -144,28 +139,16 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: FulfilledShipmentsReports(**stream_kwargs), MerchantListingsReports(**stream_kwargs), VendorDirectFulfillmentShipping(**stream_kwargs), - VendorInventoryReports(**stream_kwargs), - VendorSalesReports(**stream_kwargs), Orders(**stream_kwargs), OrderItems(**stream_kwargs), OrderReportDataShipping(**stream_kwargs), - SellerAnalyticsSalesAndTrafficReports(**stream_kwargs), SellerFeedbackReports(**stream_kwargs), - BrandAnalyticsMarketBasketReports(**stream_kwargs), - BrandAnalyticsSearchTermsReports(**stream_kwargs), - BrandAnalyticsRepeatPurchaseReports(**stream_kwargs), - BrandAnalyticsAlternatePurchaseReports(**stream_kwargs), - BrandAnalyticsItemComparisonReports(**stream_kwargs), GetXmlBrowseTreeData(**stream_kwargs), ListFinancialEventGroups(**stream_kwargs), ListFinancialEvents(**stream_kwargs), LedgerDetailedViewReports(**stream_kwargs), FbaEstimatedFbaFeesTxtReport(**stream_kwargs), - FbaFulfillmentCurrentInventoryReport(**stream_kwargs), FbaFulfillmentCustomerShipmentPromotionReport(**stream_kwargs), - FbaFulfillmentInventoryAdjustReport(**stream_kwargs), - FbaFulfillmentInventoryReceiptsReport(**stream_kwargs), - FbaFulfillmentInventorySummaryReport(**stream_kwargs), FbaMyiUnsuppressedInventoryReport(**stream_kwargs), MerchantCancelledListingsReport(**stream_kwargs), MerchantListingsReport(**stream_kwargs), @@ -173,7 +156,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: MerchantListingsInactiveData(**stream_kwargs), StrandedInventoryUiReport(**stream_kwargs), XmlAllOrdersDataByOrderDataGeneral(**stream_kwargs), - FbaFulfillmentMonthlyInventoryReport(**stream_kwargs), MerchantListingsFypReport(**stream_kwargs), FbaSnsForecastReport(**stream_kwargs), FbaSnsPerformanceReport(**stream_kwargs), @@ -183,3 +165,18 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: LedgerSummaryViewReport(**stream_kwargs), FbaReimbursementsReports(**stream_kwargs), ] + # TODO: Remove after Brand Analytics will be enabled in CLOUD: + # https://github.com/airbytehq/airbyte/issues/32353 + if getenv("DEPLOYMENT_MODE", "").upper() != "CLOUD": + brand_analytics_reports = [ + BrandAnalyticsMarketBasketReports(**stream_kwargs), + BrandAnalyticsSearchTermsReports(**stream_kwargs), + BrandAnalyticsRepeatPurchaseReports(**stream_kwargs), + BrandAnalyticsAlternatePurchaseReports(**stream_kwargs), + BrandAnalyticsItemComparisonReports(**stream_kwargs), + SellerAnalyticsSalesAndTrafficReports(**stream_kwargs), + VendorSalesReports(**stream_kwargs), + VendorInventoryReports(**stream_kwargs), + ] + 
streams += brand_analytics_reports + return streams diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index 38304231378d..401332714252 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -474,26 +474,10 @@ class FbaEstimatedFbaFeesTxtReport(ReportsAmazonSPStream): name = "GET_FBA_ESTIMATED_FBA_FEES_TXT_DATA" -class FbaFulfillmentCurrentInventoryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA" - - class FbaFulfillmentCustomerShipmentPromotionReport(ReportsAmazonSPStream): name = "GET_FBA_FULFILLMENT_CUSTOMER_SHIPMENT_PROMOTION_DATA" -class FbaFulfillmentInventoryAdjustReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA" - - -class FbaFulfillmentInventoryReceiptsReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA" - - -class FbaFulfillmentInventorySummaryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA" - - class FbaMyiUnsuppressedInventoryReport(ReportsAmazonSPStream): name = "GET_FBA_MYI_UNSUPPRESSED_INVENTORY_DATA" @@ -532,10 +516,6 @@ class MerchantCancelledListingsReport(ReportsAmazonSPStream): name = "GET_MERCHANT_CANCELLED_LISTINGS_DATA" -class FbaFulfillmentMonthlyInventoryReport(ReportsAmazonSPStream): - name = "GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA" - - class MerchantListingsFypReport(ReportsAmazonSPStream): name = "GET_MERCHANTS_LISTINGS_FYP_REPORT" diff --git a/docs/integrations/sources/amazon-seller-partner-migrations.md b/docs/integrations/sources/amazon-seller-partner-migrations.md new file mode 100644 index 000000000000..4f51ba68b60c --- /dev/null +++ b/docs/integrations/sources/amazon-seller-partner-migrations.md @@ -0,0 +1,21 @@ +# Amazon Seller Partner Migration Guide + +## Upgrading to 2.0.0 + +This change removes Brand Analytics and permanently removes deprecated FBA reports (from Airbyte Cloud). 
+Customers who have those streams must refresh their schema OR disable the following streams: +* GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT +* GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT +* GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT +* GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT +* GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT +* GET_SALES_AND_TRAFFIC_REPORT +* GET_VENDOR_SALES_REPORT +* GET_VENDOR_INVENTORY_REPORT + +Customers, who have the following streams, will have to disable them: +* GET_FBA_FULFILLMENT_INVENTORY_ADJUSTMENTS_DATA +* GET_FBA_FULFILLMENT_CURRENT_INVENTORY_DATA +* GET_FBA_FULFILLMENT_INVENTORY_RECEIPTS_DATA +* GET_FBA_FULFILLMENT_INVENTORY_SUMMARY_DATA +* GET_FBA_FULFILLMENT_MONTHLY_INVENTORY_DATA diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 3841fa3e416a..c5da26fc4d3d 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -70,21 +70,16 @@ This source is capable of syncing the following tables and their data: - [Orders](https://developer-docs.amazon.com/sp-api/docs/orders-api-v0-reference) \(incremental\) - [Orders Items](https://developer-docs.amazon.com/sp-api/docs/orders-api-v0-reference#getorderitems) \(incremental\) - [Seller Feedback Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(incremental\) -- [Brand Analytics Alternate Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Item Comparison Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Market Basket Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Repeat Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) -- [Brand Analytics Search Terms Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) +- [Brand Analytics Alternate Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Item Comparison Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Market Basket Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Repeat Purchase Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) +- [Brand Analytics Search Terms Report](https://developer-docs.amazon.com/sp-api/docs/report-type-values#brand-analytics-reports) \(only available in OSS\) - [Browse tree report](https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reporttype-values.md#browse-tree-report) - [Financial Event Groups](https://developer-docs.amazon.com/sp-api/docs/finances-api-reference#get-financesv0financialeventgroups) - [Financial Events](https://developer-docs.amazon.com/sp-api/docs/finances-api-reference#get-financesv0financialevents) - [FBA Fee Preview Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Daily Inventory History Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Promotions 
Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Inventory Adjustments Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Received Inventory Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Inventory Event Detail Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) -- [FBA Monthly Inventory History Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Manage Inventory](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [Subscribe and Save Forecast Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [Subscribe and Save Performance Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) @@ -102,6 +97,9 @@ This source is capable of syncing the following tables and their data: - [Inventory Ledger Report - Summary View](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) - [FBA Reimbursements Report](https://sellercentral.amazon.com/help/hub/reference/G200732720) - [Order Data Shipping Report](https://developer-docs.amazon.com/sp-api/docs/order-reports-attributes#get_order_report_data_shipping) +- [Sales and Traffic Business Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) +- [Vendor Sales Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) +- [Vendor Inventory Report](https://developer-docs.amazon.com/sp-api/docs/reports-api-v2021-06-30-reference) \(only available in OSS\) ## Report options @@ -126,6 +124,7 @@ So, for any value that exceeds the limit, the `period_in_days` will be automatic | Version | Date | Pull Request | Subject | |:---------|:-----------|:--------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `2.0.0` | 2023-11-23 | [\#32355](https://github.com/airbytehq/airbyte/pull/32355) | Remove Brand Analytics from Airbyte Cloud, permanently remove deprecated FBA reports | | `1.6.2` | 2023-11-14 | [\#32508](https://github.com/airbytehq/airbyte/pull/32508) | Do not use AWS signature as it is no longer required by the Amazon API | | `1.6.1` | 2023-11-13 | [\#32457](https://github.com/airbytehq/airbyte/pull/32457) | Fix report decompression | | `1.6.0` | 2023-11-09 | [\#32259](https://github.com/airbytehq/airbyte/pull/32259) | mark "aws_secret_key" and "aws_access_key" as required in specification; update schema for stream `Orders` | From a38eca14ec22dbad6e376851185d5e28815133c3 Mon Sep 17 00:00:00 2001 From: Eugene Kulak Date: Thu, 16 Nov 2023 17:02:31 +0200 Subject: [PATCH 04/57] Enable client-side rate limiting on source-stripe #31512 (#32284) Co-authored-by: Eugene Kulak Co-authored-by: keu --- .../connectors/source-stripe/metadata.yaml | 2 +- .../connectors/source-stripe/setup.py | 2 +- .../source-stripe/source_stripe/source.py | 59 +++++++++++++++++-- .../source-stripe/source_stripe/spec.yaml | 16 +++-- .../source-stripe/unit_tests/test_source.py | 58 +++++++++++++++++- docs/integrations/sources/stripe.md | 3 +- 6 files changed, 128 insertions(+), 12 deletions(-) diff --git 
a/airbyte-integrations/connectors/source-stripe/metadata.yaml b/airbyte-integrations/connectors/source-stripe/metadata.yaml index 0c34e7b7c27d..1aaaa9ee97c4 100644 --- a/airbyte-integrations/connectors/source-stripe/metadata.yaml +++ b/airbyte-integrations/connectors/source-stripe/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: e094cb9a-26de-4645-8761-65c0c425d1de - dockerImageTag: 4.5.3 + dockerImageTag: 4.5.4 dockerRepository: airbyte/source-stripe documentationUrl: https://docs.airbyte.com/integrations/sources/stripe githubIssueLabel: source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/setup.py b/airbyte-integrations/connectors/source-stripe/setup.py index 8ce3d6936bdd..55bb256393b6 100644 --- a/airbyte-integrations/connectors/source-stripe/setup.py +++ b/airbyte-integrations/connectors/source-stripe/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk==0.52.8", "stripe==2.56.0", "pendulum==2.1.2"] +MAIN_REQUIREMENTS = ["airbyte-cdk==0.53.6", "stripe==2.56.0", "pendulum==2.1.2"] TEST_REQUIREMENTS = ["pytest-mock~=3.6.1", "pytest~=6.1", "requests-mock", "requests_mock~=1.8", "freezegun==1.2.2"] diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py index e72e4dd5398b..5683c875497e 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py @@ -2,7 +2,9 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +import logging import os +from datetime import timedelta from typing import Any, List, Mapping, MutableMapping, Optional, Tuple import pendulum @@ -13,6 +15,7 @@ from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.message.repository import InMemoryMessageRepository from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.call_rate import AbstractAPIBudget, HttpAPIBudget, HttpRequestMatcher, MovingWindowCallRatePolicy, Rate from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator @@ -33,9 +36,12 @@ UpdatedCursorIncrementalStripeStream, ) -_MAX_CONCURRENCY = 3 +logger = logging.getLogger("airbyte") + +_MAX_CONCURRENCY = 20 _CACHE_DISABLED = os.environ.get("CACHE_DISABLED") USE_CACHE = not _CACHE_DISABLED +STRIPE_TEST_ACCOUNT_PREFIX = "sk_test_" class SourceStripe(AbstractSource): @@ -114,6 +120,52 @@ def customers(**args): **args, ) + @staticmethod + def is_test_account(config: Mapping[str, Any]) -> bool: + """Check if configuration uses Stripe test account (https://stripe.com/docs/keys#obtain-api-keys) + + :param config: + :return: True if configured to use a test account, False - otherwise + """ + + return str(config["client_secret"]).startswith(STRIPE_TEST_ACCOUNT_PREFIX) + + def get_api_call_budget(self, config: Mapping[str, Any]) -> AbstractAPIBudget: + """Get API call budget which connector is allowed to use. 
+ + :param config: + :return: + """ + + max_call_rate = 25 if self.is_test_account(config) else 100 + if config.get("call_rate_limit"): + call_limit = config["call_rate_limit"] + if call_limit > max_call_rate: + logger.warning( + "call_rate_limit is larger than maximum allowed %s, fallback to default %s.", + max_call_rate, + max_call_rate, + ) + call_limit = max_call_rate + else: + call_limit = max_call_rate + + policies = [ + MovingWindowCallRatePolicy( + rates=[Rate(limit=20, interval=timedelta(seconds=1))], + matchers=[ + HttpRequestMatcher(url="https://api.stripe.com/v1/files"), + HttpRequestMatcher(url="https://api.stripe.com/v1/file_links"), + ], + ), + MovingWindowCallRatePolicy( + rates=[Rate(limit=call_limit, interval=timedelta(seconds=1))], + matchers=[], + ), + ] + + return HttpAPIBudget(policies=policies) + def streams(self, config: Mapping[str, Any]) -> List[Stream]: config = self.validate_and_fill_with_defaults(config) authenticator = TokenAuthenticator(config["client_secret"]) @@ -122,6 +174,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: "account_id": config["account_id"], "start_date": config["start_date"], "slice_range": config["slice_range"], + "api_budget": self.get_api_call_budget(config), } incremental_args = {**args, "lookback_window_days": config["lookback_window_days"]} subscriptions = IncrementalStripeStream( @@ -441,9 +494,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ), ] - # We cap the number of workers to avoid hitting the Stripe rate limit - # The limit can be removed or increased once we have proper rate limiting - concurrency_level = min(config.get("num_workers", 2), _MAX_CONCURRENCY) + concurrency_level = min(config.get("num_workers", 10), _MAX_CONCURRENCY) streams[0].logger.info(f"Using concurrent cdk with concurrency level {concurrency_level}") return [ diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml index f65886c41298..5a31b610cd27 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml @@ -61,10 +61,18 @@ connectionSpecification: type: integer title: Number of concurrent workers minimum: 1 - maximum: 3 - default: 2 + maximum: 20 + default: 10 examples: [1, 2, 3] description: >- - The number of worker thread to use for the sync. The bigger the value is, the faster the sync will be. - Be careful as rate limiting is not implemented. + The number of worker thread to use for the sync. + The performance upper boundary depends on call_rate_limit setting and type of account. order: 5 + call_rate_limit: + type: integer + title: Max number of API calls per second + examples: [25, 100] + description: >- + The number of API calls per second that you allow connector to make. This value can not be bigger than real + API call rate limit (https://stripe.com/docs/rate-limits). If not specified the default maximum is 25 and 100 + calls per second for test and production tokens respectively. diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py index 61b226b5da83..476dbd38a689 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_source.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# - +import datetime import logging from contextlib import nullcontext as does_not_raise from unittest.mock import patch @@ -10,7 +10,9 @@ import source_stripe import stripe from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode +from airbyte_cdk.sources.streams.call_rate import CachedLimiterSession, LimiterSession, Rate from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade +from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.utils import AirbyteTracedException from source_stripe import SourceStripe @@ -92,3 +94,57 @@ def test_when_streams_return_full_refresh_as_concurrent(): ).streams(_a_valid_config()) assert len(list(filter(lambda stream: isinstance(stream, StreamFacade), streams))) == 1 + + +@pytest.mark.parametrize( + "input_config, default_call_limit", + ( + ({"account_id": 1, "client_secret": "secret"}, 100), + ({"account_id": 1, "client_secret": "secret", "call_rate_limit": 10}, 10), + ({"account_id": 1, "client_secret": "secret", "call_rate_limit": 110}, 100), + ({"account_id": 1, "client_secret": "sk_test_some_secret"}, 25), + ({"account_id": 1, "client_secret": "sk_test_some_secret", "call_rate_limit": 10}, 10), + ({"account_id": 1, "client_secret": "sk_test_some_secret", "call_rate_limit": 30}, 25), + ), +) +def test_call_budget_creation(mocker, input_config, default_call_limit): + """Test that call_budget was created with specific config i.e., that first policy has specific matchers.""" + + policy_mock = mocker.patch("source_stripe.source.MovingWindowCallRatePolicy") + matcher_mock = mocker.patch("source_stripe.source.HttpRequestMatcher") + source = SourceStripe(catalog=None) + + source.get_api_call_budget(input_config) + + policy_mock.assert_has_calls( + calls=[ + mocker.call(matchers=[mocker.ANY, mocker.ANY], rates=[Rate(limit=20, interval=datetime.timedelta(seconds=1))]), + mocker.call(matchers=[], rates=[Rate(limit=default_call_limit, interval=datetime.timedelta(seconds=1))]), + ], + ) + + matcher_mock.assert_has_calls( + calls=[ + mocker.call(url="https://api.stripe.com/v1/files"), + mocker.call(url="https://api.stripe.com/v1/file_links"), + ] + ) + + +def test_call_budget_passed_to_every_stream(mocker): + """Test that each stream has call_budget passed and creates a proper session""" + + prod_config = {"account_id": 1, "client_secret": "secret"} + source = SourceStripe(catalog=None) + get_api_call_budget_mock = mocker.patch.object(source, "get_api_call_budget") + + streams = source.streams(prod_config) + + assert streams + get_api_call_budget_mock.assert_called_once() + + for stream in streams: + assert isinstance(stream, HttpStream) + session = stream.request_session() + assert isinstance(session, (CachedLimiterSession, LimiterSession)) + assert session._api_budget == get_api_call_budget_mock.return_value diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index 06bea065cf1f..bde9f9d6d0e6 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -216,6 +216,7 @@ Each record is marked with `is_deleted` flag when the appropriate event happens | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| +| 4.5.4 | 2023-11-16 | [32284](https://github.com/airbytehq/airbyte/pull/32284/) | Enable client-side rate limiting | | 4.5.3 | 
2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent | | 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues | | 4.5.1 | 2023-11-01 | [32056](https://github.com/airbytehq/airbyte/pull/32056/) | Use CDK version 0.52.8 | @@ -299,4 +300,4 @@ Each record is marked with `is_deleted` flag when the appropriate event happens | 0.1.9 | 2021-05-13 | [3367](https://github.com/airbytehq/airbyte/pull/3367) | Add acceptance tests for connected accounts | | 0.1.8 | 2021-05-11 | [3566](https://github.com/airbytehq/airbyte/pull/3368) | Bump CDK connectors | - \ No newline at end of file + From 509afc9858a9c6444ea9f4e21146de218d4cc821 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 16 Nov 2023 16:23:16 +0100 Subject: [PATCH 05/57] File CDK: Improve unstructured parser (#32554) Co-authored-by: flash1293 --- .../sources/file_based/file_types/unstructured_parser.py | 6 +++--- airbyte-cdk/python/setup.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index 54d0ab9d7c73..b91be567f9b3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -115,9 +115,9 @@ def _read_file(self, file_handle: IOBase, remote_file: RemoteFile, format: Unstr if filetype == FileType.PDF: # for PDF, read the file into a BytesIO object because some code paths in pdf parsing are doing an instance check on the file object and don't work with file-like objects file_handle.seek(0) - file = BytesIO(file_handle.read()) - file_handle.seek(0) - elements = unstructured_partition_pdf(file=file) + with BytesIO(file_handle.read()) as file: + file_handle.seek(0) + elements = unstructured_partition_pdf(file=file) elif filetype == FileType.DOCX: elements = unstructured_partition_docx(file=file) elif filetype == FileType.PPTX: diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 9949399cfcca..f31184728351 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -23,8 +23,8 @@ tiktoken_dependency = "tiktoken==0.4.0" unstructured_dependencies = [ - "unstructured==0.10.19", - "unstructured[docx,pptx]==0.10.19", + "unstructured==0.10.27", # can't be bumped higher due to transitive dependencies we can't provide + "unstructured[docx,pptx]==0.10.27", "pdf2image==1.16.3", "pdfminer.six==20221105", "unstructured.pytesseract>=0.3.12", From aa111d2bead8a9bca8c12ae85fb26292b688c47b Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 16 Nov 2023 16:32:15 +0100 Subject: [PATCH 06/57] Vector DB CDK: Delete cdc records (#32496) --- .../vector_db_based/document_processor.py | 23 +- .../document_processor_test.py | 232 ++++++++++++++++-- 2 files changed, 223 insertions(+), 32 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py b/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py index 7d7d174baee9..3ed3e3511dd1 100644 --- a/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py +++ b/airbyte-cdk/python/airbyte_cdk/destinations/vector_db_based/document_processor.py @@ -19,6 +19,8 @@ METADATA_STREAM_FIELD = "_ab_stream" METADATA_RECORD_ID_FIELD = "_ab_record_id" 
+CDC_DELETED_FIELD = "_ab_cdc_deleted_at" + @dataclass class Chunk: @@ -103,6 +105,8 @@ def process(self, record: AirbyteRecordMessage) -> Tuple[List[Chunk], Optional[s :param records: List of AirbyteRecordMessages :return: Tuple of (List of document chunks, record id to delete if a stream is in dedup mode to avoid stale documents in the vector store) """ + if CDC_DELETED_FIELD in record.data and record.data[CDC_DELETED_FIELD]: + return [], self._extract_primary_key(record) doc = self._generate_document(record) if doc is None: text_fields = ", ".join(self.text_fields) if self.text_fields else "all fields" @@ -139,22 +143,27 @@ def _extract_relevant_fields(self, record: AirbyteRecordMessage, fields: Optiona def _extract_metadata(self, record: AirbyteRecordMessage) -> Dict[str, Any]: metadata = self._extract_relevant_fields(record, self.metadata_fields) + metadata[METADATA_STREAM_FIELD] = create_stream_identifier(record) + primary_key = self._extract_primary_key(record) + if primary_key: + metadata[METADATA_RECORD_ID_FIELD] = primary_key + return metadata + + def _extract_primary_key(self, record: AirbyteRecordMessage) -> Optional[str]: stream_identifier = create_stream_identifier(record) current_stream: ConfiguredAirbyteStream = self.streams[stream_identifier] - metadata[METADATA_STREAM_FIELD] = stream_identifier # if the sync mode is deduping, use the primary key to upsert existing records instead of appending new ones - if current_stream.primary_key and current_stream.destination_sync_mode == DestinationSyncMode.append_dedup: - metadata[METADATA_RECORD_ID_FIELD] = f"{stream_identifier}_{self._extract_primary_key(record, current_stream)}" - return metadata + if not current_stream.primary_key or current_stream.destination_sync_mode != DestinationSyncMode.append_dedup: + return None - def _extract_primary_key(self, record: AirbyteRecordMessage, stream: ConfiguredAirbyteStream) -> str: primary_key = [] - for key in stream.primary_key: + for key in current_stream.primary_key: try: primary_key.append(str(dpath.util.get(record.data, key))) except KeyError: primary_key.append("__not_found__") - return "_".join(primary_key) + stringified_primary_key = "_".join(primary_key) + return f"{stream_identifier}_{stringified_primary_key}" def _split_document(self, doc: Document) -> List[Document]: chunks: List[Document] = self.splitter.split_documents([doc]) diff --git a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py index 59f5f8c011bb..2660ee791512 100644 --- a/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py +++ b/airbyte-cdk/python/unit_tests/destinations/vector_db_based/document_processor_test.py @@ -23,13 +23,22 @@ def initialize_processor(config=ProcessingConfigModel(chunk_size=48, chunk_overl catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream1", json_schema={}, namespace="namespace1", supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream1", + json_schema={}, + namespace="namespace1", + supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, primary_key=[["id"]], ), ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream2", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream2", + json_schema={}, + 
supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, ), @@ -53,8 +62,14 @@ def initialize_processor(config=ProcessingConfigModel(chunk_size=48, chunk_overl ), (["id"], {"_ab_stream": "namespace1_stream1", "id": 1}), (["id", "non_existing"], {"_ab_stream": "namespace1_stream1", "id": 1}), - (["id", "complex.test"], {"_ab_stream": "namespace1_stream1", "id": 1, "complex.test": "abc"}), - (["id", "arr.*.test"], {"_ab_stream": "namespace1_stream1", "id": 1, "arr.*.test": ["abc", "def"]}), + ( + ["id", "complex.test"], + {"_ab_stream": "namespace1_stream1", "id": 1, "complex.test": "abc"}, + ), + ( + ["id", "arr.*.test"], + {"_ab_stream": "namespace1_stream1", "id": 1, "arr.*.test": ["abc", "def"]}, + ), ], ) def test_process_single_chunk_with_metadata(metadata_fields, expected_metadata): @@ -82,7 +97,7 @@ def test_process_single_chunk_with_metadata(metadata_fields, expected_metadata): assert id_to_delete is None -def test_process_single_chunk_limit4ed_metadata(): +def test_process_single_chunk_limited_metadata(): processor = initialize_processor() record = AirbyteRecordMessage( @@ -112,7 +127,11 @@ def test_process_single_chunk_without_namespace(): catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( - stream=AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), + stream=AirbyteStream( + name="stream1", + json_schema={}, + supported_sync_modes=[SyncMode.full_refresh], + ), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.overwrite, ), @@ -155,7 +174,12 @@ def test_complex_text_fields(): emitted_at=1234, ) - processor.text_fields = ["nested.texts.*.text", "text", "other_nested.non_text", "non.*.existing"] + processor.text_fields = [ + "nested.texts.*.text", + "text", + "other_nested.non_text", + "non.*.existing", + ] processor.metadata_fields = ["non_text", "non_text_2", "id"] chunks, _ = processor.process(record) @@ -169,7 +193,12 @@ def test_complex_text_fields(): other_nested.non_text: \na: xyz b: abc""" ) - assert chunks[0].metadata == {"id": 1, "non_text": "a", "non_text_2": 1, "_ab_stream": "namespace1_stream1"} + assert chunks[0].metadata == { + "id": 1, + "non_text": "a", + "non_text_2": 1, + "_ab_stream": "namespace1_stream1", + } def test_no_text_fields(): @@ -228,7 +257,11 @@ def test_process_multiple_chunks_with_relevant_fields(): 10, 0, None, - ["text: By default, splits are done", "on multi newlines,", "then single newlines, then spaces"], + [ + "text: By default, splits are done", + "on multi newlines,", + "then single newlines, then spaces", + ], ), ( "Overlap splitting", @@ -346,7 +379,11 @@ def test_process_multiple_chunks_with_relevant_fields(): def test_text_splitters(label, text, chunk_size, chunk_overlap, splitter_config, expected_chunks): processor = initialize_processor( ProcessingConfigModel( - chunk_size=chunk_size, chunk_overlap=chunk_overlap, text_fields=["text"], metadata_fields=None, text_splitter=splitter_config + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + text_fields=["text"], + metadata_fields=None, + text_splitter=splitter_config, ) ) @@ -378,16 +415,42 @@ def test_text_splitters(label, text, chunk_size, chunk_overlap, splitter_config, @pytest.mark.parametrize( "label, split_config, has_error_message", [ - ("Invalid separator", SeparatorSplitterConfigModel(mode="separator", separators=['"xxx']), True), - ("Missing quotes", SeparatorSplitterConfigModel(mode="separator", 
separators=["xxx"]), True), - ("Non-string separator", SeparatorSplitterConfigModel(mode="separator", separators=["123"]), True), - ("Object separator", SeparatorSplitterConfigModel(mode="separator", separators=["{}"]), True), - ("Proper separator", SeparatorSplitterConfigModel(mode="separator", separators=['"xxx"', '"\\n\\n"']), False), + ( + "Invalid separator", + SeparatorSplitterConfigModel(mode="separator", separators=['"xxx']), + True, + ), + ( + "Missing quotes", + SeparatorSplitterConfigModel(mode="separator", separators=["xxx"]), + True, + ), + ( + "Non-string separator", + SeparatorSplitterConfigModel(mode="separator", separators=["123"]), + True, + ), + ( + "Object separator", + SeparatorSplitterConfigModel(mode="separator", separators=["{}"]), + True, + ), + ( + "Proper separator", + SeparatorSplitterConfigModel(mode="separator", separators=['"xxx"', '"\\n\\n"']), + False, + ), ], ) def test_text_splitter_check(label, split_config, has_error_message): error = DocumentProcessor.check_config( - ProcessingConfigModel(chunk_size=48, chunk_overlap=0, text_fields=None, metadata_fields=None, text_splitter=split_config) + ProcessingConfigModel( + chunk_size=48, + chunk_overlap=0, + text_fields=None, + metadata_fields=None, + text_splitter=split_config, + ) ) if has_error_message: assert error is not None @@ -400,12 +463,22 @@ def test_text_splitter_check(label, split_config, has_error_message): [ (None, {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), ([], {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), - ([FieldNameMappingConfigModel(from_field="abc", to_field="AAA")], {"abc": "def", "xyz": 123}, {"AAA": "def", "xyz": 123}), - ([FieldNameMappingConfigModel(from_field="non_existing", to_field="AAA")], {"abc": "def", "xyz": 123}, {"abc": "def", "xyz": 123}), + ( + [FieldNameMappingConfigModel(from_field="abc", to_field="AAA")], + {"abc": "def", "xyz": 123}, + {"AAA": "def", "xyz": 123}, + ), + ( + [FieldNameMappingConfigModel(from_field="non_existing", to_field="AAA")], + {"abc": "def", "xyz": 123}, + {"abc": "def", "xyz": 123}, + ), ], ) def test_rename_metadata_fields( - mappings: Optional[List[FieldNameMappingConfigModel]], fields: Mapping[str, Any], expected_chunk_metadata: Mapping[str, Any] + mappings: Optional[List[FieldNameMappingConfigModel]], + fields: Mapping[str, Any], + expected_chunk_metadata: Mapping[str, Any], ): processor = initialize_processor() @@ -422,21 +495,43 @@ def test_rename_metadata_fields( chunks, id_to_delete = processor.process(record) assert len(chunks) == 1 - assert chunks[0].metadata == {**expected_chunk_metadata, "_ab_stream": "namespace1_stream1", "text": "abc"} + assert chunks[0].metadata == { + **expected_chunk_metadata, + "_ab_stream": "namespace1_stream1", + "text": "abc", + } @pytest.mark.parametrize( "primary_key_value, stringified_primary_key, primary_key", [ ({"id": 99}, "namespace1_stream1_99", [["id"]]), - ({"id": 99, "name": "John Doe"}, "namespace1_stream1_99_John Doe", [["id"], ["name"]]), - ({"id": 99, "name": "John Doe", "age": 25}, "namespace1_stream1_99_John Doe_25", [["id"], ["name"], ["age"]]), - ({"nested": {"id": "abc"}, "name": "John Doe"}, "namespace1_stream1_abc_John Doe", [["nested", "id"], ["name"]]), - ({"nested": {"id": "abc"}}, "namespace1_stream1_abc___not_found__", [["nested", "id"], ["name"]]), + ( + {"id": 99, "name": "John Doe"}, + "namespace1_stream1_99_John Doe", + [["id"], ["name"]], + ), + ( + {"id": 99, "name": "John Doe", "age": 25}, + "namespace1_stream1_99_John Doe_25", + [["id"], ["name"], 
["age"]], + ), + ( + {"nested": {"id": "abc"}, "name": "John Doe"}, + "namespace1_stream1_abc_John Doe", + [["nested", "id"], ["name"]], + ), + ( + {"nested": {"id": "abc"}}, + "namespace1_stream1_abc___not_found__", + [["nested", "id"], ["name"]], + ), ], ) def test_process_multiple_chunks_with_dedupe_mode( - primary_key_value: Mapping[str, Any], stringified_primary_key: str, primary_key: List[List[str]] + primary_key_value: Mapping[str, Any], + stringified_primary_key: str, + primary_key: List[List[str]], ): processor = initialize_processor() @@ -462,3 +557,90 @@ def test_process_multiple_chunks_with_dedupe_mode( for chunk in chunks: assert chunk.metadata["_ab_record_id"] == stringified_primary_key assert id_to_delete == stringified_primary_key + + +@pytest.mark.parametrize( + "record, sync_mode, has_chunks, raises, expected_id_to_delete", + [ + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + True, + False, + "namespace1_stream1_1", + id="update", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append, + True, + False, + None, + id="append", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"text": "This is the text", "id": "1", "_ab_cdc_deleted_at": 1234}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + False, + "namespace1_stream1_1", + id="cdc_delete", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"id": "1", "_ab_cdc_deleted_at": 1234}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + False, + "namespace1_stream1_1", + id="cdc_delete_without_text", + ), + pytest.param( + AirbyteRecordMessage( + stream="stream1", + namespace="namespace1", + data={"id": "1"}, + emitted_at=1234, + ), + DestinationSyncMode.append_dedup, + False, + True, + "namespace1_stream1_1", + id="update_without_text", + ), + ], +) +def test_process_cdc_records(record, sync_mode, has_chunks, raises, expected_id_to_delete): + processor = initialize_processor() + + processor.text_fields = ["text"] + + processor.streams["namespace1_stream1"].destination_sync_mode = sync_mode + + if raises: + with pytest.raises(AirbyteTracedException): + processor.process(record) + else: + chunks, id_to_delete = processor.process(record) + if has_chunks: + assert len(chunks) > 0 + assert id_to_delete == expected_id_to_delete From 261a6fd976e92fe01be130d4327bbbae55008047 Mon Sep 17 00:00:00 2001 From: flash1293 Date: Thu, 16 Nov 2023 15:43:52 +0000 Subject: [PATCH 07/57] =?UTF-8?q?=F0=9F=A4=96=20Bump=20patch=20version=20o?= =?UTF-8?q?f=20Python=20CDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- airbyte-cdk/python/.bumpversion.cfg | 2 +- airbyte-cdk/python/CHANGELOG.md | 3 +++ airbyte-cdk/python/Dockerfile | 4 ++-- airbyte-cdk/python/setup.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/.bumpversion.cfg b/airbyte-cdk/python/.bumpversion.cfg index 08bf1ea465db..d93cc7ac0f15 100644 --- a/airbyte-cdk/python/.bumpversion.cfg +++ b/airbyte-cdk/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.53.7 +current_version = 0.53.8 commit = False [bumpversion:file:setup.py] diff --git 
a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 8778ed30577b..fc944faa64c8 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.53.8 +Vector DB CDK: Remove CDC records, File CDK: Update unstructured parser + ## 0.53.7 low-code: fix debug logging when using --debug flag diff --git a/airbyte-cdk/python/Dockerfile b/airbyte-cdk/python/Dockerfile index fdd41a9820ee..21618bcb99ff 100644 --- a/airbyte-cdk/python/Dockerfile +++ b/airbyte-cdk/python/Dockerfile @@ -10,7 +10,7 @@ RUN apk --no-cache upgrade \ && apk --no-cache add tzdata build-base # install airbyte-cdk -RUN pip install --prefix=/install airbyte-cdk==0.53.7 +RUN pip install --prefix=/install airbyte-cdk==0.53.8 # build a clean environment FROM base @@ -32,5 +32,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] # needs to be the same as CDK -LABEL io.airbyte.version=0.53.7 +LABEL io.airbyte.version=0.53.8 LABEL io.airbyte.name=airbyte/source-declarative-manifest diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index f31184728351..8c2f8921ae71 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -36,7 +36,7 @@ name="airbyte-cdk", # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be # updated if our semver format changes such as using release candidate versions. - version="0.53.7", + version="0.53.8", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", From 7dc53c27e6f3a1169c946a8eff2940e2d0604a40 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 16 Nov 2023 07:45:48 -0800 Subject: [PATCH 08/57] enable parallelism on bigquery tests (#32580) --- .../connectors/destination-bigquery/gradle.properties | 1 + 1 file changed, 1 insertion(+) create mode 100644 airbyte-integrations/connectors/destination-bigquery/gradle.properties diff --git a/airbyte-integrations/connectors/destination-bigquery/gradle.properties b/airbyte-integrations/connectors/destination-bigquery/gradle.properties new file mode 100644 index 000000000000..4dbe8b8729df --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 From f157e14346cfc3d8f8aa27703b4370d3f924647c Mon Sep 17 00:00:00 2001 From: Maxime Carbonneau-Leclerc Date: Thu, 16 Nov 2023 11:45:52 -0500 Subject: [PATCH 09/57] =?UTF-8?q?=F0=9F=90=9B=20Source=20Bing=20Ads:=20Dis?= =?UTF-8?q?able=20upgrade=20to=202.0.0=20(#32604)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- airbyte-integrations/connectors/source-bing-ads/metadata.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml index 63f2a336f97f..599a7e5c7206 100644 --- a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml @@ -25,9 +25,11 @@ data: name: Bing Ads registries: cloud: + dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU enabled: true oss: enabled: true + dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU releaseStage: generally_available releases: breakingChanges: From 136535d546ded551035b5251b7d3169fb21bb127 Mon Sep 17 
00:00:00 2001 From: Joe Reuter Date: Thu, 16 Nov 2023 17:48:23 +0100 Subject: [PATCH 10/57] Google Drive: Improve folder URL input (#32547) Co-authored-by: flash1293 --- .../integration_tests/spec.json | 2 ++ .../source-google-drive/metadata.yaml | 2 +- .../connectors/source-google-drive/setup.py | 2 +- .../source_google_drive/spec.py | 2 ++ .../source_google_drive/stream_reader.py | 18 ++---------- .../source_google_drive/utils.py | 21 ++++++++++++++ .../unit_tests/test_utils.py | 28 +++++++++++++++++++ docs/integrations/sources/google-drive.md | 1 + 8 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py create mode 100644 airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py diff --git a/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json b/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json index 0b148260c015..709efd036a5b 100644 --- a/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-google-drive/integration_tests/spec.json @@ -304,6 +304,8 @@ "https://drive.google.com/drive/folders/1Xaz0vXXXX2enKnNYU5qSt9NS70gvMyYn" ], "order": 0, + "pattern": "^https://drive.google.com/.+", + "pattern_descriptor": "https://drive.google.com/drive/folders/MY-FOLDER-ID", "type": "string" }, "credentials": { diff --git a/airbyte-integrations/connectors/source-google-drive/metadata.yaml b/airbyte-integrations/connectors/source-google-drive/metadata.yaml index f7eb6998beff..e561f16c6a0d 100644 --- a/airbyte-integrations/connectors/source-google-drive/metadata.yaml +++ b/airbyte-integrations/connectors/source-google-drive/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: file connectorType: source definitionId: 9f8dda77-1048-4368-815b-269bf54ee9b8 - dockerImageTag: 0.0.2 + dockerImageTag: 0.0.3 dockerRepository: airbyte/source-google-drive githubIssueLabel: source-google-drive icon: google-drive.svg diff --git a/airbyte-integrations/connectors/source-google-drive/setup.py b/airbyte-integrations/connectors/source-google-drive/setup.py index af0e32a9949e..ed7492559cd9 100644 --- a/airbyte-integrations/connectors/source-google-drive/setup.py +++ b/airbyte-integrations/connectors/source-google-drive/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk[file-based]>=0.53.5", + "airbyte-cdk[file-based]>=0.53.8", "google-api-python-client==2.104.0", "google-auth-httplib2==0.1.1", "google-auth-oauthlib==1.1.0", diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py index 7c2a60b27b82..00a360e0640b 100644 --- a/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/spec.py @@ -55,6 +55,8 @@ class Config: description="URL for the folder you want to sync. 
Using individual streams and glob patterns, it's possible to only sync a subset of all files located in the folder.", examples=["https://drive.google.com/drive/folders/1Xaz0vXXXX2enKnNYU5qSt9NS70gvMyYn"], order=0, + pattern="^https://drive.google.com/.+", + pattern_descriptor="https://drive.google.com/drive/folders/MY-FOLDER-ID", ) credentials: Union[OAuthCredentials, ServiceAccountCredentials] = Field( diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py index 2b6fa5f9cccc..dd786360f7a3 100644 --- a/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/stream_reader.py @@ -16,6 +16,7 @@ from google.oauth2 import credentials, service_account from googleapiclient.discovery import build from googleapiclient.http import MediaIoBaseDownload +from source_google_drive.utils import get_folder_id from .spec import SourceGoogleDriveSpec @@ -86,7 +87,7 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo Get all files matching the specified glob patterns. """ service = self.google_drive_service - root_folder_id = self._get_folder_id(self.config.folder_url) + root_folder_id = get_folder_id(self.config.folder_url) # ignore prefix argument as it's legacy only and this is a new connector prefixes = self.get_prefixes_from_globs(globs) @@ -141,21 +142,6 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo if request is None: break - def _get_folder_id(self, url): - # Regular expression pattern to check the URL structure and extract the ID - pattern = r"^https://drive\.google\.com/drive/folders/([a-zA-Z0-9_-]+)$" - - # Find the pattern in the URL - match = re.search(pattern, url) - - if match: - # The matched group is the ID - drive_id = match.group(1) - return drive_id - else: - # If no match is found - raise ValueError(f"Could not extract folder ID from {url}") - def _is_exportable_document(self, mime_type: str): """ Returns true if the given file is a Google App document that can be exported. diff --git a/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py b/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py new file mode 100644 index 000000000000..c0994802358b --- /dev/null +++ b/airbyte-integrations/connectors/source-google-drive/source_google_drive/utils.py @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +from urllib.parse import urlparse + + +def get_folder_id(url_string: str) -> str: + """ + Extract the folder ID from a Google Drive folder URL. + + Takes the last path segment of the URL, which is the folder ID (ignoring trailing slashes and query parameters). 
+ """ + try: + parsed_url = urlparse(url_string) + if parsed_url.scheme != "https" or parsed_url.netloc != "drive.google.com": + raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/") + path_segments = list(filter(None, parsed_url.path.split("/"))) + if path_segments[-2] != "folders" or len(path_segments) < 3: + raise ValueError("Folder URL has to be of the form https://drive.google.com/drive/folders/") + return path_segments[-1] + except Exception: + raise ValueError("Folder URL is invalid") diff --git a/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py b/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py new file mode 100644 index 000000000000..8dcb7e52e223 --- /dev/null +++ b/airbyte-integrations/connectors/source-google-drive/unit_tests/test_utils.py @@ -0,0 +1,28 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + + +import pytest +from source_google_drive.utils import get_folder_id + + +@pytest.mark.parametrize( + "input, output, raises", + [ + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p/", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/folders/1q2w3e4r5t6y7u8i9o0p?usp=link_sharing", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p/", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p?usp=link_sharing", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p#abc", "1q2w3e4r5t6y7u8i9o0p", False), + ("https://docs.google.com/document/d/fsgfjdsh", None, True), + ("https://drive.google.com/drive/my-drive", None, True), + ("http://drive.google.com/drive/u/0/folders/1q2w3e4r5t6y7u8i9o0p/", None, True), + ("https://drive.google.com/", None, True), + ] +) +def test_get_folder_id(input, output, raises): + if raises: + with pytest.raises(ValueError): + get_folder_id(input) + else: + assert get_folder_id(input) == output \ No newline at end of file diff --git a/docs/integrations/sources/google-drive.md b/docs/integrations/sources/google-drive.md index cff121b4e696..df8aa03e2bba 100644 --- a/docs/integrations/sources/google-drive.md +++ b/docs/integrations/sources/google-drive.md @@ -247,6 +247,7 @@ Before parsing each document, the connector exports Google Document files to Doc | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|-----------------------------------------------------------------------------------| +| 0.0.3 | 2023-11-16 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Improve folder id input and update document file type parser | | 0.0.2 | 2023-11-02 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Allow syncs on shared drives | | 0.0.1 | 2023-11-02 | [31458](https://github.com/airbytehq/airbyte/pull/31458) | Initial Google Drive source | From f5cbe29622c5fe847353a48a276d25ec99095846 Mon Sep 17 00:00:00 2001 From: Denys Davydov Date: Thu, 16 Nov 2023 19:15:23 +0200 Subject: [PATCH 11/57] :rotating_light: :rotating_light: Source Stripe: fix multiple issues regarding Refunds, CheckoutSessions and CheckoutSessionsLineItems + fix stream schemas (#32286) Co-authored-by: davydov-d --- .../source-stripe/acceptance-test-config.yml | 2 + .../integration_tests/abnormal_state.json | 4 +- 
.../integration_tests/configured_catalog.json | 8 +- .../integration_tests/expected_records.jsonl | 16 +- .../connectors/source-stripe/metadata.yaml | 7 +- .../schemas/checkout_sessions_line_items.json | 2 + .../source_stripe/schemas/invoices.json | 5 +- .../schemas/subscription_schedule.json | 6 +- .../source_stripe/schemas/subscriptions.json | 5 +- .../source-stripe/source_stripe/source.py | 72 ++- .../source-stripe/source_stripe/spec.yaml | 4 +- .../source-stripe/source_stripe/streams.py | 238 +++---- .../source-stripe/unit_tests/test_streams.py | 582 +++++++++++++++--- .../integrations/sources/stripe-migrations.md | 15 + docs/integrations/sources/stripe.md | 197 +++--- 15 files changed, 788 insertions(+), 375 deletions(-) diff --git a/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml b/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml index 0dda354b4266..dfdddbb6ca31 100644 --- a/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-stripe/acceptance-test-config.yml @@ -13,6 +13,8 @@ acceptance_tests: discovery: tests: - config_path: "secrets/config.json" + backward_compatibility_tests_config: + disable_for_version: 4.4.2 basic_read: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json index e34da831b7be..97d865ec3c49 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/abnormal_state.json @@ -163,14 +163,14 @@ { "type": "STREAM", "stream": { - "stream_state": { "expires_at": 10000000000 }, + "stream_state": { "updated": 10000000000 }, "stream_descriptor": { "name": "checkout_sessions" } } }, { "type": "STREAM", "stream": { - "stream_state": { "checkout_session_expires_at": 10000000000 }, + "stream_state": { "checkout_session_updated": 10000000000 }, "stream_descriptor": { "name": "checkout_sessions_line_items" } } }, diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json index fc3ccb073b53..281642987467 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/configured_catalog.json @@ -143,11 +143,11 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["checkout_session_expires_at"], + "default_cursor_field": ["checkout_session_updated"], "source_defined_primary_key": [["id"]] }, "primary_key": [["id"]], - "cursor_field": ["checkout_session_expires_at"], + "cursor_field": ["checkout_session_updated"], "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, @@ -459,11 +459,11 @@ "json_schema": {}, "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": ["updated"], + "default_cursor_field": ["created"], "source_defined_primary_key": [["id"]] }, "primary_key": [["id"]], - "cursor_field": ["updated"], + "cursor_field": ["created"], "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl 
b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl index a3732127059f..a84e541f0ecf 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl @@ -1,12 +1,12 @@ -{"stream": "checkout_sessions_line_items", "data": {"id": "li_1O2XZ1EcXtiJtvvh26q22omU", "object": "item", "amount_discount": 0, "amount_subtotal": 3400, "amount_tax": 0, "amount_total": 3400, "currency": "usd", "description": "Test Product 1", "discounts": [], "price": {"id": "price_1MX364EcXtiJtvvh6jKcimNL", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1675345504, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NHcKselSHfKdfc", "recurring": null, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 1700, "unit_amount_decimal": "1700"}, "quantity": 2, "taxes": [], "checkout_session_id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "checkout_session_expires_at": 1697713523}, "emitted_at": 1697627220862} +{"stream": "checkout_sessions_line_items", "data": {"checkout_session_id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "checkout_session_expires_at": 1697713523, "checkout_session_created": 1697627124, "checkout_session_updated": 1697627124, "id": "li_1O2XZ1EcXtiJtvvh26q22omU", "object": "item", "amount_discount": 0, "amount_subtotal": 3400, "amount_tax": 0, "amount_total": 3400, "currency": "usd", "description": "Test Product 1", "discounts": [], "price": {"id": "price_1MX364EcXtiJtvvh6jKcimNL", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1675345504, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NHcKselSHfKdfc", "recurring": null, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 1700, "unit_amount_decimal": "1700"}, "quantity": 2, "taxes": []}, "emitted_at": 1699376426293} {"stream": "customer_balance_transactions", "data": {"id": "cbtxn_1MX2zPEcXtiJtvvhr4L2D3Q1", "object": "customer_balance_transaction", "amount": -50000.0, "created": 1675345091, "credit_note": null, "currency": "usd", "customer": "cus_NGoTFiJFVbSsvZ", "description": null, "ending_balance": 0.0, "invoice": "in_1MX2yFEcXtiJtvvhMXhUCgKx", "livemode": false, "metadata": {}, "type": "applied_to_invoice"}, "emitted_at": 1697627222916} {"stream": "customer_balance_transactions", "data": {"id": "cbtxn_1MWIPLEcXtiJtvvhLnQYjVCj", "object": "customer_balance_transaction", "amount": 50000.0, "created": 1675166031, "credit_note": null, "currency": "usd", "customer": "cus_NGoTFiJFVbSsvZ", "description": "Test credit balance", "ending_balance": 50000.0, "invoice": null, "livemode": false, "metadata": {}, "type": "adjustment"}, "emitted_at": 1697627222918} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIjEcXtiJtvvhqDfSlpM4", "object": "setup_attempt", "application": null, "created": 1649752937, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIj2eZvKYlo2CAlv2Vhqc", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": "seti_1KnfIjEcXtiJtvvhPw5znVKY", "status": "succeeded", "usage": 
"off_session"}, "emitted_at": 1697627241471} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIdEcXtiJtvvhpDrYVlRP", "object": "setup_attempt", "application": null, "created": 1649752931, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIc2eZvKYlo2Civ7snSPy", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": "seti_1KnfIcEcXtiJtvvh61qlCaDf", "status": "succeeded", "usage": "off_session"}, "emitted_at": 1697627242509} {"stream": "setup_attempts", "data": {"id": "setatt_1KnfIVEcXtiJtvvhqouWGuhD", "object": "setup_attempt", "application": null, "created": 1649752923, "customer": null, "flow_directions": null, "livemode": false, "on_behalf_of": null, "payment_method": "pm_1KnfIV2eZvKYlo2CaOLGBF00", "payment_method_details": {"acss_debit": {}, "type": "acss_debit"}, "setup_error": null, "setup_intent": "seti_1KnfIVEcXtiJtvvhWiIbMkpH", "status": "succeeded", "usage": "off_session"}, "emitted_at": 1697627243547} -{"stream": "accounts", "data": {"id": "acct_1NGp6SD04fX0Aizk", "object": "account", "capabilities": {"acss_debit_payments": "active", "affirm_payments": "active", "afterpay_clearpay_payments": "active", "bancontact_payments": "active", "card_payments": "active", "cartes_bancaires_payments": "pending", "cashapp_payments": "active", "eps_payments": "active", "giropay_payments": "active", "ideal_payments": "active", "klarna_payments": "active", "link_payments": "active", "p24_payments": "active", "sepa_debit_payments": "active", "sofort_payments": "active", "transfers": "active", "us_bank_account_ach_payments": "active"}, "charges_enabled": true, "country": "US", "default_currency": "usd", "details_submitted": true, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "payouts_enabled": true, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": "AIRBYTE", "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": "Airbyte", "timezone": "Asia/Tbilisi"}, "payments": {"statement_descriptor": "WWW.AIRBYTE.COM", "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267880} -{"stream": "accounts", "data": {"id": "acct_1MwD6tIyVv44cUB4", "object": "account", "business_profile": {"mcc": null, "name": null, "product_description": null, "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "business_type": null, "capabilities": {"card_payments": "inactive", "transfers": "inactive"}, "charges_enabled": false, "country": "US", "created": 1681342196, "default_currency": "usd", "details_submitted": false, "email": "jenny.rosen@example.com", "external_accounts": {"object": "list", "data": [], "has_more": false, "total_count": 0, "url": "/v1/accounts/acct_1MwD6tIyVv44cUB4/external_accounts"}, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": 
[], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"decline_on": {"avs_failure": false, "cvc_failure": false}, "statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "payouts": {"debit_negative_balances": false, "schedule": {"delay_days": 2, "interval": "daily"}, "statement_descriptor": null}, "sepa_debit_payments": {}}, "tos_acceptance": {"date": null, "ip": null, "user_agent": null}, "type": "custom"}, "emitted_at": 1697627267882} -{"stream": "accounts", "data": {"id": "acct_1Jx8unEYmRTj5on1", "object": "account", "business_profile": {"mcc": null, "name": "Airbyte", "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "capabilities": {}, "charges_enabled": false, "controller": {"type": "account"}, "country": "US", "default_currency": "usd", "details_submitted": false, "email": null, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267884} +{"stream": "accounts", "data": 
{"id": "acct_1NGp6SD04fX0Aizk", "object": "account", "capabilities": {"acss_debit_payments": "active", "affirm_payments": "active", "afterpay_clearpay_payments": "active", "bancontact_payments": "active", "card_payments": "active", "cartes_bancaires_payments": "pending", "cashapp_payments": "active", "eps_payments": "active", "giropay_payments": "active", "ideal_payments": "active", "klarna_payments": "active", "link_payments": "active", "p24_payments": "active", "sepa_debit_payments": "active", "sofort_payments": "active", "transfers": "active", "us_bank_account_ach_payments": "active"}, "charges_enabled": true, "country": "US", "default_currency": "usd", "details_submitted": true, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "payouts_enabled": true, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": "AIRBYTE", "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": "Airbyte", "timezone": "Asia/Tbilisi"}, "payments": {"statement_descriptor": "WWW.AIRBYTE.COM", "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267880} +{"stream": "accounts", "data": {"id": "acct_1MwD6tIyVv44cUB4", "object": "account", "business_profile": {"mcc": null, "name": null, "product_description": null, "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "business_type": null, "capabilities": {"card_payments": "inactive", "transfers": "inactive"}, "charges_enabled": false, "country": "US", "created": 1681342196, "default_currency": "usd", "details_submitted": false, "email": "jenny.rosen@example.com", "external_accounts": {"object": "list", "data": [], "has_more": false, "total_count": 0, "url": "/v1/accounts/acct_1MwD6tIyVv44cUB4/external_accounts"}, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": ["business_profile.mcc", "business_profile.url", "business_type", "external_account", "representative.first_name", "representative.last_name", "tos_acceptance.date", "tos_acceptance.ip"], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": 
null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"decline_on": {"avs_failure": false, "cvc_failure": false}, "statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "payouts": {"debit_negative_balances": false, "schedule": {"delay_days": 2, "interval": "daily"}, "statement_descriptor": null}, "sepa_debit_payments": {}}, "tos_acceptance": {"date": null, "ip": null, "user_agent": null}, "type": "custom"}, "emitted_at": 1697627267882} +{"stream": "accounts", "data": {"id": "acct_1Jx8unEYmRTj5on1", "object": "account", "business_profile": {"mcc": null, "name": "Airbyte", "support_address": null, "support_email": null, "support_phone": null, "support_url": null, "url": null}, "capabilities": {}, "charges_enabled": false, "controller": {"type": "account"}, "country": "US", "default_currency": "usd", "details_submitted": false, "email": null, "future_requirements": {"alternatives": [], "current_deadline": null, "currently_due": [], "disabled_reason": null, "errors": [], "eventually_due": [], "past_due": [], "pending_verification": []}, "metadata": {}, "payouts_enabled": false, "requirements": {"alternatives": [], "current_deadline": null, "currently_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "disabled_reason": "requirements.past_due", "errors": [], "eventually_due": ["business_profile.product_description", "business_profile.support_phone", "business_profile.url", "external_account", "tos_acceptance.date", "tos_acceptance.ip"], "past_due": [], "pending_verification": []}, "settings": {"bacs_debit_payments": {"display_name": null, "service_user_number": null}, "branding": {"icon": null, "logo": null, "primary_color": null, "secondary_color": null}, "card_issuing": {"tos_acceptance": {"date": null, "ip": null}}, "card_payments": {"statement_descriptor_prefix": null, "statement_descriptor_prefix_kana": null, "statement_descriptor_prefix_kanji": null}, "dashboard": {"display_name": null, "timezone": "Etc/UTC"}, "payments": {"statement_descriptor": null, "statement_descriptor_kana": null, "statement_descriptor_kanji": null}, "sepa_debit_payments": {}}, "type": "standard"}, "emitted_at": 1697627267884} {"stream": "shipping_rates", "data": {"id": "shr_1NXgplEcXtiJtvvhA1ntV782", "object": "shipping_rate", "active": true, "created": 1690274589, "delivery_estimate": "{'maximum': {'unit': 'business_day', 'value': 14}, 'minimum': {'unit': 'business_day', 'value': 10}}", "display_name": "Test Ground Shipping", "fixed_amount": {"amount": 999, "currency": "usd"}, "livemode": false, "metadata": {}, "tax_behavior": "inclusive", "tax_code": "txcd_92010001", "type": "fixed_amount"}, "emitted_at": 1697627269309} {"stream": "balance_transactions", "data": {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": -9164, "available_on": 1645488000, "created": 1645406919, "currency": "usd", "description": "STRIPE PAYOUT", "exchange_rate": null, "fee": 0, "fee_details": [], "net": -9164, "reporting_category": "payout", "source": "po_1KVQhfEcXtiJtvvhZlUkl08U", "status": "available", "type": "payout"}, "emitted_at": 1697627270253} 
{"stream": "balance_transactions", "data": {"id": "txn_3K9FSOEcXtiJtvvh0KoS5mx7", "object": "balance_transaction", "amount": 5300, "available_on": 1640649600, "created": 1640120473, "currency": "usd", "description": null, "exchange_rate": null, "fee": 184, "fee_details": [{"amount": 184, "application": null, "currency": "usd", "description": "Stripe processing fees", "type": "stripe_fee"}], "net": 5116, "reporting_category": "charge", "source": "ch_3K9FSOEcXtiJtvvh0zxb7clc", "status": "available", "type": "charge"}, "emitted_at": 1697627270254} @@ -17,7 +17,7 @@ {"stream": "file_links", "data": {"id": "link_1KnfIiEcXtiJtvvhCNceSyei", "object": "file_link", "created": 1649752936, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfY1FvanBFTmt0dUdrRWJXTHBpUlVYVUtu007305bsv3"}, "emitted_at": 1697627273833} {"stream": "file_links", "data": {"id": "link_1KnfIbEcXtiJtvvhyBLUqkSt", "object": "file_link", "created": 1649752929, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfaXh1blBqMmY0MzI3SHZWbUZIeFVGU3Nl0022JjupYq"}, "emitted_at": 1697627273834} {"stream": "file_links", "data": {"id": "link_1KnfIUEcXtiJtvvh0ktKHfWz", "object": "file_link", "created": 1649752922, "expired": false, "expires_at": null, "file": "file_1Jx631EcXtiJtvvh9J1J59wL", "livemode": false, "metadata": {}, "url": "https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfNzhlbE9MUGNYbkJzMkRLSWdEcnhvY3FH00DK5jBVaH"}, "emitted_at": 1697627273835} -{"stream": "checkout_sessions", "data": {"id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "object": "checkout.session", "after_expiration": null, "allow_promotion_codes": null, "amount_subtotal": 3400, "amount_total": 3400, "automatic_tax": {"enabled": false, "status": null}, "billing_address_collection": null, "cancel_url": null, "client_reference_id": null, "client_secret": null, "consent": null, "consent_collection": null, "created": 1697627124, "currency": "usd", "currency_conversion": null, "custom_fields": [], "custom_text": {"shipping_address": null, "submit": null, "terms_of_service_acceptance": null}, "customer": null, "customer_creation": "always", "customer_details": null, "customer_email": null, "expires_at": 1697713523, "invoice": null, "invoice_creation": {"enabled": false, "invoice_data": {"account_tax_ids": null, "custom_fields": null, "description": null, "footer": null, "metadata": {}, "rendering_options": null}}, "livemode": false, "locale": null, "metadata": {}, "mode": "payment", "payment_intent": "pi_3O2XZ1EcXtiJtvvh0zWGn33E", "payment_link": null, "payment_method_collection": "always", "payment_method_configuration_details": {"id": "pmc_1MC0oMEcXtiJtvvhmhbSUwTJ", "parent": null}, "payment_method_options": {"us_bank_account": {"financial_connections": {"permissions": ["payment_method"], "prefetch": []}, "verification_method": "automatic"}, "wechat_pay": {"app_id": null, "client": "web"}}, "payment_method_types": ["card", "alipay", "klarna", "link", "us_bank_account", "wechat_pay", "cashapp"], "payment_status": "unpaid", "phone_number_collection": {"enabled": false}, "recovered_from": null, "setup_intent": null, "shipping_address_collection": null, "shipping_cost": null, "shipping_details": null, "shipping_options": [], "status": 
"expired", "submit_type": null, "subscription": null, "success_url": "https://example.com/success", "total_details": {"amount_discount": 0, "amount_shipping": 0, "amount_tax": 0}, "ui_mode": "hosted", "url": null, "updated": 1697713523}, "emitted_at": 1697627275062} +{"stream": "checkout_sessions", "data": {"id": "cs_test_a1uSLwxkrTLjGhRXgzJweMwh09uvSZcWIkGLcIqDXzYADowSPwkAmJUrAN", "object": "checkout.session", "after_expiration": null, "allow_promotion_codes": null, "amount_subtotal": 3400, "amount_total": 3400, "automatic_tax": {"enabled": false, "status": null}, "billing_address_collection": null, "cancel_url": null, "client_reference_id": null, "client_secret": null, "consent": null, "consent_collection": null, "created": 1697627124, "currency": "usd", "currency_conversion": null, "custom_fields": [], "custom_text": {"shipping_address": null, "submit": null, "terms_of_service_acceptance": null}, "customer": null, "customer_creation": "always", "customer_details": null, "customer_email": null, "expires_at": 1697713523, "invoice": null, "invoice_creation": {"enabled": false, "invoice_data": {"account_tax_ids": null, "custom_fields": null, "description": null, "footer": null, "metadata": {}, "rendering_options": null}}, "livemode": false, "locale": null, "metadata": {}, "mode": "payment", "payment_intent": "pi_3O2XZ1EcXtiJtvvh0zWGn33E", "payment_link": null, "payment_method_collection": "always", "payment_method_configuration_details": {"id": "pmc_1MC0oMEcXtiJtvvhmhbSUwTJ", "parent": null}, "payment_method_options": {"us_bank_account": {"financial_connections": {"permissions": ["payment_method"], "prefetch": []}, "verification_method": "automatic"}, "wechat_pay": {"app_id": null, "client": "web"}}, "payment_method_types": ["card", "alipay", "klarna", "link", "us_bank_account", "wechat_pay", "cashapp"], "payment_status": "unpaid", "phone_number_collection": {"enabled": false}, "recovered_from": null, "setup_intent": null, "shipping_address_collection": null, "shipping_cost": null, "shipping_details": null, "shipping_options": [], "status": "expired", "submit_type": null, "subscription": null, "success_url": "https://example.com/success", "total_details": {"amount_discount": 0, "amount_shipping": 0, "amount_tax": 0}, "ui_mode": "hosted", "url": null, "updated": 1697627124}, "emitted_at": 1697627275062} {"stream": "credit_notes", "data": {"id": "cn_1NGPwmEcXtiJtvvhNXwHpgJF", "object": "credit_note", "amount": 8400, "amount_shipping": 0, "created": 1686158100, "currency": "usd", "customer": "cus_Kou8knsO3qQOwU", "customer_balance_transaction": null, "discount_amount": "0", "discount_amounts": [], "effective_at": 1686158100, "invoice": "in_1K9GK0EcXtiJtvvhSo2LvGqT", "lines": {"object": "list", "data": [{"id": "cnli_1NGPwmEcXtiJtvvhcL7yEIBJ", "object": "credit_note_line_item", "amount": 8400, "amount_excluding_tax": 8400, "description": "a box of parsnips", "discount_amount": 0, "discount_amounts": [], "invoice_line_item": "il_1K9GKLEcXtiJtvvhhHaYMebN", "livemode": false, "quantity": 1, "tax_amounts": [], "tax_rates": [], "type": "invoice_line_item", "unit_amount": 8400, "unit_amount_decimal": 8400.0, "unit_amount_excluding_tax": 8400.0}], "has_more": false, "url": "/v1/credit_notes/cn_1NGPwmEcXtiJtvvhNXwHpgJF/lines"}, "livemode": false, "memo": null, "metadata": {}, "number": "CA35DF83-0001-CN-01", "out_of_band_amount": null, "pdf": 
"https://pay.stripe.com/credit_notes/acct_1JwnoiEcXtiJtvvh/test_YWNjdF8xSndub2lFY1h0aUp0dnZoLF9PMlV3dFlJelh4NHM1R0VIWnhMR3RjWUtlejFlRWtILDg4MTY4MDc20200Sa50llWu/pdf?s=ap", "reason": null, "refund": null, "shipping_cost": null, "status": "issued", "subtotal": 8400, "subtotal_excluding_tax": 8400, "tax_amounts": [], "total": 8400, "total_excluding_tax": 8400, "type": "pre_payment", "voided_at": null, "updated": 1686158100}, "emitted_at": 1697627276386} {"stream": "customers", "data": {"id": "cus_LIiHR6omh14Xdg", "object": "customer", "address": {"city": "san francisco", "country": "US", "line1": "san francisco", "line2": "", "postal_code": "", "state": "CA"}, "balance": 0, "created": 1646998902, "currency": "usd", "default_source": "card_1MSHU1EcXtiJtvvhytSN6V54", "delinquent": false, "description": "test", "discount": null, "email": "test@airbyte_integration_test.com", "invoice_prefix": "09A6A98F", "invoice_settings": {"custom_fields": null, "default_payment_method": null, "footer": null, "rendering_options": null}, "livemode": false, "metadata": {}, "name": "Test", "next_invoice_sequence": 1, "phone": null, "preferred_locales": [], "shipping": {"address": {"city": "", "country": "US", "line1": "", "line2": "", "postal_code": "", "state": ""}, "name": "", "phone": ""}, "tax_exempt": "none", "test_clock": null, "updated": 1646998902}, "emitted_at": 1697627278433} {"stream": "customers", "data": {"id": "cus_Kou8knsO3qQOwU", "object": "customer", "address": null, "balance": 0, "created": 1640123795, "currency": "usd", "default_source": "src_1MSID8EcXtiJtvvhxIT9lXRy", "delinquent": false, "description": null, "discount": null, "email": "edward.gao+stripe-test-customer-1@airbyte.io", "invoice_prefix": "CA35DF83", "invoice_settings": {"custom_fields": null, "default_payment_method": null, "footer": null, "rendering_options": null}, "livemode": false, "metadata": {}, "name": "edgao-test-customer-1", "next_invoice_sequence": 2, "phone": null, "preferred_locales": [], "shipping": null, "tax_exempt": "none", "test_clock": null, "updated": 1640123795}, "emitted_at": 1697627278435} @@ -50,9 +50,9 @@ {"stream": "subscription_schedule", "data": {"id": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "object": "subscription_schedule", "application": null, "canceled_at": null, "completed_at": null, "created": 1697550676, "current_phase": {"end_date": 1705499476, "start_date": 1697550676}, "customer": "cus_NGoTFiJFVbSsvZ", "default_settings": {"application_fee_percent": null, "automatic_tax": {"enabled": false}, "billing_cycle_anchor": "automatic", "billing_thresholds": null, "collection_method": "charge_automatically", "default_payment_method": null, "default_source": null, "description": "Test Test", "invoice_settings": "{'days_until_due': None}", "on_behalf_of": null, "transfer_data": null}, "end_behavior": "cancel", "livemode": false, "metadata": {}, "phases": [{"add_invoice_items": [], "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": null, "billing_thresholds": null, "collection_method": "charge_automatically", "coupon": null, "currency": "usd", "default_payment_method": null, "default_tax_rates": [], "description": "Test Test", "end_date": 1705499476, "invoice_settings": "{'days_until_due': None}", "items": [{"billing_thresholds": null, "metadata": {}, "plan": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "price": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "quantity": 1, "tax_rates": []}], "metadata": {}, "on_behalf_of": null, "proration_behavior": "create_prorations", "start_date": 
1697550676, "transfer_data": null, "trial_end": null}], "released_at": null, "released_subscription": null, "renewal_interval": null, "status": "active", "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "test_clock": null, "updated": 1697550676}, "emitted_at": 1697627312079} {"stream": "transfers", "data": {"id": "tr_1NH18zEcXtiJtvvhnd827cNO", "object": "transfer", "amount": 10000, "amount_reversed": 0, "balance_transaction": "txn_1NH190EcXtiJtvvhBO3PeR7p", "created": 1686301085, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NH18zEYmRTj5on1GkCCsqLK", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [], "has_more": false, "total_count": 0.0, "url": "/v1/transfers/tr_1NH18zEcXtiJtvvhnd827cNO/reversals"}, "reversed": false, "source_transaction": null, "source_type": "card", "transfer_group": null, "updated": 1686301085}, "emitted_at": 1697627313262} {"stream": "transfers", "data": {"id": "tr_1NGoaCEcXtiJtvvhjmHtOGOm", "object": "transfer", "amount": 100, "amount_reversed": 100, "balance_transaction": "txn_1NGoaDEcXtiJtvvhsZrNMsdJ", "created": 1686252800, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NGoaCEYmRTj5on1LAlAIG3a", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [{"id": "trr_1NGolCEcXtiJtvvhOYPck3CP", "object": "transfer_reversal", "amount": 100, "balance_transaction": "txn_1NGolCEcXtiJtvvhZRy4Kd5S", "created": 1686253482, "currency": "usd", "destination_payment_refund": "pyr_1NGolBEYmRTj5on1STal3rmp", "metadata": {}, "source_refund": null, "transfer": "tr_1NGoaCEcXtiJtvvhjmHtOGOm"}], "has_more": false, "total_count": 1.0, "url": "/v1/transfers/tr_1NGoaCEcXtiJtvvhjmHtOGOm/reversals"}, "reversed": true, "source_transaction": null, "source_type": "card", "transfer_group": "ORDER10", "updated": 1686252800}, "emitted_at": 1697627313264} -{"stream": "refunds", "data": {"id": "re_3MVuZyEcXtiJtvvh0A6rSbeJ", "object": "refund", "amount": 200000, "balance_transaction": "txn_3MVuZyEcXtiJtvvh0v0QyAMx", "charge": "ch_3MVuZyEcXtiJtvvh0tiVC7DI", "created": 1675074488, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MVuZyEcXtiJtvvh07Ehi4cx", "reason": "fraudulent", "receipt_number": "3278-5368", "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null, "updated": 1675074488}, "emitted_at": 1697627314206} -{"stream": "refunds", "data": {"id": "re_3NcwAGEcXtiJtvvh1UT4PBe6", "object": "refund", "amount": 600, "balance_transaction": "txn_3NcwAGEcXtiJtvvh1AcNi3Ma", "charge": "ch_3NcwAGEcXtiJtvvh1m0SSmfQ", "created": 1692782173, "currency": "usd", "metadata": {}, "payment_intent": "pi_3NcwAGEcXtiJtvvh1olHTPmH", "reason": null, "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null, "updated": 1692782173}, "emitted_at": 1697627314485} -{"stream": "refunds", "data": {"id": "re_3MngeoEcXtiJtvvh0c4KeMOd", "object": "refund", "amount": 540, "balance_transaction": "txn_3MngeoEcXtiJtvvh0Cz3qwU2", "charge": "ch_3MngeoEcXtiJtvvh0SBFQWe2", "created": 1683889626, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MngeoEcXtiJtvvh0B7Tcbr4", "reason": "requested_by_customer", "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null, "updated": 1683889626}, "emitted_at": 1697627314486} +{"stream": "refunds", "data": {"id": "re_3MVuZyEcXtiJtvvh0A6rSbeJ", "object": "refund", "amount": 200000, 
"balance_transaction": "txn_3MVuZyEcXtiJtvvh0v0QyAMx", "charge": "ch_3MVuZyEcXtiJtvvh0tiVC7DI", "created": 1675074488, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MVuZyEcXtiJtvvh07Ehi4cx", "reason": "fraudulent", "receipt_number": "3278-5368", "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314206} +{"stream": "refunds", "data": {"id": "re_3NcwAGEcXtiJtvvh1UT4PBe6", "object": "refund", "amount": 600, "balance_transaction": "txn_3NcwAGEcXtiJtvvh1AcNi3Ma", "charge": "ch_3NcwAGEcXtiJtvvh1m0SSmfQ", "created": 1692782173, "currency": "usd", "metadata": {}, "payment_intent": "pi_3NcwAGEcXtiJtvvh1olHTPmH", "reason": null, "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314485} +{"stream": "refunds", "data": {"id": "re_3MngeoEcXtiJtvvh0c4KeMOd", "object": "refund", "amount": 540, "balance_transaction": "txn_3MngeoEcXtiJtvvh0Cz3qwU2", "charge": "ch_3MngeoEcXtiJtvvh0SBFQWe2", "created": 1683889626, "currency": "usd", "metadata": {}, "payment_intent": "pi_3MngeoEcXtiJtvvh0B7Tcbr4", "reason": "requested_by_customer", "receipt_number": null, "source_transfer_reversal": null, "status": "succeeded", "transfer_reversal": null}, "emitted_at": 1697627314486} {"stream": "payment_intents", "data": {"id": "pi_3K9FSOEcXtiJtvvh0AEIFllC", "object": "payment_intent", "amount": 5300, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 5300, "application": null, "application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9FSOEcXtiJtvvh0AEIFllC_secret_uPUtIaSltgtW0qK7mLD0uF2Mr", "confirmation_method": "automatic", "created": 1640120472, "currency": "usd", "customer": null, "description": null, "invoice": null, "last_payment_error": null, "latest_charge": "ch_3K9FSOEcXtiJtvvh0zxb7clc", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": "src_1K9FSOEcXtiJtvvhHGu1qtOx", "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "succeeded", "transfer_data": null, "transfer_group": null, "updated": 1640120472}, "emitted_at": 1697627315508} {"stream": "payment_intents", "data": {"id": "pi_3K9F5DEcXtiJtvvh16scJMp6", "object": "payment_intent", "amount": 4200, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 4200, "application": null, "application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9F5DEcXtiJtvvh16scJMp6_secret_YwhzCTpXtfcKYeklXnPnysRRi", "confirmation_method": "automatic", "created": 1640119035, "currency": "usd", "customer": null, "description": "edgao test", "invoice": null, "last_payment_error": null, "latest_charge": "ch_3K9F5DEcXtiJtvvh1w2MaTpj", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": 
null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": "src_1K9F5CEcXtiJtvvhrsZdur8Y", "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "succeeded", "transfer_data": null, "transfer_group": null, "updated": 1640119035}, "emitted_at": 1697627315511} {"stream": "payment_intents", "data": {"id": "pi_3K9F4mEcXtiJtvvh18NKhEuo", "object": "payment_intent", "amount": 4200, "amount_capturable": 0, "amount_details": {"tip": {}}, "amount_received": 0, "application": null, "application_fee_amount": null, "automatic_payment_methods": null, "canceled_at": null, "cancellation_reason": null, "capture_method": "automatic", "client_secret": "pi_3K9F4mEcXtiJtvvh18NKhEuo_secret_pfUt7CTkPjVdJacycm0bMpdLt", "confirmation_method": "automatic", "created": 1640119008, "currency": "usd", "customer": null, "description": "edgao test", "invoice": null, "last_payment_error": {"charge": "ch_3K9F4mEcXtiJtvvh1kUzxjwN", "code": "card_declined", "decline_code": "test_mode_live_card", "doc_url": "https://stripe.com/docs/error-codes/card-declined", "message": "Your card was declined. Your request was in test mode, but used a non test (live) card. For a list of valid test cards, visit: https://stripe.com/docs/testing.", "source": {"id": "src_1K9F4hEcXtiJtvvhrUEwvCyi", "object": "source", "amount": null, "card": {"address_line1_check": null, "address_zip_check": null, "brand": "Visa", "country": "US", "cvc_check": "unchecked", "dynamic_last4": null, "exp_month": 9, "exp_year": 2028, "fingerprint": "Re3p4j8issXA77iI", "funding": "credit", "last4": "8097", "name": null, "three_d_secure": "optional", "tokenization_method": null}, "client_secret": "src_client_secret_b3v8YqNMLGykB120fqv2Tjhq", "created": 1640119003, "currency": null, "flow": "none", "livemode": false, "metadata": {}, "owner": {"address": null, "email": null, "name": null, "phone": null, "verified_address": null, "verified_email": null, "verified_name": null, "verified_phone": null}, "statement_descriptor": null, "status": "consumed", "type": "card", "usage": "reusable"}, "type": "card_error"}, "latest_charge": "ch_3K9F4mEcXtiJtvvh1kUzxjwN", "livemode": false, "metadata": {}, "next_action": null, "on_behalf_of": null, "payment_method": null, "payment_method_configuration_details": null, "payment_method_options": {"card": {"installments": null, "mandate_options": null, "network": null, "request_three_d_secure": "automatic"}}, "payment_method_types": ["card"], "processing": null, "receipt_email": null, "review": null, "setup_future_usage": null, "shipping": null, "source": null, "statement_descriptor": "airbyte.io", "statement_descriptor_suffix": null, "status": "requires_payment_method", "transfer_data": null, "transfer_group": null, "updated": 1640119008}, "emitted_at": 1697627315513} diff --git a/airbyte-integrations/connectors/source-stripe/metadata.yaml b/airbyte-integrations/connectors/source-stripe/metadata.yaml index 1aaaa9ee97c4..f521fb172278 100644 --- a/airbyte-integrations/connectors/source-stripe/metadata.yaml +++ b/airbyte-integrations/connectors/source-stripe/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: e094cb9a-26de-4645-8761-65c0c425d1de - dockerImageTag: 4.5.4 + dockerImageTag: 5.0.0 dockerRepository: airbyte/source-stripe documentationUrl: https://docs.airbyte.com/integrations/sources/stripe githubIssueLabel: source-stripe 
@@ -33,6 +33,11 @@ data: schema refresh of all effected streams is required to use the new cursor format. upgradeDeadline: "2023-09-14" + 5.0.0: + message: + Version 5.0.0 introduces fixes for the `CheckoutSessions`, `CheckoutSessionsLineItems` and `Refunds` streams. The cursor field is changed for the `CheckoutSessionsLineItems` and `Refunds` streams. This will prevent data loss during incremental syncs. + Also, the `Invoices`, `Subscriptions` and `SubscriptionSchedule` stream schemas have been updated. + upgradeDeadline: "2023-11-30" suggestedStreams: streams: - customers diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json index 09c6e9e28f3e..b00f6569d12e 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/checkout_sessions_line_items.json @@ -5,6 +5,8 @@ "id": { "type": ["null", "string"] }, "checkout_session_id": { "type": ["null", "string"] }, "checkout_session_expires_at": { "type": ["null", "integer"] }, + "checkout_session_created": { "type": ["null", "integer"] }, + "checkout_session_updated": { "type": ["null", "integer"] }, "object": { "type": ["null", "string"] }, "amount_subtotal": { "type": ["null", "integer"] }, "amount_tax": { "type": ["null", "integer"] }, diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json index 33d407425ea0..6959909cc77f 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/invoices.json @@ -499,10 +499,7 @@ "type": ["null", "integer"] }, "default_tax_rates": { - "type": ["null", "array"], - "items": { - "$ref": "tax_rates.json" - } + "$ref": "tax_rates.json" }, "total_excluding_tax": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json index 9f187d82924c..cc14a57138fd 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscription_schedule.json @@ -134,11 +134,7 @@ "type": ["null", "string"] }, "default_tax_rates": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "additionalProperties": true - } + "$ref": "tax_rates.json" }, "description": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json index 5d06810fb4c0..1a720f6fd034 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/schemas/subscriptions.json @@ -323,10 +323,7 @@ } }, "default_tax_rates": { - "type": ["null", "array"], - "items": { - "$ref": "tax_rates.json" - } + "$ref": "tax_rates.json" }, "pause_collection": { "type": ["null", "object"], diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py 
b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py index 5683c875497e..449eb53f99db 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/source.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/source.py @@ -22,11 +22,11 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException from airbyte_protocol.models import SyncMode from source_stripe.streams import ( - CheckoutSessionsLineItems, CreatedCursorIncrementalStripeStream, CustomerBalanceTransactions, Events, IncrementalStripeStream, + ParentIncrementalStipeSubStream, Persons, SetupAttempts, StripeLazySubStream, @@ -197,10 +197,9 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: subscription_items = StripeLazySubStream( name="subscription_items", path="subscription_items", - extra_request_params=lambda self, stream_slice, *args, **kwargs: {"subscription": stream_slice[self.parent_id]}, + extra_request_params=lambda self, stream_slice, *args, **kwargs: {"subscription": stream_slice["parent"]["id"]}, parent=subscriptions, use_cache=USE_CACHE, - parent_id="subscription_id", sub_items_attr="items", **args, ) @@ -238,8 +237,22 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ], **args, ) + checkout_sessions = UpdatedCursorIncrementalStripeStream( + name="checkout_sessions", + path="checkout/sessions", + use_cache=USE_CACHE, + legacy_cursor_field="created", + event_types=[ + "checkout.session.async_payment_failed", + "checkout.session.async_payment_succeeded", + "checkout.session.completed", + "checkout.session.expired", + ], + **args, + ) + streams = [ - CheckoutSessionsLineItems(**incremental_args), + checkout_sessions, CustomerBalanceTransactions(**args), Events(**incremental_args), UpdatedCursorIncrementalStripeStream( @@ -267,19 +280,10 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: CreatedCursorIncrementalStripeStream(name="balance_transactions", path="balance_transactions", **incremental_args), CreatedCursorIncrementalStripeStream(name="files", path="files", **incremental_args), CreatedCursorIncrementalStripeStream(name="file_links", path="file_links", **incremental_args), - UpdatedCursorIncrementalStripeStream( - name="checkout_sessions", - path="checkout/sessions", - use_cache=USE_CACHE, - legacy_cursor_field="expires_at", - event_types=[ - "checkout.session.async_payment_failed", - "checkout.session.async_payment_succeeded", - "checkout.session.completed", - "checkout.session.expired", - ], - **args, - ), + # The Refunds stream does not utilize the Events API as it created issues with data loss during the incremental syncs. + # Therefore, we're using the regular API with the `created` cursor field. A bug has been filed with Stripe. 
+ # See more at https://github.com/airbytehq/oncall/issues/3090, https://github.com/airbytehq/oncall/issues/3428 + CreatedCursorIncrementalStripeStream(name="refunds", path="refunds", **incremental_args), UpdatedCursorIncrementalStripeStream( name="payment_methods", path="payment_methods", @@ -396,9 +400,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: **args, ), transfers, - IncrementalStripeStream( - name="refunds", path="refunds", use_cache=USE_CACHE, event_types=["refund.created", "refund.updated"], **args - ), IncrementalStripeStream( name="payment_intents", path="payment_intents", @@ -449,45 +450,56 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ), UpdatedCursorIncrementalStripeLazySubStream( name="application_fees_refunds", - path=lambda self, stream_slice, *args, **kwargs: f"application_fees/{stream_slice[self.parent_id]}/refunds", + path=lambda self, stream_slice, *args, **kwargs: f"application_fees/{stream_slice['parent']['id']}/refunds", parent=application_fees, event_types=["application_fee.refund.updated"], - parent_id="refund_id", sub_items_attr="refunds", - add_parent_id=True, **args, ), UpdatedCursorIncrementalStripeLazySubStream( name="bank_accounts", - path=lambda self, stream_slice, *args, **kwargs: f"customers/{stream_slice[self.parent_id]}/sources", + path=lambda self, stream_slice, *args, **kwargs: f"customers/{stream_slice['parent']['id']}/sources", parent=self.customers(expand_items=["data.sources"], **args), event_types=["customer.source.created", "customer.source.expiring", "customer.source.updated", "customer.source.deleted"], legacy_cursor_field=None, - parent_id="customer_id", sub_items_attr="sources", extra_request_params={"object": "bank_account"}, response_filter=lambda record: record["object"] == "bank_account", **args, ), + ParentIncrementalStipeSubStream( + name="checkout_sessions_line_items", + path=lambda self, stream_slice, *args, **kwargs: f"checkout/sessions/{stream_slice['parent']['id']}/line_items", + parent=checkout_sessions, + expand_items=["data.discounts", "data.taxes"], + cursor_field="checkout_session_updated", + slice_data_retriever=lambda record, stream_slice: { + "checkout_session_id": stream_slice["parent"]["id"], + "checkout_session_expires_at": stream_slice["parent"]["expires_at"], + "checkout_session_created": stream_slice["parent"]["created"], + "checkout_session_updated": stream_slice["parent"]["updated"], + **record, + }, + **args, + ), StripeLazySubStream( name="invoice_line_items", - path=lambda self, stream_slice, *args, **kwargs: f"invoices/{stream_slice[self.parent_id]}/lines", + path=lambda self, stream_slice, *args, **kwargs: f"invoices/{stream_slice['parent']['id']}/lines", parent=invoices, - parent_id="invoice_id", sub_items_attr="lines", - add_parent_id=True, + slice_data_retriever=lambda record, stream_slice: {"invoice_id": stream_slice["parent"]["id"], **record}, **args, ), subscription_items, StripeSubStream( name="transfer_reversals", - path=lambda self, stream_slice, *args, **kwargs: f"transfers/{stream_slice.get('parent', {}).get('id')}/reversals", + path=lambda self, stream_slice, *args, **kwargs: f"transfers/{stream_slice['parent']['id']}/reversals", parent=transfers, **args, ), StripeSubStream( name="usage_records", - path=lambda self, stream_slice, *args, **kwargs: f"subscription_items/{stream_slice.get('parent', {}).get('id')}/usage_record_summaries", + path=lambda self, stream_slice, *args, **kwargs: f"subscription_items/{stream_slice['parent']['id']}/usage_record_summaries", 
parent=subscription_items, primary_key=None, **args, diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml index 5a31b610cd27..719177412a96 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.yaml @@ -42,8 +42,8 @@ connectionSpecification: description: >- When set, the connector will always re-export data from the past N days, where N is the value set here. This is useful if your data is frequently updated - after creation. Applies only to streams that do not support event-based incremental syncs: CheckoutSessionLineItems, - Events, SetupAttempts, ShippingRates, BalanceTransactions, Files, FileLinks. More info here order: 3 slice_range: diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py index 460059716a75..a8b7feaef07f 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py @@ -25,41 +25,54 @@ class IRecordExtractor(ABC): @abstractmethod - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[Mapping]: + def extract_records(self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None) -> Iterable[Mapping]: pass class DefaultRecordExtractor(IRecordExtractor): - def __init__(self, response_filter: Optional[Callable] = None): - self._response_filter = response_filter or (lambda x: x) + def __init__(self, response_filter: Optional[Callable] = None, slice_data_retriever: Optional[Callable] = None): + self._response_filter = response_filter or (lambda record: record) + self._slice_data_retriever = slice_data_retriever or (lambda record, *_: record) - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: - yield from filter(self._response_filter, records) + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: + yield from filter(self._response_filter, map(lambda x: self._slice_data_retriever(x, stream_slice), records)) class EventRecordExtractor(DefaultRecordExtractor): - def __init__(self, cursor_field: str, response_filter: Optional[Callable] = None): - super().__init__(response_filter) + def __init__(self, cursor_field: str, response_filter: Optional[Callable] = None, slice_data_retriever: Optional[Callable] = None): + super().__init__(response_filter, slice_data_retriever) self.cursor_field = cursor_field - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: for record in records: item = record["data"]["object"] item[self.cursor_field] = record["created"] if record["type"].endswith(".deleted"): item["is_deleted"] = True if self._response_filter(item): - yield item + yield self._slice_data_retriever(item, stream_slice) class UpdatedCursorIncrementalRecordExtractor(DefaultRecordExtractor): - def __init__(self, cursor_field: str, legacy_cursor_field: Optional[str], response_filter: Optional[Callable] = None): - super().__init__(response_filter) + def __init__( + self, + cursor_field: str, + legacy_cursor_field: Optional[str], + 
response_filter: Optional[Callable] = None, + slice_data_retriever: Optional[Callable] = None, + ): + super().__init__(response_filter, slice_data_retriever) self.cursor_field = cursor_field self.legacy_cursor_field = legacy_cursor_field - def extract_records(self, records: Iterable[MutableMapping]) -> Iterable[MutableMapping]: - records = super().extract_records(records) + def extract_records( + self, records: Iterable[MutableMapping], stream_slice: Optional[Mapping[str, Any]] = None + ) -> Iterable[MutableMapping]: + records = super().extract_records(records, stream_slice) for record in records: if self.cursor_field in record: yield record @@ -126,13 +139,14 @@ def __init__( expand_items: Optional[List[str]] = None, extra_request_params: Optional[Union[Mapping[str, Any], Callable]] = None, response_filter: Optional[Callable] = None, + slice_data_retriever: Optional[Callable] = None, primary_key: Optional[str] = "id", **kwargs, ): self.account_id = account_id self.start_date = start_date self.slice_range = slice_range or self.DEFAULT_SLICE_RANGE - self._record_extractor = record_extractor or DefaultRecordExtractor(response_filter) + self._record_extractor = record_extractor or DefaultRecordExtractor(response_filter, slice_data_retriever) self._name = name self._path = path self._use_cache = use_cache @@ -174,7 +188,7 @@ def parse_response( stream_slice: Optional[Mapping[str, Any]] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Iterable[Mapping[str, Any]]: - yield from self.record_extractor.extract_records(response.json().get("data", [])) + yield from self.record_extractor.extract_records(response.json().get("data", []), stream_slice) def request_headers(self, **kwargs) -> Mapping[str, Any]: headers = {"Stripe-Version": STRIPE_API_VERSION} @@ -251,7 +265,8 @@ def stream_slices( def get_start_timestamp(self, stream_state) -> int: start_point = self.start_date - start_point = max(start_point, stream_state.get(self.cursor_field, 0)) + # we use +1 second because date range is inclusive + start_point = max(start_point, stream_state.get(self.cursor_field, 0) + 1) if start_point and self.lookback_window_days: self.logger.info(f"Applying lookback window of {self.lookback_window_days} days to stream {self.name}") @@ -471,100 +486,6 @@ def read_records( yield from self.parent_stream.read_records(sync_mode, cursor_field, stream_slice, stream_state) -class CheckoutSessionsLineItems(CreatedCursorIncrementalStripeStream): - """ - API docs: https://stripe.com/docs/api/checkout/sessions/line_items - """ - - cursor_field = "checkout_session_expires_at" - - @property - def expand_items(self) -> Optional[List[str]]: - return ["data.discounts", "data.taxes"] - - @property - def checkout_session(self): - return UpdatedCursorIncrementalStripeStream( - name="checkout_sessions", - path="checkout/sessions", - use_cache=USE_CACHE, - legacy_cursor_field="expires_at", - event_types=[ - "checkout.session.async_payment_failed", - "checkout.session.async_payment_succeeded", - "checkout.session.completed", - "checkout.session.expired", - ], - authenticator=self.authenticator, - account_id=self.account_id, - start_date=self.start_date, - slice_range=self.slice_range, - ) - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # https://stripe.com/docs/api/checkout/sessions/create#create_checkout_session-expires_at - # 'expires_at' - can be anywhere from 1 to 24 hours after Checkout Session creation. 
- # thus we should always add 1 day to lookback window to avoid possible checkout_sessions losses - self.lookback_window_days = self.lookback_window_days + 1 - - def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): - return f"checkout/sessions/{stream_slice['checkout_session_id']}/line_items" - - def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> MutableMapping[str, Any]: - # override to not refer to slice values - params = { - "limit": 100, - **self.extra_request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token), - } - if self.expand_items: - params["expand[]"] = self.expand_items - if next_page_token: - params.update(next_page_token) - return params - - def stream_slices( - self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None - ) -> Iterable[Optional[Mapping[str, Any]]]: - checkout_session_state = None - if stream_state: - checkout_session_state = {"expires_at": stream_state["checkout_session_expires_at"]} - for checkout_session in self.checkout_session.read_records( - sync_mode=SyncMode.full_refresh, stream_state=checkout_session_state, stream_slice={} - ): - yield { - "checkout_session_id": checkout_session["id"], - "expires_at": checkout_session["expires_at"], - } - - @property - def raise_on_http_errors(self): - return False - - def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: - if response.status_code == 404: - self.logger.warning(response.json()) - return - response.raise_for_status() - - response_json = response.json() - data = response_json.get("data", []) - if data and stream_slice: - self.logger.info(f"stream_slice: {stream_slice}") - cs_id = stream_slice.get("checkout_session_id", None) - cs_expires_at = stream_slice.get("expires_at", None) - for e in data: - e["checkout_session_id"] = cs_id - e["checkout_session_expires_at"] = cs_expires_at - yield from data - - class CustomerBalanceTransactions(StripeStream): """ API docs: https://stripe.com/docs/api/customer_balance_transactions/list @@ -721,17 +642,6 @@ class StripeLazySubStream(StripeStream, HttpSubStream): } """ - @property - def add_parent_id(self) -> bool: - return self._add_parent_id - - @property - def parent_id(self) -> str: - """ - :return: string with attribute name - """ - return self._parent_id - @property def sub_items_attr(self) -> str: """ @@ -743,14 +653,10 @@ def sub_items_attr(self) -> str: def __init__( self, *args, - add_parent_id: bool = False, - parent_id: Optional[str] = None, sub_items_attr: Optional[str] = None, **kwargs, ): super().__init__(*args, **kwargs) - self._add_parent_id = add_parent_id - self._parent_id = parent_id self._sub_items_attr = sub_items_attr @property @@ -767,24 +673,16 @@ def request_params(self, stream_slice: Mapping[str, Any] = None, **kwargs): return params def read_records(self, sync_mode: SyncMode, stream_slice: Optional[Mapping[str, Any]] = None, **kwargs) -> Iterable[Mapping[str, Any]]: - parent_record = stream_slice["parent"] - items_obj = parent_record.get(self.sub_items_attr, {}) + items_obj = stream_slice["parent"].get(self.sub_items_attr, {}) if not items_obj: return - items = list(self.record_extractor.extract_records(items_obj.get("data", []))) - - # get next pages items_next_pages = [] + items = list(self.record_extractor.extract_records(items_obj.get("data", []), stream_slice)) if 
items_obj.get("has_more") and items: - stream_slice = {self.parent_id: parent_record["id"], "starting_after": items[-1]["id"]} + stream_slice = {"starting_after": items[-1]["id"], **stream_slice} items_next_pages = super().read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice, **kwargs) - - for item in chain(items, items_next_pages): - if self.add_parent_id: - # add reference to parent object when item doesn't have it already - item[self.parent_id] = parent_record["id"] - yield item + yield from chain(items, items_next_pages) class IncrementalStripeLazySubStreamSelector(IStreamSelector): @@ -797,6 +695,11 @@ def get_parent_stream(self, stream_state: Mapping[str, Any]) -> StripeStream: class UpdatedCursorIncrementalStripeLazySubStream(StripeStream, ABC): + """ + This stream uses StripeLazySubStream under the hood to run full refresh or initial incremental syncs. + In case of subsequent incremental syncs, it uses the UpdatedCursorIncrementalStripeStream class. + """ + def __init__( self, parent: StripeStream, @@ -804,8 +707,6 @@ def __init__( cursor_field: str = "updated", legacy_cursor_field: Optional[str] = "created", event_types: Optional[List[str]] = None, - parent_id: Optional[str] = None, - add_parent_id: bool = False, sub_items_attr: Optional[str] = None, response_filter: Optional[Callable] = None, **kwargs, @@ -823,8 +724,6 @@ def __init__( self.lazy_substream = StripeLazySubStream( *args, parent=parent, - parent_id=parent_id, - add_parent_id=add_parent_id, sub_items_attr=sub_items_attr, record_extractor=UpdatedCursorIncrementalRecordExtractor( cursor_field=cursor_field, legacy_cursor_field=legacy_cursor_field, response_filter=response_filter @@ -866,3 +765,60 @@ def read_records( yield from self.parent_stream.read_records( sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state ) + + +class ParentIncrementalStipeSubStream(StripeSubStream): + """ + This stream differs from others in that it runs parent stream in exactly same sync mode it is run itself to generate stream slices. + It also uses regular /v1 API endpoints to sync data no matter what the sync mode is. This means that the event-based API can only + be utilized by the parent stream. 
+ """ + + @property + def cursor_field(self) -> str: + return self._cursor_field + + def __init__(self, cursor_field: str, *args, **kwargs): + self._cursor_field = cursor_field + super().__init__(*args, **kwargs) + + def stream_slices( + self, sync_mode: SyncMode, cursor_field: Optional[List[str]] = None, stream_state: Optional[Mapping[str, Any]] = None + ) -> Iterable[Optional[Mapping[str, Any]]]: + stream_state = stream_state or {} + if stream_state: + # state is shared between self and parent, but cursor fields are different + stream_state = {self.parent.cursor_field: stream_state.get(self.cursor_field, 0)} + parent_stream_slices = self.parent.stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=stream_state) + for stream_slice in parent_stream_slices: + parent_records = self.parent.read_records( + sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state + ) + for record in parent_records: + yield {"parent": record} + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + return {self.cursor_field: max(current_stream_state.get(self.cursor_field, 0), latest_record[self.cursor_field])} + + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() + + @property + def raise_on_http_errors(self) -> bool: + return False + + def parse_response(self, response: requests.Response, *args, **kwargs) -> Iterable[Mapping[str, Any]]: + if response.status_code == 200: + return super().parse_response(response, *args, **kwargs) + if response.status_code == 404: + # When running incremental sync with state, the returned parent object very likely will not contain sub-items + # as the events API does not support expandable items. Parent class will try getting sub-items from this object, + # then from its own API. In case there are no sub-items at all for this entity, API will raise 404 error. + self.logger.warning( + "Data was not found for URL: {response.request.url}. " + "If this is a path for getting child attributes like /v1/checkout/sessions//line_items when running " + "the incremental sync, you may safely ignore this warning." 
+ ) + return [] + response.raise_for_status() diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py index 143331e06992..5f942b152157 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_streams.py @@ -7,7 +7,7 @@ import freezegun import pendulum import pytest -from source_stripe.streams import CheckoutSessionsLineItems, CustomerBalanceTransactions, Persons, SetupAttempts +from source_stripe.streams import CustomerBalanceTransactions, Persons, SetupAttempts def read_from_stream(stream, sync_mode, state): @@ -170,40 +170,185 @@ def test_lazy_substream_data_is_filtered( assert record["object"] == expected_object -@freezegun.freeze_time("2023-08-23T15:00:15Z") -def test_created_cursor_incremental_stream(requests_mock, stream_by_name, config): - config["start_date"] = str(pendulum.now().subtract(months=23)) - stream = stream_by_name("balance_transactions", {"lookback_window_days": 14, **config}) - requests_mock.get( - "/v1/balance_transactions", - [ +balance_transactions_api_objects = [ + {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "created": 1653299388, "status": "available"}, + {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "created": 1679568588, "status": "available"}, +] + + +refunds_api_objects = [ + { + "id": "re_3NYB8LAHLf1oYfwN3EZRDIfF", + "object": "refund", + "amount": 100, + "charge": "ch_3NYB8LAHLf1oYfwN3P6BxdKj", + "created": 1653299388, + "currency": "usd", + }, + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + }, +] + + +@pytest.mark.parametrize( + "requests_mock_map, expected_records, expected_slices, stream_name, sync_mode, state", + ( + ( { - "json": { - "data": [{"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "status": "available"}], - "has_more": False, - } + "/v1/balance_transactions": [ + { + "json": { + "data": [balance_transactions_api_objects[0]], + "has_more": False, + } + }, + { + "json": { + "data": [balance_transactions_api_objects[-1]], + "has_more": False, + } + }, + ], + }, + [ + { + "id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", + "object": "balance_transaction", + "amount": 435, + "created": 1653299388, + "status": "available", + }, + { + "id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", + "object": "balance_transaction", + "amount": -9164, + "created": 1679568588, + "status": "available", + }, + ], + [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}], + "balance_transactions", + "full_refresh", + {}, + ), + ( + { + "/v1/balance_transactions": [ + { + "json": { + "data": [balance_transactions_api_objects[-1]], + "has_more": False, + } + }, + ], }, + [ + { + "id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", + "object": "balance_transaction", + "amount": -9164, + "created": 1679568588, + "status": "available", + }, + ], + [{"created[gte]": 1665308989, "created[lte]": 1692802815}], + "balance_transactions", + "incremental", + {"created": 1666518588}, + ), + ( { - "json": { - "data": [ - {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "status": "available"} - ], - "has_more": False, - } + "/v1/refunds": [ + { + "json": { + "data": 
[refunds_api_objects[0]], + "has_more": False, + } + }, + { + "json": { + "data": [refunds_api_objects[-1]], + "has_more": False, + } + }, + ], }, - ], - ) + [ + { + "id": "re_3NYB8LAHLf1oYfwN3EZRDIfF", + "object": "refund", + "amount": 100, + "charge": "ch_3NYB8LAHLf1oYfwN3P6BxdKj", + "created": 1653299388, + "currency": "usd", + }, + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + }, + ], + [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}], + "refunds", + "full_refresh", + {}, + ), + ( + { + "/v1/refunds": [ + { + "json": { + "data": [refunds_api_objects[-1]], + "has_more": False, + } + }, + ], + }, + [ + { + "id": "re_Lf1oYfwN3EZRDIfF3NYB8LAH", + "object": "refund", + "amount": 15, + "charge": "ch_YfwN3P6BxdKj3NYB8LAHLf1o", + "created": 1679568588, + "currency": "eur", + } + ], + [{"created[gte]": 1665308989, "created[lte]": 1692802815}], + "refunds", + "incremental", + {"created": 1666518588}, + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15Z") +def test_created_cursor_incremental_stream( + requests_mock, requests_mock_map, stream_by_name, expected_records, expected_slices, stream_name, sync_mode, state, config +): + config["start_date"] = str(pendulum.now().subtract(months=23)) + stream = stream_by_name(stream_name, {"lookback_window_days": 14, **config}) + for url, response in requests_mock_map.items(): + requests_mock.get(url, response) - slices = list(stream.stream_slices("full_refresh")) - assert slices == [{"created[gte]": 1631199615, "created[lte]": 1662735615}, {"created[gte]": 1662735616, "created[lte]": 1692802815}] - records = [] + slices = list(stream.stream_slices(sync_mode, stream_state=state)) + assert slices == expected_slices + records = read_from_stream(stream, sync_mode, state) + assert records == expected_records + for record in records: + assert bool(record[stream.cursor_field]) + call_history = iter(requests_mock.request_history) for slice_ in slices: - for record in stream.read_records("full_refresh", stream_slice=slice_): - records.append(record) - assert records == [ - {"id": "txn_1KVQhfEcXtiJtvvhF7ox3YEm", "object": "balance_transaction", "amount": 435, "status": "available"}, - {"id": "txn_tiJtvvhF7ox3YEmKvVQhfEcX", "object": "balance_transaction", "amount": -9164, "status": "available"}, - ] + call = next(call_history) + assert urlencode(slice_) in call.url @pytest.mark.parametrize( @@ -213,8 +358,8 @@ def test_created_cursor_incremental_stream(requests_mock, stream_by_name, config ("2020-01-01T00:00:00Z", 14, 0, {}, "2019-12-18T00:00:00Z"), ("2020-01-01T00:00:00Z", 0, 30, {}, "2023-07-24T15:00:15Z"), ("2020-01-01T00:00:00Z", 14, 30, {}, "2023-07-24T15:00:15Z"), - ("2020-01-01T00:00:00Z", 0, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-17T00:00:00Z"), - ("2020-01-01T00:00:00Z", 14, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-03T00:00:00Z"), + ("2020-01-01T00:00:00Z", 0, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-17T00:00:01Z"), + ("2020-01-01T00:00:00Z", 14, 0, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2022-07-03T00:00:01Z"), ("2020-01-01T00:00:00Z", 0, 30, {"created": pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2023-07-24T15:00:15Z"), ("2020-01-01T00:00:00Z", 14, 30, {"created": 
pendulum.parse("2022-07-17T00:00:00Z").int_timestamp}, "2023-07-24T15:00:15Z"), ), @@ -326,50 +471,6 @@ def test_updated_cursor_incremental_stream_read_w_state(requests_mock, stream_by assert records == [{"object": "credit_note", "invoice": "in_1K9GK0EcXtiJtvvhSo2LvGqT", "created": 1653341716, "updated": 1691629292}] -def test_checkout_session_line_items(requests_mock): - - session_id_missed = "cs_test_a165K4wNihuJlp2u3tknuohrvjAxyXFUB7nxZH3lwXRKJsadNEvIEWMUJ9" - session_id_exists = "cs_test_a1RjRHNyGUQOFVF3OkL8V8J0lZUASyVoCtsnZYG74VrBv3qz4245BLA1BP" - - response_sessions = { - "data": [{"id": session_id_missed, "expires_at": 100_000}, {"id": session_id_exists, "expires_at": 100_000}], - "has_more": False, - "object": "list", - "url": "/v1/checkout/sessions", - } - - response_sessions_line_items = { - "data": [{"id": "li_1JpAUUIEn5WyEQxnfGJT5MbL"}], - "has_more": False, - "object": "list", - "url": "/v1/checkout/sessions/{}/line_items".format(session_id_exists), - } - - response_error = { - "error": { - "code": "resource_missing", - "doc_url": "https://stripe.com/docs/error-codes/resource-missing", - "message": "No such checkout session: '{}'".format(session_id_missed), - "param": "session", - "type": "invalid_request_error", - } - } - - requests_mock.get("https://api.stripe.com/v1/checkout/sessions", json=response_sessions) - requests_mock.get( - "https://api.stripe.com/v1/checkout/sessions/{}/line_items".format(session_id_exists), json=response_sessions_line_items - ) - requests_mock.get( - "https://api.stripe.com/v1/checkout/sessions/{}/line_items".format(session_id_missed), json=response_error, status_code=404 - ) - - stream = CheckoutSessionsLineItems(start_date=100_100, account_id=None) - records = [] - for slice_ in stream.stream_slices(sync_mode="full_refresh"): - records.extend(stream.read_records(sync_mode="full_refresh", stream_slice=slice_)) - assert len(records) == 1 - - def test_customer_balance_transactions_stream_slices(requests_mock, stream_args): stream_args["start_date"] = pendulum.now().subtract(days=1).int_timestamp requests_mock.get( @@ -627,7 +728,6 @@ def test_subscription_items_extra_request_params(requests_mock, stream_by_name, "livemode": False, } ], - "has_more": False, }, ) requests_mock.get( @@ -668,3 +768,333 @@ def test_subscription_items_extra_request_params(requests_mock, stream_by_name, ] assert len(requests_mock.request_history) == 2 assert "subscription=sub_1OApco2eZvKYlo2CEDCzwLrE" in requests_mock.request_history[-1].url + + +checkout_session_api_response = { + "/v1/checkout/sessions": { + "object": "list", + "url": "/v1/checkout/sessions", + "has_more": False, + "data": [ + { + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "object": "checkout.session", + "created": 1699647441, + "expires_at": 1699647441, + "payment_intent": "pi_1Gt0KQ2eZvKYlo2CeWXUgmhy", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + }, + { + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "object": "checkout.session", + "created": 1699744164, + "expires_at": 1699644174, + "payment_intent": "pi_lo2CeWXUgmhy1Gt0KQ2eZvKY", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": 
"li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + }, + ], + } +} + + +checkout_session_line_items_api_response = { + "/v1/checkout/sessions/cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre/line_items": { + "object": "list", + "has_more": False, + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + "link": "/v1/checkout/sessions/cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre/line_items", + }, + "/v1/checkout/sessions/cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi/line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions/cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi/line_items", + "data": [ + { + "id": "li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, +} + + +checkout_session_events_response = { + "/v1/events": { + "data": [ + { + "id": "evt_1NdNFoEcXtiJtvvhBP5mxQmL", + "object": "event", + "api_version": "2020-08-27", + "created": 1699902016, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "created": 1653341716, + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + }, + { + "id": "evt_XtiJtvvhBP5mxQmL1NdNFoEc", + "object": "event", + "api_version": "2020-08-27", + "created": 1699901630, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "created": 1653341716, + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + }, + ], + "has_more": False, + }, +} + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, sync_mode, state, expected_slices", + ( + ( + checkout_session_api_response, + "checkout_sessions_line_items", + "full_refresh", + {}, + [ + { + "parent": { + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "object": "checkout.session", + "created": 1699647441, + "updated": 1699647441, + "expires_at": 1699647441, + "payment_intent": "pi_1Gt0KQ2eZvKYlo2CeWXUgmhy", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_1OB18o2eZvKYlo2CObYam50U", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + } + }, + { + "parent": { + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "object": "checkout.session", + "created": 1699744164, + "updated": 1699744164, + "expires_at": 1699644174, + "payment_intent": "pi_lo2CeWXUgmhy1Gt0KQ2eZvKY", + "status": "open", + "line_items": { + "object": "list", + "has_more": False, + "url": "/v1/checkout/sessions", + "data": [ + { + "id": "li_KYlo2CObYam50U1OB18o2eZv", + "object": "item", + "amount_discount": 0, + "amount_subtotal": 0, + "amount_tax": 0, + "amount_total": 0, + "currency": "usd", + } + ], + }, + } + }, + ], + ), + ( + 
checkout_session_events_response, + "checkout_sessions_line_items", + "incremental", + {"checkout_session_updated": 1685898010}, + [ + { + "parent": { + "object": "checkout_session", + "checkout_session": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "created": 1653341716, + "id": "cs_test_a1yxusdFIgDDkWTaKn6JTYniMDBzrmnBiXH8oRSExZt7tcbIzIEoZk1Lre", + "expires_at": 1692896410, + "updated": 1699902016, + } + }, + { + "parent": { + "object": "checkout_session", + "checkout_session": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "created": 1653341716, + "updated": 1699901630, + "id": "cs_test_XH8oRSExZt7tcbIzIEoZk1Lrea1yxusdFIgDDkWTaKn6JTYniMDBzrmnBi", + "expires_at": 1692896410, + } + }, + ], + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15") +def test_parent_incremental_substream_stream_slices( + requests_mock, requests_mock_map, stream_by_name, stream_name, sync_mode, state, expected_slices +): + for url, response in requests_mock_map.items(): + requests_mock.get(url, json=response) + + stream = stream_by_name(stream_name) + slices = stream.stream_slices(sync_mode, stream_state=state) + assert list(slices) == expected_slices + + +checkout_session_line_items_slice_to_record_data_map = { + "id": "checkout_session_id", + "expires_at": "checkout_session_expires_at", + "created": "checkout_session_created", + "updated": "checkout_session_updated", +} + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, sync_mode, state, mapped_fields", + ( + ( + {**checkout_session_api_response, **checkout_session_line_items_api_response}, + "checkout_sessions_line_items", + "full_refresh", + {}, + checkout_session_line_items_slice_to_record_data_map, + ), + ( + {**checkout_session_events_response, **checkout_session_line_items_api_response}, + "checkout_sessions_line_items", + "incremental", + {"checkout_session_updated": 1685898010}, + checkout_session_line_items_slice_to_record_data_map, + ), + ), +) +def test_parent_incremental_substream_records_contain_data_from_slice( + requests_mock, requests_mock_map, stream_by_name, stream_name, sync_mode, state, mapped_fields +): + for url, response in requests_mock_map.items(): + requests_mock.get(url, json=response) + + stream = stream_by_name(stream_name) + for slice_ in stream.stream_slices(sync_mode, stream_state=state): + for record in stream.read_records(sync_mode, stream_slice=slice_, stream_state=state): + for key, value in mapped_fields.items(): + assert slice_["parent"][key] == record[value] + + +@pytest.mark.parametrize( + "requests_mock_map, stream_name, state", + ( + ( + { + "/v1/events": ( + { + "data": [ + { + "id": "evt_1NdNFoEcXtiJtvvhBP5mxQmL", + "object": "event", + "api_version": "2020-08-27", + "created": 1699902016, + "data": { + "object": { + "object": "checkout_session", + "checkout_session": "cs_1K9GK0EcXtiJtvvhSo2LvGqT", + "created": 1653341716, + "id": "cs_1K9GK0EcXtiJtvvhSo2LvGqT", + "expires_at": 1692896410, + } + }, + "type": "checkout.session.completed", + } + ], + "has_more": False, + }, + 200, + ), + "/v1/checkout/sessions/cs_1K9GK0EcXtiJtvvhSo2LvGqT/line_items": ({}, 404), + }, + "checkout_sessions_line_items", + {"checkout_session_updated": 1686934810}, + ), + ), +) +@freezegun.freeze_time("2023-08-23T15:00:15") +def test_parent_incremental_substream_handles_404(requests_mock, requests_mock_map, stream_by_name, stream_name, state, caplog): + for url, (response, status) in requests_mock_map.items(): + requests_mock.get(url, json=response, status_code=status) + 
+ stream = stream_by_name(stream_name) + records = read_from_stream(stream, "incremental", state) + assert records == [] + assert "Data was not found for URL" in caplog.text diff --git a/docs/integrations/sources/stripe-migrations.md b/docs/integrations/sources/stripe-migrations.md index 5dc7fa19f9b5..60f4be4d4ab4 100644 --- a/docs/integrations/sources/stripe-migrations.md +++ b/docs/integrations/sources/stripe-migrations.md @@ -1,5 +1,17 @@ # Stripe Migration Guide +## Upgrading to 5.0.0 + +This change fixes multiple incremental sync issues with the `Refunds`, `Checkout Sessions` and `Checkout Sessions Line Items` streams: + - The `Refunds` stream was not syncing data in incremental sync mode. Its cursor field has been updated to `created` to allow incremental syncs. Because of the changed cursor field, incremental syncs will no longer reflect every update to previously replicated records; only newly created records will be synced. To keep the data up to date, users are encouraged to make use of the lookback window. + - The `CheckoutSessions` stream had been missing one day of data when running an incremental sync after a reset; this has been resolved. + - The `CheckoutSessionsLineItems` stream was prone to data loss. It has been updated to use a new cursor field, `checkout_session_updated`. + - Incremental streams with the `created` cursor had been duplicating some data; this has been fixed. + +The stream schema updates and the changed cursor fields for the `Refunds` and `CheckoutSessionsLineItems` streams are breaking changes. A schema refresh and data reset of all affected streams is required after the update is applied. + +This update also affects three more streams: `Invoices`, `Subscriptions`, and `SubscriptionSchedule`. Their schemas have been changed so that the declared data types match the actual data. + ## Upgrading to 4.0.0 A major update of most streams to support event-based incremental sync mode. This allows the connector to pull not only the newly created data since the last sync, but the modified data as well. diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index bde9f9d6d0e6..3f949c6847e6 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -36,7 +36,7 @@ For more information on Stripe API Keys, see the [Stripe documentation](https:// 5. For **Account ID**, enter your Stripe Account ID. This ID begins with `acct_`, and can be found in the top-right corner of your Stripe [account settings page](https://dashboard.stripe.com/settings/account). 6. For **Secret Key**, enter the restricted key you created for the connection. 7. For **Replication Start Date**, use the provided datepicker or enter a UTC date and time programmatically in the format `YYYY-MM-DDTHH:mm:ssZ`. The data added on and after this date will be replicated. -8.
(Optional) For **Lookback Window**, you may specify a number of days from the present day to reread data. This allows the connector to retrieve data that might have been updated after its initial creation, and is useful for handling any post-transaction adjustments. This applies only to streams that do not support event-based incremental syncs, please see the list below. +8. (Optional) For **Lookback Window**, you may specify a number of days from the present day to reread data. This allows the connector to retrieve data that might have been updated after its initial creation, and is useful for handling any post-transaction adjustments. This applies only to streams that do not support event-based incremental syncs, please see [the list below](#troubleshooting). - Leaving the **Lookback Window** at its default value of 0 means Airbyte will not re-export data after it has been synced. - Setting the **Lookback Window** to 1 means Airbyte will re-export data from the past day, capturing any changes made in the last 24 hours. @@ -143,26 +143,27 @@ Please be aware: this also means that any change older than 30 days will not be ::: ### Troubleshooting -:::note + Since the Stripe API does not allow querying objects which were updated since the last sync, the Stripe connector uses the Events API under the hood to implement incremental syncs and export data based on its update date. However, not all the entities are supported by the Events API, so the Stripe connector uses the `created` field or its analogue to query for new data in your Stripe account. These are the entities synced based on the date of creation: -- `BalanceTransactions` -- `CheckoutSessionLineItems` (cursor field is `checkout_session_expires_at`) +- `Balance Transactions` - `Events` -- `FileLinks` +- `File Links` - `Files` -- `SetupAttempts` -- `ShippingRates` +- `Refunds` +- `Setup Attempts` +- `Shipping Rates` On the other hand, the following streams use the `updated` field value as a cursor: - `Application Fees` - `Application Fee Refunds` - `Authorizations` -- `Bank accounts` +- `Bank Accounts` - `Cardholders` - `Cards` - `Charges` - `Checkout Sessions` +- `Checkout Session Line Items` (cursor field is `checkout_session_updated`) - `Coupons` - `Credit Notes` - `Customer Balance Transactions` @@ -180,7 +181,6 @@ On the other hand, the following streams use the `updated` field value as a curs - `Plans` - `Prices` - `Products` -- `Refunds` - `Reviews` - `Setup Intents` - `Subscription Schedule` @@ -188,9 +188,9 @@ On the other hand, the following streams use the `updated` field value as a curs - `Top Ups` - `Transactions` - `Transfers` -::: -:::note +## Incremental deletes + The Stripe API also provides a way to implement incremental deletes for a limited number of streams: - `Bank Accounts` - `Coupons` @@ -206,98 +206,99 @@ The Stripe API also provides a way to implement incremental deletes for a limite - `Subscriptions` Each record is marked with `is_deleted` flag when the appropriate event happens upstream. -::: - * Check out common troubleshooting issues for the Stripe source connector on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions). 
+### Data type mapping + ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| -| 4.5.4 | 2023-11-16 | [32284](https://github.com/airbytehq/airbyte/pull/32284/) | Enable client-side rate limiting | -| 4.5.3 | 2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent | -| 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues | -| 4.5.1 | 2023-11-01 | [32056](https://github.com/airbytehq/airbyte/pull/32056/) | Use CDK version 0.52.8 | -| 4.5.0 | 2023-10-25 | [31327](https://github.com/airbytehq/airbyte/pull/31327/) | Use concurrent CDK when running in full-refresh | -| 4.4.2 | 2023-10-24 | [31764](https://github.com/airbytehq/airbyte/pull/31764) | Base image migration: remove Dockerfile and use the python-connector-base image | -| 4.4.1 | 2023-10-18 | [31553](https://github.com/airbytehq/airbyte/pull/31553) | Adjusted `Setup Attempts` and extended `Checkout Sessions` stream schemas | -| 4.4.0 | 2023-10-04 | [31046](https://github.com/airbytehq/airbyte/pull/31046) | Added margins field to invoice_line_items stream. | -| 4.3.1 | 2023-09-27 | [30800](https://github.com/airbytehq/airbyte/pull/30800) | Handle permission issues a non breaking | -| 4.3.0 | 2023-09-26 | [30752](https://github.com/airbytehq/airbyte/pull/30752) | Do not sync upcoming invoices, extend stream schemas | -| 4.2.0 | 2023-09-21 | [30660](https://github.com/airbytehq/airbyte/pull/30660) | Fix updated state for the incremental syncs | -| 4.1.1 | 2023-09-15 | [30494](https://github.com/airbytehq/airbyte/pull/30494) | Fix datatype of invoices.lines property | -| 4.1.0 | 2023-08-29 | [29950](https://github.com/airbytehq/airbyte/pull/29950) | Implement incremental deletes, add suggested streams | -| 4.0.1 | 2023-09-07 | [30254](https://github.com/airbytehq/airbyte/pull/30254) | Fix cursorless incremental streams | -| 4.0.0 | 2023-08-15 | [29330](https://github.com/airbytehq/airbyte/pull/29330) | Implement incremental syncs based on date of update | -| 3.17.4 | 2023-08-15 | [29425](https://github.com/airbytehq/airbyte/pull/29425) | Revert 3.17.3 | -| 3.17.3 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Revert 3.17.2 and fix atm_fee property | -| 3.17.2 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Fix stream schemas, remove custom 403 error handling | -| 3.17.1 | 2023-08-01 | [28887](https://github.com/airbytehq/airbyte/pull/28887) | Fix `Invoices` schema | -| 3.17.0 | 2023-07-28 | [26127](https://github.com/airbytehq/airbyte/pull/26127) | Add `Prices` stream | -| 3.16.0 | 2023-07-27 | [28776](https://github.com/airbytehq/airbyte/pull/28776) | Add new fields to stream schemas | -| 3.15.0 | 2023-07-09 | [28709](https://github.com/airbytehq/airbyte/pull/28709) | Remove duplicate streams | -| 3.14.0 | 2023-07-09 | [27217](https://github.com/airbytehq/airbyte/pull/27217) | Add `ShippingRates` stream | -| 3.13.0 | 2023-07-18 | [28466](https://github.com/airbytehq/airbyte/pull/28466) | Pin source API version | -| 3.12.0 | 2023-05-20 | [26208](https://github.com/airbytehq/airbyte/pull/26208) | Add new stream `Persons` | -| 3.11.0 | 2023-06-26 | [27734](https://github.com/airbytehq/airbyte/pull/27734) | License Update: Elv2 
stream | -| 3.10.0 | 2023-06-22 | [27132](https://github.com/airbytehq/airbyte/pull/27132) | Add `CreditNotes` stream | -| 3.9.1 | 2023-06-20 | [27522](https://github.com/airbytehq/airbyte/pull/27522) | Fix formatting | -| 3.9.0 | 2023-06-19 | [27362](https://github.com/airbytehq/airbyte/pull/27362) | Add new Streams: Transfer Reversals, Setup Attempts, Usage Records, Transactions | -| 3.8.0 | 2023-06-12 | [27238](https://github.com/airbytehq/airbyte/pull/27238) | Add `Topups` stream; Add `Files` stream; Add `FileLinks` stream | -| 3.7.0 | 2023-06-06 | [27083](https://github.com/airbytehq/airbyte/pull/27083) | Add new Streams: Authorizations, Cardholders, Cards, Payment Methods, Reviews | -| 3.6.0 | 2023-05-24 | [25893](https://github.com/airbytehq/airbyte/pull/25893) | Add `ApplicationFeesRefunds` stream with parent `ApplicationFees` | -| 3.5.0 | 2023-05-20 | [22859](https://github.com/airbytehq/airbyte/pull/22859) | Add stream `Early Fraud Warnings` | -| 3.4.3 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | Fix Airbyte date-time data-types | -| 3.4.2 | 2023-05-04 | [25795](https://github.com/airbytehq/airbyte/pull/25795) | Added `CDK TypeTransformer` to guarantee declared JSON Schema data-types | -| 3.4.1 | 2023-04-24 | [23389](https://github.com/airbytehq/airbyte/pull/23389) | Add `customer_tax_ids` to `Invoices` | -| 3.4.0 | 2023-03-20 | [23963](https://github.com/airbytehq/airbyte/pull/23963) | Add `SetupIntents` stream | -| 3.3.0 | 2023-04-12 | [25136](https://github.com/airbytehq/airbyte/pull/25136) | Add stream `Accounts` | -| 3.2.0 | 2023-04-10 | [23624](https://github.com/airbytehq/airbyte/pull/23624) | Add new stream `Subscription Schedule` | -| 3.1.0 | 2023-03-10 | [19906](https://github.com/airbytehq/airbyte/pull/19906) | Expand `tiers` when syncing `Plans` streams | -| 3.0.5 | 2023-03-25 | [22866](https://github.com/airbytehq/airbyte/pull/22866) | Specified date formatting in specification | -| 3.0.4 | 2023-03-24 | [24471](https://github.com/airbytehq/airbyte/pull/24471) | Fix stream slices for single sliced streams | -| 3.0.3 | 2023-03-17 | [24179](https://github.com/airbytehq/airbyte/pull/24179) | Get customer's attributes safely | -| 3.0.2 | 2023-03-13 | [24051](https://github.com/airbytehq/airbyte/pull/24051) | Cache `customers` stream; Do not request transactions of customers with zero balance. | -| 3.0.1 | 2023-02-22 | [22898](https://github.com/airbytehq/airbyte/pull/22898) | Add missing column to Subscriptions stream | -| 3.0.0 | 2023-02-21 | [23295](https://github.com/airbytehq/airbyte/pull/23295) | Fix invoice schema | -| 2.0.0 | 2023-02-14 | [22312](https://github.com/airbytehq/airbyte/pull/22312) | Another fix of `Invoices` stream schema + Remove http urls from openapi_spec.json | -| 1.0.2 | 2023-02-09 | [22659](https://github.com/airbytehq/airbyte/pull/22659) | Set `AvailabilityStrategy` for all streams | -| 1.0.1 | 2023-01-27 | [22042](https://github.com/airbytehq/airbyte/pull/22042) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 1.0.0 | 2023-01-25 | [21858](https://github.com/airbytehq/airbyte/pull/21858) | Update the `Subscriptions` and `Invoices` stream schemas | -| 0.1.40 | 2022-10-20 | [18228](https://github.com/airbytehq/airbyte/pull/18228) | Update the `PaymentIntents` stream schema | -| 0.1.39 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. 
| -| 0.1.38 | 2022-09-09 | [16537](https://github.com/airbytehq/airbyte/pull/16537) | Fix `redeem_by` field type for `customers` stream | -| 0.1.37 | 2022-08-16 | [15686](https://github.com/airbytehq/airbyte/pull/15686) | Fix the bug when the stream couldn't be fetched due to limited permission set, if so - it should be skipped | -| 0.1.36 | 2022-08-04 | [15292](https://github.com/airbytehq/airbyte/pull/15292) | Implement slicing | -| 0.1.35 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from spec and schema | -| 0.1.34 | 2022-07-01 | [14357](https://github.com/airbytehq/airbyte/pull/14357) | Add external account streams - | -| 0.1.33 | 2022-06-06 | [13449](https://github.com/airbytehq/airbyte/pull/13449) | Add semi-incremental support for CheckoutSessions and CheckoutSessionsLineItems streams, fixed big in StripeSubStream, added unittests, updated docs | -| 0.1.32 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | -| 0.1.31 | 2022-04-20 | [12230](https://github.com/airbytehq/airbyte/pull/12230) | Update connector to use a `spec.yaml` | -| 0.1.30 | 2022-03-21 | [11286](https://github.com/airbytehq/airbyte/pull/11286) | Minor corrections to documentation and connector specification | -| 0.1.29 | 2022-03-08 | [10359](https://github.com/airbytehq/airbyte/pull/10359) | Improved performance for streams with substreams: invoice_line_items, subscription_items, bank_accounts | -| 0.1.28 | 2022-02-08 | [10165](https://github.com/airbytehq/airbyte/pull/10165) | Improve 404 handling for `CheckoutSessionsLineItems` stream | -| 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | -| 0.1.26 | 2021-12-21 | [8992](https://github.com/airbytehq/airbyte/pull/8992) | Fix type `events.request` in schema | -| 0.1.25 | 2021-11-25 | [8250](https://github.com/airbytehq/airbyte/pull/8250) | Rearrange setup fields | -| 0.1.24 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Include tax data in `checkout_sessions_line_items` stream | -| 0.1.23 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Correct `payment_intents` schema | -| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | -| 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | -| 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback_window_days parameter | -| 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | -| 0.1.18 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Fix coupons and subscriptions stream schemas by removing incorrect timestamp formatting | -| 0.1.17 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Add `PaymentIntents` stream | -| 0.1.16 | 2021-07-28 | [4980](https://github.com/airbytehq/airbyte/pull/4980) | Remove Updated field from schemas | -| 0.1.15 | 2021-07-21 | [4878](https://github.com/airbytehq/airbyte/pull/4878) | Fix incorrect percent_off and discounts data filed types | -| 0.1.14 | 2021-07-09 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions \(including expired and canceled\) | -| 0.1.13 | 2021-07-03 | 
[4528](https://github.com/airbytehq/airbyte/pull/4528) | Remove regex for acc validation | -| 0.1.12 | 2021-06-08 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | -| 0.1.11 | 2021-05-30 | [3744](https://github.com/airbytehq/airbyte/pull/3744) | Fix types in schema | -| 0.1.10 | 2021-05-28 | [3728](https://github.com/airbytehq/airbyte/pull/3728) | Update data types to be number instead of int | -| 0.1.9 | 2021-05-13 | [3367](https://github.com/airbytehq/airbyte/pull/3367) | Add acceptance tests for connected accounts | -| 0.1.8 | 2021-05-11 | [3566](https://github.com/airbytehq/airbyte/pull/3368) | Bump CDK connectors | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 5.0.0 | 2023-11-14 | [32286](https://github.com/airbytehq/airbyte/pull/32286/) | Fix multiple issues regarding usage of the incremental sync mode for the `Refunds`, `CheckoutSessions`, `CheckoutSessionsLineItems` streams. Fix schemas for the streams: `Invoices`, `Subscriptions`, `SubscriptionSchedule` | +| 4.5.4 | 2023-11-16 | [32284](https://github.com/airbytehq/airbyte/pull/32284/) | Enable client-side rate limiting | +| 4.5.3 | 2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent | +| 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues | +| 4.5.1 | 2023-11-01 | [32056](https://github.com/airbytehq/airbyte/pull/32056/) | Use CDK version 0.52.8 | +| 4.5.0 | 2023-10-25 | [31327](https://github.com/airbytehq/airbyte/pull/31327/) | Use concurrent CDK when running in full-refresh | +| 4.4.2 | 2023-10-24 | [31764](https://github.com/airbytehq/airbyte/pull/31764) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 4.4.1 | 2023-10-18 | [31553](https://github.com/airbytehq/airbyte/pull/31553) | Adjusted `Setup Attempts` and extended `Checkout Sessions` stream schemas | +| 4.4.0 | 2023-10-04 | [31046](https://github.com/airbytehq/airbyte/pull/31046) | Added margins field to invoice_line_items stream. 
| +| 4.3.1 | 2023-09-27 | [30800](https://github.com/airbytehq/airbyte/pull/30800) | Handle permission issues a non breaking | +| 4.3.0 | 2023-09-26 | [30752](https://github.com/airbytehq/airbyte/pull/30752) | Do not sync upcoming invoices, extend stream schemas | +| 4.2.0 | 2023-09-21 | [30660](https://github.com/airbytehq/airbyte/pull/30660) | Fix updated state for the incremental syncs | +| 4.1.1 | 2023-09-15 | [30494](https://github.com/airbytehq/airbyte/pull/30494) | Fix datatype of invoices.lines property | +| 4.1.0 | 2023-08-29 | [29950](https://github.com/airbytehq/airbyte/pull/29950) | Implement incremental deletes, add suggested streams | +| 4.0.1 | 2023-09-07 | [30254](https://github.com/airbytehq/airbyte/pull/30254) | Fix cursorless incremental streams | +| 4.0.0 | 2023-08-15 | [29330](https://github.com/airbytehq/airbyte/pull/29330) | Implement incremental syncs based on date of update | +| 3.17.4 | 2023-08-15 | [29425](https://github.com/airbytehq/airbyte/pull/29425) | Revert 3.17.3 | +| 3.17.3 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Revert 3.17.2 and fix atm_fee property | +| 3.17.2 | 2023-08-01 | [28911](https://github.com/airbytehq/airbyte/pull/28911) | Fix stream schemas, remove custom 403 error handling | +| 3.17.1 | 2023-08-01 | [28887](https://github.com/airbytehq/airbyte/pull/28887) | Fix `Invoices` schema | +| 3.17.0 | 2023-07-28 | [26127](https://github.com/airbytehq/airbyte/pull/26127) | Add `Prices` stream | +| 3.16.0 | 2023-07-27 | [28776](https://github.com/airbytehq/airbyte/pull/28776) | Add new fields to stream schemas | +| 3.15.0 | 2023-07-09 | [28709](https://github.com/airbytehq/airbyte/pull/28709) | Remove duplicate streams | +| 3.14.0 | 2023-07-09 | [27217](https://github.com/airbytehq/airbyte/pull/27217) | Add `ShippingRates` stream | +| 3.13.0 | 2023-07-18 | [28466](https://github.com/airbytehq/airbyte/pull/28466) | Pin source API version | +| 3.12.0 | 2023-05-20 | [26208](https://github.com/airbytehq/airbyte/pull/26208) | Add new stream `Persons` | +| 3.11.0 | 2023-06-26 | [27734](https://github.com/airbytehq/airbyte/pull/27734) | License Update: Elv2 stream | +| 3.10.0 | 2023-06-22 | [27132](https://github.com/airbytehq/airbyte/pull/27132) | Add `CreditNotes` stream | +| 3.9.1 | 2023-06-20 | [27522](https://github.com/airbytehq/airbyte/pull/27522) | Fix formatting | +| 3.9.0 | 2023-06-19 | [27362](https://github.com/airbytehq/airbyte/pull/27362) | Add new Streams: Transfer Reversals, Setup Attempts, Usage Records, Transactions | +| 3.8.0 | 2023-06-12 | [27238](https://github.com/airbytehq/airbyte/pull/27238) | Add `Topups` stream; Add `Files` stream; Add `FileLinks` stream | +| 3.7.0 | 2023-06-06 | [27083](https://github.com/airbytehq/airbyte/pull/27083) | Add new Streams: Authorizations, Cardholders, Cards, Payment Methods, Reviews | +| 3.6.0 | 2023-05-24 | [25893](https://github.com/airbytehq/airbyte/pull/25893) | Add `ApplicationFeesRefunds` stream with parent `ApplicationFees` | +| 3.5.0 | 2023-05-20 | [22859](https://github.com/airbytehq/airbyte/pull/22859) | Add stream `Early Fraud Warnings` | +| 3.4.3 | 2023-05-10 | [25965](https://github.com/airbytehq/airbyte/pull/25965) | Fix Airbyte date-time data-types | +| 3.4.2 | 2023-05-04 | [25795](https://github.com/airbytehq/airbyte/pull/25795) | Added `CDK TypeTransformer` to guarantee declared JSON Schema data-types | +| 3.4.1 | 2023-04-24 | [23389](https://github.com/airbytehq/airbyte/pull/23389) | Add `customer_tax_ids` to `Invoices` | +| 3.4.0 | 2023-03-20 | 
[23963](https://github.com/airbytehq/airbyte/pull/23963) | Add `SetupIntents` stream | +| 3.3.0 | 2023-04-12 | [25136](https://github.com/airbytehq/airbyte/pull/25136) | Add stream `Accounts` | +| 3.2.0 | 2023-04-10 | [23624](https://github.com/airbytehq/airbyte/pull/23624) | Add new stream `Subscription Schedule` | +| 3.1.0 | 2023-03-10 | [19906](https://github.com/airbytehq/airbyte/pull/19906) | Expand `tiers` when syncing `Plans` streams | +| 3.0.5 | 2023-03-25 | [22866](https://github.com/airbytehq/airbyte/pull/22866) | Specified date formatting in specification | +| 3.0.4 | 2023-03-24 | [24471](https://github.com/airbytehq/airbyte/pull/24471) | Fix stream slices for single sliced streams | +| 3.0.3 | 2023-03-17 | [24179](https://github.com/airbytehq/airbyte/pull/24179) | Get customer's attributes safely | +| 3.0.2 | 2023-03-13 | [24051](https://github.com/airbytehq/airbyte/pull/24051) | Cache `customers` stream; Do not request transactions of customers with zero balance. | +| 3.0.1 | 2023-02-22 | [22898](https://github.com/airbytehq/airbyte/pull/22898) | Add missing column to Subscriptions stream | +| 3.0.0 | 2023-02-21 | [23295](https://github.com/airbytehq/airbyte/pull/23295) | Fix invoice schema | +| 2.0.0 | 2023-02-14 | [22312](https://github.com/airbytehq/airbyte/pull/22312) | Another fix of `Invoices` stream schema + Remove http urls from openapi_spec.json | +| 1.0.2 | 2023-02-09 | [22659](https://github.com/airbytehq/airbyte/pull/22659) | Set `AvailabilityStrategy` for all streams | +| 1.0.1 | 2023-01-27 | [22042](https://github.com/airbytehq/airbyte/pull/22042) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 1.0.0 | 2023-01-25 | [21858](https://github.com/airbytehq/airbyte/pull/21858) | Update the `Subscriptions` and `Invoices` stream schemas | +| 0.1.40 | 2022-10-20 | [18228](https://github.com/airbytehq/airbyte/pull/18228) | Update the `PaymentIntents` stream schema | +| 0.1.39 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream states. 
| +| 0.1.38 | 2022-09-09 | [16537](https://github.com/airbytehq/airbyte/pull/16537) | Fix `redeem_by` field type for `customers` stream | +| 0.1.37 | 2022-08-16 | [15686](https://github.com/airbytehq/airbyte/pull/15686) | Fix the bug when the stream couldn't be fetched due to limited permission set, if so - it should be skipped | +| 0.1.36 | 2022-08-04 | [15292](https://github.com/airbytehq/airbyte/pull/15292) | Implement slicing | +| 0.1.35 | 2022-07-21 | [14924](https://github.com/airbytehq/airbyte/pull/14924) | Remove `additionalProperties` field from spec and schema | +| 0.1.34 | 2022-07-01 | [14357](https://github.com/airbytehq/airbyte/pull/14357) | Add external account streams - | +| 0.1.33 | 2022-06-06 | [13449](https://github.com/airbytehq/airbyte/pull/13449) | Add semi-incremental support for CheckoutSessions and CheckoutSessionsLineItems streams, fixed big in StripeSubStream, added unittests, updated docs | +| 0.1.32 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | +| 0.1.31 | 2022-04-20 | [12230](https://github.com/airbytehq/airbyte/pull/12230) | Update connector to use a `spec.yaml` | +| 0.1.30 | 2022-03-21 | [11286](https://github.com/airbytehq/airbyte/pull/11286) | Minor corrections to documentation and connector specification | +| 0.1.29 | 2022-03-08 | [10359](https://github.com/airbytehq/airbyte/pull/10359) | Improved performance for streams with substreams: invoice_line_items, subscription_items, bank_accounts | +| 0.1.28 | 2022-02-08 | [10165](https://github.com/airbytehq/airbyte/pull/10165) | Improve 404 handling for `CheckoutSessionsLineItems` stream | +| 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | +| 0.1.26 | 2021-12-21 | [8992](https://github.com/airbytehq/airbyte/pull/8992) | Fix type `events.request` in schema | +| 0.1.25 | 2021-11-25 | [8250](https://github.com/airbytehq/airbyte/pull/8250) | Rearrange setup fields | +| 0.1.24 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Include tax data in `checkout_sessions_line_items` stream | +| 0.1.23 | 2021-11-08 | [7729](https://github.com/airbytehq/airbyte/pull/7729) | Correct `payment_intents` schema | +| 0.1.22 | 2021-11-05 | [7345](https://github.com/airbytehq/airbyte/pull/7345) | Add 3 new streams | +| 0.1.21 | 2021-10-07 | [6841](https://github.com/airbytehq/airbyte/pull/6841) | Fix missing `start_date` argument + update json files for SAT | +| 0.1.20 | 2021-09-30 | [6017](https://github.com/airbytehq/airbyte/pull/6017) | Add lookback_window_days parameter | +| 0.1.19 | 2021-09-27 | [6466](https://github.com/airbytehq/airbyte/pull/6466) | Use `start_date` parameter in incremental streams | +| 0.1.18 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Fix coupons and subscriptions stream schemas by removing incorrect timestamp formatting | +| 0.1.17 | 2021-09-14 | [6004](https://github.com/airbytehq/airbyte/pull/6004) | Add `PaymentIntents` stream | +| 0.1.16 | 2021-07-28 | [4980](https://github.com/airbytehq/airbyte/pull/4980) | Remove Updated field from schemas | +| 0.1.15 | 2021-07-21 | [4878](https://github.com/airbytehq/airbyte/pull/4878) | Fix incorrect percent_off and discounts data filed types | +| 0.1.14 | 2021-07-09 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions \(including expired and canceled\) | +| 0.1.13 | 2021-07-03 | 
[4528](https://github.com/airbytehq/airbyte/pull/4528) | Remove regex for acc validation | +| 0.1.12 | 2021-06-08 | [3973](https://github.com/airbytehq/airbyte/pull/3973) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | +| 0.1.11 | 2021-05-30 | [3744](https://github.com/airbytehq/airbyte/pull/3744) | Fix types in schema | +| 0.1.10 | 2021-05-28 | [3728](https://github.com/airbytehq/airbyte/pull/3728) | Update data types to be number instead of int | +| 0.1.9 | 2021-05-13 | [3367](https://github.com/airbytehq/airbyte/pull/3367) | Add acceptance tests for connected accounts | +| 0.1.8 | 2021-05-11 | [3566](https://github.com/airbytehq/airbyte/pull/3368) | Bump CDK connectors | From 3da9d84b03d5a650390283cc6e36305249d49dcd Mon Sep 17 00:00:00 2001 From: Stephane Geneix <147216312+stephane-airbyte@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:14:27 -0800 Subject: [PATCH 12/57] bump source-mysql-strict-encrypt to 3.1.7 (#32611) --- .../connectors/source-mysql-strict-encrypt/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml index 885c843f1f95..e7aa7fb15b86 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: database connectorType: source definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad - dockerImageTag: 3.1.5 + dockerImageTag: 3.1.7 dockerRepository: airbyte/source-mysql-strict-encrypt githubIssueLabel: source-mysql icon: mysql.svg From c477dc48463436edeb818c1db7edbc0fd2f3fc69 Mon Sep 17 00:00:00 2001 From: Patrick Nilan Date: Thu, 16 Nov 2023 10:29:56 -0800 Subject: [PATCH 13/57] Paypal Transaction Source: Updates QL to 200 (#32390) --- .../connectors/source-paypal-transaction/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml index b5d777e36eda..1821fdddaddc 100644 --- a/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml +++ b/airbyte-integrations/connectors/source-paypal-transaction/metadata.yaml @@ -1,6 +1,6 @@ data: ab_internal: - ql: 400 + ql: 200 sl: 200 allowedHosts: hosts: From 8b07903f75585f5958c2f5acc926dbb5377496d2 Mon Sep 17 00:00:00 2001 From: Patrick Nilan Date: Thu, 16 Nov 2023 10:30:07 -0800 Subject: [PATCH 14/57] Chargebee Source: Updated QL in metadata (#32382) --- airbyte-integrations/connectors/source-chargebee/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-chargebee/metadata.yaml b/airbyte-integrations/connectors/source-chargebee/metadata.yaml index 52df4ecc5974..854035925e39 100644 --- a/airbyte-integrations/connectors/source-chargebee/metadata.yaml +++ b/airbyte-integrations/connectors/source-chargebee/metadata.yaml @@ -1,6 +1,6 @@ data: ab_internal: - ql: 400 + ql: 200 sl: 200 allowedHosts: hosts: From 1cc5e533ba486c340eac1caed0b03aa573290347 Mon Sep 17 00:00:00 2001 From: Marcos Marx Date: Thu, 16 Nov 2023 15:56:39 -0300 Subject: [PATCH 15/57] Source Cart: fix schema and allow to use connector on Airbyte Cloud (#32517) --- .../connectors/source-cart/Dockerfile | 2 +- .../source-cart/acceptance-test-config.yml | 71 +++++++++++-------- 
.../connectors/source-cart/metadata.yaml | 4 +- .../source_cart/schemas/addresses.json | 5 ++ .../source_cart/schemas/customers_cart.json | 16 ++++- .../source_cart/schemas/order_items.json | 65 +++++++++++++++++ .../source_cart/schemas/order_payments.json | 2 + .../source_cart/schemas/order_statuses.json | 5 +- .../source_cart/schemas/orders.json | 19 +++++ .../source_cart/schemas/products.json | 2 + docs/integrations/sources/cart.md | 1 + 11 files changed, 158 insertions(+), 34 deletions(-) diff --git a/airbyte-integrations/connectors/source-cart/Dockerfile b/airbyte-integrations/connectors/source-cart/Dockerfile index 4323b70cdd86..d526813a96ab 100644 --- a/airbyte-integrations/connectors/source-cart/Dockerfile +++ b/airbyte-integrations/connectors/source-cart/Dockerfile @@ -21,5 +21,5 @@ COPY source_cart ./source_cart ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.1 +LABEL io.airbyte.version=0.3.0 LABEL io.airbyte.name=airbyte/source-cart diff --git a/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml b/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml index 3ab3140291d3..52803b05481a 100644 --- a/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-cart/acceptance-test-config.yml @@ -1,38 +1,51 @@ # See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) # for more information about how to configure these tests connector_image: airbyte/source-cart:dev -tests: +test_strictness_level: low +acceptance_tests: spec: - - spec_path: "source_cart/spec.json" - backward_compatibility_tests_config: - disable_for_version: "0.1.6" + tests: + - spec_path: "source_cart/spec.json" + backward_compatibility_tests_config: + disable_for_version: "0.1.6" connection: - - config_path: "secrets/config.json" - status: "succeed" - - config_path: "integration_tests/invalid_config.json" - status: "failed" - timeout_seconds: 180 + tests: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + timeout_seconds: 180 discovery: - - config_path: "secrets/config.json" - backward_compatibility_tests_config: - disable_for_version: "0.1.6" + tests: + - config_path: "secrets/config.json" + backward_compatibility_tests_config: + disable_for_version: "0.1.6" basic_read: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - timeout_seconds: 1800 + tests: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 1800 + empty_streams: + - name: "order_payments" + bypass_reason: "no data" + - name: "products" + bypass_reason: "no data" incremental: - - config_path: "secrets/config_central_api_router.json" - configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" - future_state_path: "integration_tests/abnormal_state.json" - timeout_seconds: 1800 - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - future_state_path: "integration_tests/abnormal_state.json" - timeout_seconds: 1800 + tests: + # - config_path: "secrets/config_central_api_router.json" + # configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" + # 
future_state_path: "integration_tests/abnormal_state.json" + # timeout_seconds: 1800 + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + future_state: + future_state_path: "integration_tests/abnormal_state.json" + timeout_seconds: 1800 full_refresh: - - config_path: "secrets/config_central_api_router.json" - configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" - timeout_seconds: 1800 - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - timeout_seconds: 1800 + tests: + - config_path: "secrets/config_central_api_router.json" + configured_catalog_path: "integration_tests/configured_catalog_wo_order_statuses.json" + timeout_seconds: 1800 + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + timeout_seconds: 1800 diff --git a/airbyte-integrations/connectors/source-cart/metadata.yaml b/airbyte-integrations/connectors/source-cart/metadata.yaml index 8f75a5db4ceb..191e92810614 100644 --- a/airbyte-integrations/connectors/source-cart/metadata.yaml +++ b/airbyte-integrations/connectors/source-cart/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: api connectorType: source definitionId: bb1a6d31-6879-4819-a2bd-3eed299ea8e2 - dockerImageTag: 0.2.1 + dockerImageTag: 0.3.0 dockerRepository: airbyte/source-cart githubIssueLabel: source-cart icon: cart.svg @@ -10,7 +10,7 @@ data: name: Cart.com registries: cloud: - enabled: false + enabled: true oss: enabled: true releaseStage: alpha diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json index 98fe097f1779..e7d377656a16 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/addresses.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -10,6 +12,9 @@ "address_line_1": { "type": ["string", "null"] }, + "address_type": { + "type": ["string", "null"] + }, "address_line_2": { "type": ["string", "null"] }, diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json index 8520252b4485..23c4e341dce3 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/customers_cart.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -7,6 +9,12 @@ "customer_number": { "type": ["string", "null"] }, + "credit_limit": { + "type": ["string", "null"] + }, + "payment_net_term": { + "type": ["string", "null"] + }, "last_name": { "type": ["string", "null"] }, @@ -38,7 +46,13 @@ "type": ["integer", "null"] }, "is_no_tax_customer": { - "type": "boolean" + "type": ["boolean", "null"] + }, + "is_inactive": { + "type": ["boolean", "null"] + }, + "lock_default_address": { + "type": ["boolean", "null"] }, "comments": { "type": ["string", "null"] diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json 
b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json index e803c78c5ac1..b7223e79fd55 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_items.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -81,6 +83,69 @@ }, "warehouse_id": { "type": ["integer", "null"] + }, + "configuration": { + "type": ["string", "null"] + }, + "description": { + "type": ["string", "null"] + }, + "discount_amount": { + "type": ["number", "null"] + }, + "discount_percentage": { + "type": ["number", "null"] + }, + "fitment": { + "type": ["string", "null"] + }, + "is_non_shipping_item": { + "type": ["boolean", "null"] + }, + "item_number_full": { + "type": ["string", "null"] + }, + "order_shipping_address_id": { + "type": ["string", "null"] + }, + "personalizations": { + "type": ["array", "null"] + }, + "selected_shipping_method": { + "type": ["string", "null"] + }, + "selected_shipping_method_id": { + "type": ["string", "null"] + }, + "selected_shipping_provider_service": { + "type": ["string", "null"] + }, + "shipping_total": { + "type": ["string", "null"] + }, + "status": { + "type": ["string", "null"] + }, + "tax": { + "type": ["number", "null"] + }, + "tax_code": { + "type": ["string", "null"] + }, + "variant_inventory_id": { + "type": ["string", "null"] + }, + "shipping_classification_code": { + "type": ["string", "null"] + }, + "variants": { + "type": ["array", "null"] + }, + "vendor_store_id": { + "type": ["integer", "null"] + }, + "weight_unit": { + "type": ["string", "null"] } } } diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json index f4dee9743008..ab2f2c844d71 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_payments.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json index eb7182c2f368..b77422eb2f54 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/order_statuses.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" }, "name": { "type": ["null", "string"] }, @@ -13,6 +15,7 @@ "created_at": { "type": ["null", "string"] }, "is_fully_refunded": { "type": ["null", "boolean"] }, "is_partially_refunded": { "type": ["null", "boolean"] }, - "is_quote_status": { "type": ["null", "boolean"] } + "is_quote_status": { "type": ["null", "boolean"] }, + "is_partially_shipped": { "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json index e5e7091efda4..f1ebdb8b5b9d 100644 --- 
a/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/orders.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" @@ -7,6 +9,23 @@ "customer_id": { "type": ["integer", "null"] }, + "delivery_tax": { + "type": ["string", "null"] + }, + "entered_by_type": { + "type": ["string", "null"] + }, + "shipping_selections": { + "type": ["array", "null"], + "items": { + "type": ["null", "object"], + "additionalProperties": true, + "properties": {} + } + }, + "sales_agent_user_id": { + "type": ["string", "null"] + }, "customer_type_id": { "type": ["integer", "null"] }, diff --git a/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json b/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json index ed1473eb08a6..5d0ac08fa31a 100644 --- a/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json +++ b/airbyte-integrations/connectors/source-cart/source_cart/schemas/products.json @@ -1,5 +1,7 @@ { + "$schema": "http://json-schema.org/draft-07/schema#", "type": "object", + "additionalProperties": true, "properties": { "id": { "type": "integer" diff --git a/docs/integrations/sources/cart.md b/docs/integrations/sources/cart.md index 0559e6c7f3a3..90a618b956c6 100644 --- a/docs/integrations/sources/cart.md +++ b/docs/integrations/sources/cart.md @@ -50,6 +50,7 @@ Please follow these [steps](https://developers.cart.com/docs/rest-api/docs/READM | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :------------------------------------------------------------------------------------- | +| 0.3.0 | 2023-11-14 | [23317](https://github.com/airbytehq/airbyte/pull/23317) | Update schemas | | 0.2.1 | 2023-02-22 | [23317](https://github.com/airbytehq/airbyte/pull/23317) | Remove support for incremental for `order_statuses` stream | | 0.2.0 | 2022-09-21 | [16612](https://github.com/airbytehq/airbyte/pull/16612) | Source Cart.com: implement Central API Router access method and improve backoff policy | | 0.1.6 | 2022-07-15 | [14752](https://github.com/airbytehq/airbyte/pull/14752) | Add `order_statuses` stream | From b3f474c3ecd288b35e5022caa2b278d49c2518b6 Mon Sep 17 00:00:00 2001 From: Evan Tahler Date: Thu, 16 Nov 2023 14:54:53 -0800 Subject: [PATCH 16/57] Update License documentation (#32575) --- LICENSE | 26 ++++++----- docs/project-overview/licenses/license-faq.md | 46 +++++++++++++------ 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/LICENSE b/LICENSE index 814fd88f57f3..0df58b4829be 100644 --- a/LICENSE +++ b/LICENSE @@ -1,14 +1,17 @@ Airbyte monorepo uses multiple licenses. The license for a particular work is defined with following prioritized rules: + 1. License directly present in the file 2. LICENSE file in the same directory as the work -3. First LICENSE found when exploring parent directories up to the project top level directory -4. Defaults to Elastic License 2.0 +3. A `license` property defined in the `metadata.yaml` configuration file found when exploring parent directories (most connectors) +4. First LICENSE found when exploring parent directories up to the project top level directory +5. 
Defaults to Elastic License 2.0 If you have any question regarding licenses, just visit our [FAQ](https://airbyte.io/license-faq) or [contact us](mailto:license@airbyte.io). ------------------------------------------------------------------------------------- +--- + MIT License Copyright (c) 2020 Airbyte, Inc. @@ -31,7 +34,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------------- +--- + Elastic License 2.0 (ELv2) **Acceptance** @@ -65,16 +69,16 @@ If you use the software in violation of these terms, such use is not licensed, a As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim. **Definitions** -The *licensor* is the entity offering these terms, and the *software* is the software the licensor makes available under these terms, including any portion of it. +The _licensor_ is the entity offering these terms, and the _software_ is the software the licensor makes available under these terms, including any portion of it. -*you* refers to the individual or entity agreeing to these terms. +_you_ refers to the individual or entity agreeing to these terms. -*your company* is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. *control* means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. +_your company_ is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. _control_ means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. -*your licenses* are all the licenses granted to you for the software under these terms. +_your licenses_ are all the licenses granted to you for the software under these terms. -*use* means anything you do with the software requiring one of your licenses. +_use_ means anything you do with the software requiring one of your licenses. -*trademark* means trademarks, service marks, and similar rights. +_trademark_ means trademarks, service marks, and similar rights. ------------------------------------------------------------------------------------- +--- diff --git a/docs/project-overview/licenses/license-faq.md b/docs/project-overview/licenses/license-faq.md index 837ae5a5fd3d..6865094e4ba4 100644 --- a/docs/project-overview/licenses/license-faq.md +++ b/docs/project-overview/licenses/license-faq.md @@ -1,16 +1,19 @@ # License FAQ ## Airbyte Licensing Overview -* **Airbyte Connectors** are open sourced and available under the MIT License. -* **Airbyte Protocol** is open sourced and available under the MIT License. -* **Airbyte CDK** (Connector Development Kit) is open sourced and available under the MIT License. -* **Airbyte Core** is licensed under the Elastic License 2.0 (ELv2). 
-* **Airbyte Cloud & Airbyte Enterprise** are both closed source and require a commercial license from Airbyte. + +- **Airbyte Connectors** are open sourced and available under the [MIT](https://opensource.org/license/mit/) or [Elastic License 2.0 (ELv2)](https://www.elastic.co/licensing/elastic-license/faq) License. Each connector's `metadata.yaml` file contains more information. +- **Airbyte Protocol** is open sourced and available under the MIT License. +- **Airbyte CDK** (Connector Development Kit) is open sourced and available under the MIT License. +- **Airbyte Core** is licensed under the Elastic License 2.0 (ELv2). +- **Airbyte Cloud & Airbyte Enterprise** are both closed source and require a commercial license from Airbyte. ![Diagram of license structure](../../.gitbook/assets/license_faq_diagram.png) ## About Elastic License 2.0 (ELv2) + ELv2 is a simple, non-copyleft license, allowing for the right to “use, copy, distribute, make available, and prepare derivative works of the software”. Anyone can use Airbyte, free of charge. You can run the software at scale on your infrastructure. There are only three high-level limitations. You cannot: + 1. Provide the products to others as a managed service ([read more](#what-is-the-managed-service-use-case-that-is-not-allowed-under-elv2)); 2. Circumvent the license key functionality or remove/obscure features protected by license keys; or 3. Remove or obscure any licensing, copyright, or other notices. @@ -20,60 +23,75 @@ In case you want to work with Airbyte without these limitations, we offer altern [View License](elv2-license.md) ## FAQ + ### What limitations does ELv2 impose on my use of Airbyte? + If you are an Airbyte Cloud customer, nothing changes for you. For open-source users, everyone can continue to use Airbyte as they are doing today: no limitations on volume, number of users, number of connections… There are only a few high-level limitations. You cannot: + 1. Provide the products to others as a managed service. For example, you cannot sell a cloud service that provides users with direct access to Airbyte. You can sell access to applications built and run using Airbyte ([read more](#what-is-the-managed-service-use-case-that-is-not-allowed-under-elv2)). 2. Circumvent the license key functionality or remove/obscure features protected by license keys. For example, our code may contain watermarks or keys to unlock proprietary functionality. Those elements of our code will be marked in our source code. You can’t remove or change them. ### Why did Airbyte adopt ELv2? + We are releasing Airbyte Cloud, a managed version of Airbyte that will offer alternatives to how our users operate Airbyte, including additional features and new execution models. We want to find a great way to execute our mission to commoditize data integration with open source and our ambition to create a sustainable business. -ELv2 gives us the best of both worlds. +ELv2 gives us the best of both worlds. On one hand, our users can continue to use Airbyte freely, and on the other hand, we can safely create a sustainable business and continue to invest in our community, project and product. We don’t have to worry about other large companies taking the product to monetize it for themselves, thus hurting our community. ### Will Airbyte connectors continue to be open source? + Our own connectors remain open-source, and our contributors can also develop their own connectors and continue to choose whichever license they prefer. 
This is our way to accomplish Airbyte’s vision of commoditizing data integration: access to data shouldn’t be behind a paywall. Also, we want Airbyte’s licensing to work well with applications that are integrated using connectors. We are continuously investing in Airbyte's data protocol and all the tooling around it. The Connector Development Kit (CDK), which helps our community and our team build and maintain connectors at scale, is a cornerstone of our commoditization strategy and also remains open-source. ### How do I continue to contribute to Airbyte under ELv2? + Airbyte’s projects are available here. Anyone can contribute to any of these projects (including those licensed with ELv2). We are introducing a Contributor License Agreement that you will have to sign with your first contribution. ### When will ELv2 be effective? + ELv2 will apply from the following Airbyte core version as of September 27, 2021: version 0.30.0. ### What is the “managed service” use case that is not allowed under ELv2? -We chose ELv2 because it is very permissive with what you can do with the software. + +We chose ELv2 because it is very permissive with what you can do with the software. You can basically build ANY product on top of Airbyte as long as you don’t: -* Host Airbyte yourself and sell it as an ELT/ETL tool, or a replacement for the Airbyte solution. -* Sell a product that directly exposes Airbyte’s UI or API. + +- Host Airbyte yourself and sell it as an ELT/ETL tool, or a replacement for the Airbyte solution. +- Sell a product that directly exposes Airbyte’s UI or API. Here is a non-exhaustive list of what you can do (without providing your customers direct access to Airbyte functionality): -* I am creating an analytics platform and I want to use Airbyte to bring data in on behalf of my customers. -* I am building my internal data stack and I want my team to be able to interact with Airbyte to configure the pipelines through the UI or the API. -* ... + +- I am creating an analytics platform and I want to use Airbyte to bring data in on behalf of my customers. +- I am building my internal data stack and I want my team to be able to interact with Airbyte to configure the pipelines through the UI or the API. +- ... ### My company has a policy against using code that restricts commercial use – can I still use Airbyte under ELv2? -You can use software under ELv2 for your commercial business, you simply cannot offer it as a managed service. + +You can use software under ELv2 for your commercial business, you simply cannot offer it as a managed service. ### As a Data Agency, I currently use Airbyte to fulfill my customer needs. How does ELv2 affect me? + You can continue to use Airbyte, as long as you don’t offer it as a managed service. ### I started to use Airbyte to ingest my customer’s data. What should I do? + You can continue to use Airbyte, as long as you don’t offer it as a managed service. ### Can I customize ELv2 software? + Yes, you can customize ELv2 software. ELv2 is similar in this sense to permissive open-source licenses. You can modify the software, integrate the variant into your application, and operate the modified application, as long as you don’t go against any of the limitations. ### Why didn’t you use a closed-source license for Airbyte Core? + We want to provide developers with free access to our Airbyte Core source code — including rights to modify it. Since this wouldn’t be possible with a closed-source license, we decided to use the more permissive ELv2. 
### Is there any revenue sharing for those who create Airbyte connectors? -We will be introducing a new participative model in the next few months. There are still a lot of details to figure out, but the general idea is that maintainers of connectors would have the option to obtain a share of revenue when the connectors are being used in the paid version of Airbyte. In exchange, maintainers would be responsible for SLAs, new features, and bug fixes for the said connector. +We will be introducing a new participative model in the next few months. There are still a lot of details to figure out, but the general idea is that maintainers of connectors would have the option to obtain a share of revenue when the connectors are being used in the paid version of Airbyte. In exchange, maintainers would be responsible for SLAs, new features, and bug fixes for the said connector. From ddd233bcb3b376c2f644c25534c3c4f8630ca68f Mon Sep 17 00:00:00 2001 From: Alexandre Cuoci Date: Thu, 16 Nov 2023 18:09:28 -0500 Subject: [PATCH 17/57] self-managed docs rewrite (#32605) --- .../on-kubernetes-via-helm.md | 35 ------ .../assets}/okta-app-integration-name.png | Bin .../okta-create-new-app-integration.png | Bin .../assets}/okta-login-redirect-uris.png | Bin docs/enterprise-setup/self-managed/README.md | 9 ++ .../self-managed/implementation-guide.md | 103 ++++++++++++++++++ .../self-managed/sso.md} | 25 ++--- docusaurus/redirects.yml | 4 +- docusaurus/sidebars.js | 19 +++- 9 files changed, 142 insertions(+), 53 deletions(-) rename docs/{assets/docs => enterprise-setup/assets}/okta-app-integration-name.png (100%) rename docs/{assets/docs => enterprise-setup/assets}/okta-create-new-app-integration.png (100%) rename docs/{assets/docs => enterprise-setup/assets}/okta-login-redirect-uris.png (100%) create mode 100644 docs/enterprise-setup/self-managed/README.md create mode 100644 docs/enterprise-setup/self-managed/implementation-guide.md rename docs/{airbyte-enterprise.md => enterprise-setup/self-managed/sso.md} (54%) diff --git a/docs/deploying-airbyte/on-kubernetes-via-helm.md b/docs/deploying-airbyte/on-kubernetes-via-helm.md index 12f192db3174..818dec3f78f5 100644 --- a/docs/deploying-airbyte/on-kubernetes-via-helm.md +++ b/docs/deploying-airbyte/on-kubernetes-via-helm.md @@ -122,41 +122,6 @@ After specifying your own configuration, run the following command: helm install --values path/to/values.yaml %release_name% airbyte/airbyte ``` -### (Early Access) Airbyte Enterprise deployment - -[Airbyte Enterprise](/airbyte-enterprise) is in an early access stage for select priority users. Once you [are qualified for an Airbyte Enterprise license key](https://airbyte.com/company/talk-to-sales), you can install Airbyte Enterprise via helm by following these steps: - -1. Checkout the latest revision of the [airbyte-platform repository](https://github.com/airbytehq/airbyte-platform) - -2. Add your Airbyte Enterprise license key and [auth configuration details](/airbyte-enterprise#single-sign-on-sso) to a file called `airbyte.yml` in the `configs` directory of `airbyte-platform`. You can copy `airbyte.sample.yml` to use as a template: - -```sh -cp configs/airbyte.sample.yml configs/airbyte.yml -``` - -Then, open up `airbyte.yml` in your text editor to fill in the indicated fields. - -:::caution - -For now, auth configurations aren't easy to modify once initially installed, so please double check them to make sure they're accurate before proceeding! This will be improved in the near future. - -::: - -3. 
Make sure your helm repository is up to date: - -```text -helm repo update -``` - -4. Install Airbyte Enterprise on helm using the following command: - -```text -./tools/bin/install_airbyte_pro_on_helm.sh -``` - -The default release name is `airbyte-pro`. You can change this via the `RELEASE_NAME` environment -variable. - ## Migrate from old charts to new ones Starting from `0.39.37-alpha` we've revisited helm charts structure and separated all components of airbyte into their own independent charts, thus by allowing our developers to test single component without deploying airbyte as a whole and by upgrading single component at a time. diff --git a/docs/assets/docs/okta-app-integration-name.png b/docs/enterprise-setup/assets/okta-app-integration-name.png similarity index 100% rename from docs/assets/docs/okta-app-integration-name.png rename to docs/enterprise-setup/assets/okta-app-integration-name.png diff --git a/docs/assets/docs/okta-create-new-app-integration.png b/docs/enterprise-setup/assets/okta-create-new-app-integration.png similarity index 100% rename from docs/assets/docs/okta-create-new-app-integration.png rename to docs/enterprise-setup/assets/okta-create-new-app-integration.png diff --git a/docs/assets/docs/okta-login-redirect-uris.png b/docs/enterprise-setup/assets/okta-login-redirect-uris.png similarity index 100% rename from docs/assets/docs/okta-login-redirect-uris.png rename to docs/enterprise-setup/assets/okta-login-redirect-uris.png diff --git a/docs/enterprise-setup/self-managed/README.md b/docs/enterprise-setup/self-managed/README.md new file mode 100644 index 000000000000..30d1e7e05598 --- /dev/null +++ b/docs/enterprise-setup/self-managed/README.md @@ -0,0 +1,9 @@ +# Airbyte Self-Managed + +[Airbyte Self-Managed](https://airbyte.com/solutions/airbyte-enterprise) is a self-hosted version of Airbyte with additional features for enterprise customers. Airbyte Enterprise is in an early access stage for select priority users. + +A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. + +The following pages outline how to: +1. [Deploy Airbyte Enterprise using Kubernetes](./implementation-guide.md) +2. [Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise](./sso.md) diff --git a/docs/enterprise-setup/self-managed/implementation-guide.md b/docs/enterprise-setup/self-managed/implementation-guide.md new file mode 100644 index 000000000000..3b5c2da9c5d2 --- /dev/null +++ b/docs/enterprise-setup/self-managed/implementation-guide.md @@ -0,0 +1,103 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Implementation Guide + +[Airbyte Self-Managed](./README.md) is in an early access stage for select priority users. Once you [are qualified for an Airbyte Self Managed license key](https://airbyte.com/company/talk-to-sales), you can deploy Airbyte with the following instructions. + +Airbyte Self Managed must be deployed using Kubernetes. This is to enable Airbyte's best performance and scale. The core components \(api server, scheduler, etc\) run as deployments while the scheduler launches connector-related pods on different nodes. + +## Prerequisites + +There are three prerequisites to deploying Self-Managed: installing [helm](https://helm.sh/docs/intro/install/), a Kubernetes cluster, and having configured `kubectl` to connect to the cluster. + +For production, we recommend deploying to EKS, GKE or AKS. 
If you are doing some local testing, follow the cluster setup instructions outlined [here](../../deploying-airbyte/on-kubernetes-via-helm.md#cluster-setup). + +To install `kubectl`, please follow [these instructions](https://kubernetes.io/docs/tasks/tools/). To configure `kubectl` to connect to your cluster by using `kubectl config use-context my-cluster-name`, see the following: +
+ Configure kubectl to connect to your cluster + + +
    +
  1. Configure gcloud with gcloud auth login.
  2. +
  3. On the Google Cloud Console, the cluster page will have a "Connect" button, with a command to run locally: gcloud container clusters get-credentials $CLUSTER_NAME --zone $ZONE_NAME --project $PROJECT_NAME
  4. +
  5. Use kubectl config get-contexts to show the contexts available.
  6. +
  7. Run kubectl config use-context $GKE_CONTEXT to access the cluster from kubectl.
  8. +
+
+ +
    +
  1. Configure your AWS CLI to connect to your project.
  2. +
  3. Install eksctl.
  4. +
  5. Run eksctl utils write-kubeconfig --cluster=$CLUSTER_NAME to make the context available to kubectl.
  6. +
  7. Use kubectl config get-contexts to show the contexts available.
  8. +
  9. Run kubectl config use-context $EKS_CONTEXT to access the cluster with kubectl.
  10. +
+
+
+
+ +## Deploy Airbyte Self-Managed + +### Add Airbyte Helm Repository + +Follow these instructions to add the Airbyte helm repository: +1. Run `helm repo add airbyte https://airbytehq.github.io/helm-charts`, where `airbyte` is the name of the repository that will be indexed locally. +2. Perform the repo indexing process, and ensure your helm repository is up-to-date by running `helm repo update`. +3. You can then browse all charts uploaded to your repository by running `helm search repo airbyte`. + +### Clone & Configure Airbyte + + +1. `git clone` the latest revision of the [airbyte-platform repository](https://github.com/airbytehq/airbyte-platform) + +2. Create a new `airbyte.yml` file in the `configs` directory of the `airbyte-platform` folder. You may also copy `airbyte.sample.yml` to use as a template: + +```sh +cp configs/airbyte.sample.yml configs/airbyte.yml +``` + +3. Add your Airbyte Enterprise license key to your `airbyte.yml`. + +4. Add your [auth details](/airbyte-enterprise#single-sign-on-sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding. + +
+ Configuring auth in your airbyte.yml file + +To configure SSO with Okta, add the following at the end of your `airbyte.yml` file: + +``` +auth: + identity-providers: + - type: okta + domain: $OKTA_DOMAIN + app-name: $OKTA_APP_INTEGRATION_NAME + client-id: $OKTA_CLIENT_ID + client-secret: $OKTA_CLIENT_SECRET +``` + +To configure basic auth (deploy without SSO), remove the entire `auth:` section from your airbyte.yml config file. You will authenticate with the instance admin user and password included in the your `airbyte.yml`. + +
+ +### Install Airbyte Self Managed + +Install Airbyte Enterprise on helm using the following command: + +```text +./tools/bin/install_airbyte_pro_on_helm.sh +``` + +The default release name is `airbyte-pro`. You can change this via the `RELEASE_NAME` environment +variable. + +### Customizing your Airbyte Self Managed Deployment + +In order to customize your deployment, you need to create `values.yaml` file in a local folder and populate it with default configuration override values. A `values.yaml` example can be located in [charts/airbyte](https://github.com/airbytehq/airbyte-platform/blob/main/charts/airbyte/values.yaml) folder of the Airbyte repository. + +After specifying your own configuration, run the following command: + +```text +./tools/bin/install_airbyte_pro_on_helm.sh --values path/to/values.yaml $RELEASE_NAME airbyte/airbyte +``` \ No newline at end of file diff --git a/docs/airbyte-enterprise.md b/docs/enterprise-setup/self-managed/sso.md similarity index 54% rename from docs/airbyte-enterprise.md rename to docs/enterprise-setup/self-managed/sso.md index d717cd41d3ef..a7295e60ecf5 100644 --- a/docs/airbyte-enterprise.md +++ b/docs/enterprise-setup/self-managed/sso.md @@ -1,14 +1,12 @@ -# Airbyte Enterprise +# Using Single Sign-On (SSO) -[Airbyte Enterprise](https://airbyte.com/solutions/airbyte-enterprise) is a self-managed version of Airbyte with additional features for enterprise customers. Airbyte Enterprise is in an early access stage for select priority users. A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. +Leverage your existing identity provider to enable employees to access your Airbyte instance using their corporate credentials, simplifying user provisioning. Enabling Single Sign-On extends Airbyte Self Managed to support multiple users, and multiple teams all on one instance. -The following instructions outline how to: -1. Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise -2. Deploy Airbyte Enterprise using Kubernetes (License Key Required) +Airbyte Self Managed currently supports SSO via OIDC with [Okta](https://www.okta.com/) as an IdP. Support for Azure Active Directory and connecting via SAML are both coming soon. Please talk to us to learn more about upcoming [enterprise features](https://airbyte.com/company/talk-to-sales). -## Single Sign-On (SSO) - -Airbyte Enterprise supports Single Sign-On, allowing an organization to manage user access to their Airbyte Enterprise instance through the configuration of an Identity Provider (IdP). Airbyte Enterprise currently supports SSO via OIDC with [Okta](https://www.okta.com/) as an IdP. +The following instructions walk you through: +1. [Setting up the Okta OIDC App Integration to be used by your Airbyte instance](#setting-up-okta-for-sso) +2. 
[Configuring Airbyte Self-Managed to use SSO](#deploying-airbyte-enterprise-with-okta) ### Setting up Okta for SSO @@ -16,13 +14,13 @@ You will need to create a new Okta OIDC App Integration for your Airbyte instanc You should create an app integration with **OIDC - OpenID Connect** as the sign-in method and **Web Application** as the application type: -![Screenshot of Okta app integration creation modal](./assets/docs/okta-create-new-app-integration.png) +![Screenshot of Okta app integration creation modal](../assets/okta-create-new-app-integration.png) #### App integration name Please choose a URL-friendly app integraiton name without spaces or special characters, such as `my-airbyte-app`: -![Screenshot of Okta app integration name](./assets/docs/okta-app-integration-name.png) +![Screenshot of Okta app integration name](../assets/okta-app-integration-name.png) Spaces or special characters in this field could result in invalid redirect URIs. @@ -42,13 +40,14 @@ Sign-out redirect URIs /auth/realms/airbyte/broker//endpoint/logout_response ``` -![Okta app integration name screenshot](./assets/docs/okta-login-redirect-uris.png) +![Okta app integration name screenshot](../assets/okta-login-redirect-uris.png) _Example values_ `` should point to where your Airbyte instance will be available, including the http/https protocol. -## Deploying Airbyte Enterprise with Okta + +## Deploying Airbyte Self-Managed with Okta Once your Okta app is set up, you're ready to deploy Airbyte with SSO. Take note of the following configuration values, as you will need them to configure Airbyte to use your new Okta SSO app integration: @@ -57,4 +56,4 @@ Once your Okta app is set up, you're ready to deploy Airbyte with SSO. Take note - Client ID - Client Secret -Visit [Airbyte Enterprise deployment](/deploying-airbyte/on-kubernetes-via-helm#early-access-airbyte-enterprise-deployment) for instructions on how to deploy Airbyte Enterprise using `kubernetes`, `kubectl` and `helm`. +Visit the [implementation guide](./implementation-guide.md) for instructions on how to deploy Airbyte Enterprise using `kubernetes`, `kubectl` and `helm`. 
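As a purely illustrative example of the redirect URI pattern described in the Okta setup section above: if your Airbyte instance were reachable at `https://airbyte.example.com` and your app integration were named `my-airbyte-app` (both hypothetical placeholder values), the sign-out redirect URI would look roughly like:

```text
https://airbyte.example.com/auth/realms/airbyte/broker/my-airbyte-app/endpoint/logout_response
```

The hostname must include the http/https protocol, and because the app integration name appears as a path segment, it needs to stay URL-friendly.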
diff --git a/docusaurus/redirects.yml b/docusaurus/redirects.yml index b69386db8c1d..28a7f499bc15 100644 --- a/docusaurus/redirects.yml +++ b/docusaurus/redirects.yml @@ -1,6 +1,8 @@ # A list of URLs that should be redirected to new pathes - from: /airbyte-pro - to: /airbyte-enterprise + to: /enterprise-setup/self-managed/ +- from: /airbyte-enterprise + to: /enterprise-setup/self-managed/ - from: /upgrading-airbyte to: /operator-guides/upgrading-airbyte - from: /catalog diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 2b915c7b4bb4..61e5dde2146a 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -418,6 +418,19 @@ const deployAirbyte = { ], }; +const airbyteSelfManaged = { + type: "category", + label: "Airbyte Self Managed", + link: { + type: "doc", + id: "enterprise-setup/self-managed/README", + }, + items: [ + "enterprise-setup/self-managed/implementation-guide", + "enterprise-setup/self-managed/sso", + ] +} + const operatorGuide = { type: "category", label: "Manage Airbyte", @@ -518,10 +531,8 @@ module.exports = { type: "doc", id: "troubleshooting", }, - { - type: "doc", - id: "airbyte-enterprise", - }, + sectionHeader("Enterprise Setup"), + airbyteSelfManaged, sectionHeader("Developer Guides"), { type: "doc", From 159dfe69a4bd44979a7bbbf2398e26ff46636419 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 15:53:40 -0800 Subject: [PATCH 18/57] Hotfix: Fix formatting issue (#32619) --- airbyte-integrations/connectors/source-bing-ads/metadata.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml index 599a7e5c7206..1c29083b42ff 100644 --- a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml @@ -25,11 +25,11 @@ data: name: Bing Ads registries: cloud: - dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU + dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU enabled: true oss: enabled: true - dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU + dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU releaseStage: generally_available releases: breakingChanges: From b6e5aff10cc938443ad018b65abf8fd577012997 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 16:08:38 -0800 Subject: [PATCH 19/57] CI: Add a test version of the approve and merge command that requires required checks to be passing (#32612) Co-authored-by: bnchrch --- .github/workflows/airbyte-ci-tests.yml | 1 + .../approve-and-merge-demo-dispatch.yml | 87 +++++++++++++++++++ .github/workflows/format_fix.yml | 1 + .github/workflows/gradle.yml | 1 + .github/workflows/slash-commands.yml | 1 + 5 files changed, 91 insertions(+) create mode 100644 .github/workflows/approve-and-merge-demo-dispatch.yml diff --git a/.github/workflows/airbyte-ci-tests.yml b/.github/workflows/airbyte-ci-tests.yml index f11f6947b469..7089532b095d 100644 --- a/.github/workflows/airbyte-ci-tests.yml +++ b/.github/workflows/airbyte-ci-tests.yml @@ -13,6 +13,7 @@ on: - synchronize jobs: run-airbyte-ci-tests: + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: Run Airbyte CI tests runs-on: "ci-runner-connector-test-large-dagger-0-6-4" steps: diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml 
b/.github/workflows/approve-and-merge-demo-dispatch.yml new file mode 100644 index 000000000000..5bed14cee2c2 --- /dev/null +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -0,0 +1,87 @@ +name: Approve and Merge Command Dispatch + +# Note: We have a two stage dispatch so that we can wait for the formatters to run before approving and merging. +on: + repository_dispatch: + types: [approve-and-merge-demo-command] +jobs: + checkFormat: + runs-on: ubuntu-latest + steps: + - name: Wait for formatters to succeed + id: wait-for-formatters + uses: lewagon/wait-on-check-action@v1.3.1 + with: + ref: ${{ github.ref }} + check-name: "Apply All Formatting Rules" + repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + wait-interval: 30 + - name: Comment if formatters failed + if: failure() + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: Formatters failed. Please fix the errors and try again. + + checkGradle: + runs-on: ubuntu-latest + steps: + - name: Wait for Gradle Check to succeed + id: wait-for-gradle + uses: lewagon/wait-on-check-action@v1.3.1 + with: + ref: ${{ github.ref }} + check-name: "Gradle Check" + repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + wait-interval: 60 + - name: Comment if Gradle Check failed + if: failure() + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: Gradle Check failed. Please fix the errors and try again. + + checkAirbyteCI: + runs-on: ubuntu-latest + steps: + - name: Wait for Airbyte CI tests to succeed + id: wait-for-gradle + uses: lewagon/wait-on-check-action@v1.3.1 + with: + ref: ${{ github.ref }} + check-name: "Run Airbyte CI tests" + repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + wait-interval: 30 + - name: Comment if Airbyte CI tests failed + if: failure() + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: Airbyte CI tests failed. Please fix the errors and try again. 
+ + approveAndMergeDispatch: + runs-on: ubuntu-latest + needs: [checkFormat, checkGradle, checkAirbyteCI] + steps: + - name: Auto Approve Slash Command Dispatch + uses: peter-evans/slash-command-dispatch@v3 + id: scd + with: + token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + permission: write + issue-type: pull-request + repository: airbytehq/airbyte-cloud + dispatch-type: repository + commands: | + approve-and-merge + + - name: Edit comment with error message + if: steps.scd.outputs.error-message + uses: peter-evans/create-or-update-comment@v1 + with: + comment-id: ${{ github.event.client_payload.github.payload.comment.id }} + body: | + > Error: ${{ steps.scd.outputs.error-message }} diff --git a/.github/workflows/format_fix.yml b/.github/workflows/format_fix.yml index d689a8891063..58761fef432e 100644 --- a/.github/workflows/format_fix.yml +++ b/.github/workflows/format_fix.yml @@ -11,6 +11,7 @@ on: jobs: format-fix: runs-on: "ci-runner-connector-format-medium-dagger-0-6-4" + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: "Apply All Formatting Rules" timeout-minutes: 40 steps: diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 13fa98a6acaa..695ccde96fa1 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -54,6 +54,7 @@ jobs: # In case of self-hosted EC2 errors, removed the `needs` line and switch back to running on ubuntu-latest. needs: start-check-runner # required to start the main job when the runner is ready runs-on: ${{ needs.start-check-runner.outputs.label }} # run the job on the newly created runner + # Note if you are changing this name you must also change it in the approve-and-merge-dispatch.yml workflow name: Gradle Check timeout-minutes: 30 steps: diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index 22029f06baba..91dc9d0fb8d5 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -29,6 +29,7 @@ jobs: publish-java-cdk legacy-publish connector-performance + approve-and-merge-demo static-args: | repo=${{ steps.getref.outputs.repo }} gitref=${{ steps.getref.outputs.ref }} From 5c87e1c54a4a595ec86ad726632d9dec967e04bf Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 16:19:30 -0800 Subject: [PATCH 20/57] CI TEST: Benign docstring change (#32620) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 1 + .github/workflows/slash-commands.yml | 2 +- .../connectors/pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index 5bed14cee2c2..5ab290d6230c 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -4,6 +4,7 @@ name: Approve and Merge Command Dispatch on: repository_dispatch: types: [approve-and-merge-demo-command] + jobs: checkFormat: runs-on: ubuntu-latest diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index 91dc9d0fb8d5..d589eee4b49a 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -17,7 +17,7 @@ jobs: - name: Slash Command Dispatch id: scd - uses: peter-evans/slash-command-dispatch@v2 + uses: peter-evans/slash-command-dispatch@v3 with: token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }} permission: write diff --git 
a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. + """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. From 796f4fa1ba70d79a54b2f2e8c13bb1c59751d7cd Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 16:27:30 -0800 Subject: [PATCH 21/57] CI TEST: Benign docstring change (#32621) --- .github/workflows/slash-commands.yml | 13 +++++++++++-- .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index d589eee4b49a..8df9fb7d3342 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -15,7 +15,7 @@ jobs: echo ref="$(echo $pr_info | jq -r '.head.ref')" >> $GITHUB_OUTPUT echo repo="$(echo $pr_info | jq -r '.head.repo.full_name')" >> $GITHUB_OUTPUT - - name: Slash Command Dispatch + - name: Slash Command Dispatch (Workflow) id: scd uses: peter-evans/slash-command-dispatch@v3 with: @@ -29,13 +29,22 @@ jobs: publish-java-cdk legacy-publish connector-performance - approve-and-merge-demo static-args: | repo=${{ steps.getref.outputs.repo }} gitref=${{ steps.getref.outputs.ref }} comment-id=${{ github.event.comment.id }} dispatch-type: workflow + - name: Slash Command Dispatch (Repository) + id: scdr + uses: peter-evans/slash-command-dispatch@v3 + with: + token: ${{ secrets.GH_PAT_MAINTENANCE_OCTAVIA }} + permission: write + commands: | + approve-and-merge-demo + dispatch-type: repository + - name: Edit comment with error message if: steps.scd.outputs.error-message uses: peter-evans/create-or-update-comment@v1 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index f6073542ca2f..205498c331da 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package + """Runs the tests for the given airbyte-ci package. Args: pipeline_context (ClickPipelineContext): The context object. 
From 88cc7c8dfeb0995ecc85f256ab9930b71ef2b4c0 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 16:49:19 -0800 Subject: [PATCH 22/57] CI TEST: Benign docstring change (#32622) --- .../approve-and-merge-demo-dispatch.yml | 44 ++----------------- .../pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 4 insertions(+), 42 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index 5ab290d6230c..6e2532a647c9 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -14,7 +14,7 @@ jobs: uses: lewagon/wait-on-check-action@v1.3.1 with: ref: ${{ github.ref }} - check-name: "Apply All Formatting Rules" + check-name: "Automatic Formatting on PRs" repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} wait-interval: 30 - name: Comment if formatters failed @@ -23,49 +23,11 @@ jobs: with: comment-id: ${{ github.event.client_payload.github.payload.comment.id }} body: | - > Error: Formatters failed. Please fix the errors and try again. - - checkGradle: - runs-on: ubuntu-latest - steps: - - name: Wait for Gradle Check to succeed - id: wait-for-gradle - uses: lewagon/wait-on-check-action@v1.3.1 - with: - ref: ${{ github.ref }} - check-name: "Gradle Check" - repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} - wait-interval: 60 - - name: Comment if Gradle Check failed - if: failure() - uses: peter-evans/create-or-update-comment@v1 - with: - comment-id: ${{ github.event.client_payload.github.payload.comment.id }} - body: | - > Error: Gradle Check failed. Please fix the errors and try again. - - checkAirbyteCI: - runs-on: ubuntu-latest - steps: - - name: Wait for Airbyte CI tests to succeed - id: wait-for-gradle - uses: lewagon/wait-on-check-action@v1.3.1 - with: - ref: ${{ github.ref }} - check-name: "Run Airbyte CI tests" - repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} - wait-interval: 30 - - name: Comment if Airbyte CI tests failed - if: failure() - uses: peter-evans/create-or-update-comment@v1 - with: - comment-id: ${{ github.event.client_payload.github.payload.comment.id }} - body: | - > Error: Airbyte CI tests failed. Please fix the errors and try again. + > Error: Formatters failed. Ensure formatting is passing before using approve-and-merge. approveAndMergeDispatch: runs-on: ubuntu-latest - needs: [checkFormat, checkGradle, checkAirbyteCI] + needs: [checkFormat] steps: - name: Auto Approve Slash Command Dispatch uses: peter-evans/slash-command-dispatch@v3 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. + """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. 
From 95c8667859a533b9bfe1561ca61b71e0984a01ae Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 17:01:11 -0800 Subject: [PATCH 23/57] CI TEST: Benign docstring change (#32623) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 2 +- .../connectors/pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index 6e2532a647c9..d4c158d80248 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -14,7 +14,7 @@ jobs: uses: lewagon/wait-on-check-action@v1.3.1 with: ref: ${{ github.ref }} - check-name: "Automatic Formatting on PRs" + check-name: "Apply All Formatting Rules" repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} wait-interval: 30 - name: Comment if formatters failed diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index f6073542ca2f..205498c331da 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package + """Runs the tests for the given airbyte-ci package. Args: pipeline_context (ClickPipelineContext): The context object. From 70cb2306477ac0779b90d96771dd8ff1d0bb89c6 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 17:39:15 -0800 Subject: [PATCH 24/57] CI TEST: Benign docstring change (#32624) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 10 +++------- .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index d4c158d80248..1e542e38bd0a 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -9,14 +9,10 @@ jobs: checkFormat: runs-on: ubuntu-latest steps: - - name: Wait for formatters to succeed - id: wait-for-formatters - uses: lewagon/wait-on-check-action@v1.3.1 + - uses: maael/confirm-checks-action with: - ref: ${{ github.ref }} - check-name: "Apply All Formatting Rules" - repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} - wait-interval: 30 + github_token: ${{secrets.GH_PAT_APPROVINGTON_OCTAVIA}} + checks: 'Apply All Formatting Rules' - name: Comment if formatters failed if: failure() uses: peter-evans/create-or-update-comment@v1 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. + """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. 
From 251bba1ff09203cdb057b7a3d6164f98649b3773 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 17:44:23 -0800 Subject: [PATCH 25/57] CI TEST: Benign docstring change (#32628) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 4 ++-- .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index 1e542e38bd0a..a129bad50ec7 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -1,4 +1,4 @@ -name: Approve and Merge Command Dispatch +name: Approve and Merge Demo Command Dispatch # Note: We have a two stage dispatch so that we can wait for the formatters to run before approving and merging. on: @@ -12,7 +12,7 @@ jobs: - uses: maael/confirm-checks-action with: github_token: ${{secrets.GH_PAT_APPROVINGTON_OCTAVIA}} - checks: 'Apply All Formatting Rules' + checks: "Apply All Formatting Rules" - name: Comment if formatters failed if: failure() uses: peter-evans/create-or-update-comment@v1 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index f6073542ca2f..205498c331da 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package + """Runs the tests for the given airbyte-ci package. Args: pipeline_context (ClickPipelineContext): The context object. From 44450c3840ace7efcec7372b01a1a08e30cdbeb1 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 18:13:42 -0800 Subject: [PATCH 26/57] Test CI: Benign docstring change (#32629) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 10 +++++++--- .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index a129bad50ec7..5458e3cee8c4 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -9,10 +9,14 @@ jobs: checkFormat: runs-on: ubuntu-latest steps: - - uses: maael/confirm-checks-action + - name: Wait for formatters to succeed + id: wait-for-formatters + uses: lewagon/wait-on-check-action@v1.3.1 with: - github_token: ${{secrets.GH_PAT_APPROVINGTON_OCTAVIA}} - checks: "Apply All Formatting Rules" + ref: ${{ github.ref }} + check-name: "Apply All Formatting Rules" + repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} + wait-interval: 30 - name: Comment if formatters failed if: failure() uses: peter-evans/create-or-update-comment@v1 diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. 
+ """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. From 0e7f30b76e24c825b6eb6fb838541f8b262edd59 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 18:34:11 -0800 Subject: [PATCH 27/57] TEST CI: Benign docstring change (#32630) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 8 ++++++-- .github/workflows/slash-commands.yml | 4 ++++ .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index 5458e3cee8c4..e19d16e46698 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -9,12 +9,16 @@ jobs: checkFormat: runs-on: ubuntu-latest steps: + - name: Dump the client payload context + env: + PAYLOAD_CONTEXT: ${{ toJson(github.event.client_payload) }} + run: echo "$PAYLOAD_CONTEXT" - name: Wait for formatters to succeed id: wait-for-formatters uses: lewagon/wait-on-check-action@v1.3.1 with: - ref: ${{ github.ref }} - check-name: "Apply All Formatting Rules" + ref: ${{ github.event.client_payload.slash_command.args.named.gitref }} + check-name: "Check for formatting errors on" repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} wait-interval: 30 - name: Comment if formatters failed diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index 8df9fb7d3342..90106603c9af 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -43,6 +43,10 @@ jobs: permission: write commands: | approve-and-merge-demo + static-args: | + repo=${{ steps.getref.outputs.repo }} + gitref=${{ steps.getref.outputs.ref }} + comment-id=${{ github.event.comment.id }} dispatch-type: repository - name: Edit comment with error message diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index f6073542ca2f..205498c331da 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package + """Runs the tests for the given airbyte-ci package. Args: pipeline_context (ClickPipelineContext): The context object. 
From 4c6e6a0f936ca0d6843f2cd7288ae559f9d38575 Mon Sep 17 00:00:00 2001 From: Ben Church Date: Thu, 16 Nov 2023 18:40:37 -0800 Subject: [PATCH 28/57] TEST CI: Benign docstring change (#32631) --- .github/workflows/approve-and-merge-demo-dispatch.yml | 8 ++------ .github/workflows/slash-commands.yml | 4 ---- .../pipelines/pipelines/airbyte_ci/test/commands.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/approve-and-merge-demo-dispatch.yml b/.github/workflows/approve-and-merge-demo-dispatch.yml index e19d16e46698..234a038558e0 100644 --- a/.github/workflows/approve-and-merge-demo-dispatch.yml +++ b/.github/workflows/approve-and-merge-demo-dispatch.yml @@ -9,16 +9,12 @@ jobs: checkFormat: runs-on: ubuntu-latest steps: - - name: Dump the client payload context - env: - PAYLOAD_CONTEXT: ${{ toJson(github.event.client_payload) }} - run: echo "$PAYLOAD_CONTEXT" - name: Wait for formatters to succeed id: wait-for-formatters uses: lewagon/wait-on-check-action@v1.3.1 with: - ref: ${{ github.event.client_payload.slash_command.args.named.gitref }} - check-name: "Check for formatting errors on" + ref: ${{ github.event.client_payload.pull_request.head.ref }} + check-name: "Apply All Formatting Rules" repo-token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }} wait-interval: 30 - name: Comment if formatters failed diff --git a/.github/workflows/slash-commands.yml b/.github/workflows/slash-commands.yml index 90106603c9af..8df9fb7d3342 100644 --- a/.github/workflows/slash-commands.yml +++ b/.github/workflows/slash-commands.yml @@ -43,10 +43,6 @@ jobs: permission: write commands: | approve-and-merge-demo - static-args: | - repo=${{ steps.getref.outputs.repo }} - gitref=${{ steps.getref.outputs.ref }} - comment-id=${{ github.event.comment.id }} dispatch-type: repository - name: Edit comment with error message diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py index 205498c331da..f6073542ca2f 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/test/commands.py @@ -18,7 +18,7 @@ @pass_pipeline_context @click_ignore_unused_kwargs async def test(pipeline_context: ClickPipelineContext): - """Runs the tests for the given airbyte-ci package. + """Runs the tests for the given airbyte-ci package Args: pipeline_context (ClickPipelineContext): The context object. 
From f8ae6f639152010ad4c60a6aaf1abeef294e6cf9 Mon Sep 17 00:00:00 2001 From: Denys Davydov Date: Fri, 17 Nov 2023 08:13:01 +0200 Subject: [PATCH 29/57] :bug: Source Amazon Seller Partner: fix OAuth (#32550) Co-authored-by: davydov-d --- .../source-amazon-seller-partner/main.py | 2 + .../metadata.yaml | 2 +- .../config_migrations.py | 79 +++++++++++++++++++ .../source_amazon_seller_partner/spec.json | 40 +++++++--- .../unit_tests/test_migrations.py | 41 ++++++++++ .../test_migrations/migrated_config.json | 9 +++ .../test_migrations/not_migrated_config.json | 8 ++ .../sources/amazon-seller-partner.md | 36 +++++++-- 8 files changed, 199 insertions(+), 18 deletions(-) create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json create mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/main.py b/airbyte-integrations/connectors/source-amazon-seller-partner/main.py index a09a9063026c..f5089129f6a6 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/main.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/main.py @@ -7,7 +7,9 @@ from airbyte_cdk.entrypoint import launch from source_amazon_seller_partner import SourceAmazonSellerPartner +from source_amazon_seller_partner.config_migrations import MigrateAccountType if __name__ == "__main__": source = SourceAmazonSellerPartner() + MigrateAccountType.migrate(sys.argv[1:], source) launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml index 2bad20c6b106..3f39e4aae628 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 - dockerImageTag: 2.0.0 + dockerImageTag: 2.0.1 dockerRepository: airbyte/source-amazon-seller-partner documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner githubIssueLabel: source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py new file mode 100644 index 000000000000..5d2daf748f6c --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/config_migrations.py @@ -0,0 +1,79 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import logging +from typing import Any, List, Mapping + +from airbyte_cdk.config_observation import create_connector_config_control_message +from airbyte_cdk.entrypoint import AirbyteEntrypoint +from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository + +from .source import SourceAmazonSellerPartner + +logger = logging.getLogger("airbyte_logger") + + +class MigrateAccountType: + """ + This class stands for migrating the config at runtime, + while providing the backward compatibility when falling back to the previous source version. + + Specifically, starting from `2.0.1`, the `account_type` property becomes required. + For those connector configs that do not contain this key, the default value of `Seller` will be used. + Reverse operation is not needed as this field is ignored in previous versions of the connector. + """ + + message_repository: MessageRepository = InMemoryMessageRepository() + migration_key: str = "account_type" + + @classmethod + def _should_migrate(cls, config: Mapping[str, Any]) -> bool: + """ + This method determines whether config requires migration. + Returns: + > True, if the transformation is neccessary + > False, otherwise. + """ + return cls.migration_key not in config + + @classmethod + def _populate_with_default_value(cls, config: Mapping[str, Any], source: SourceAmazonSellerPartner = None) -> Mapping[str, Any]: + config[cls.migration_key] = "Seller" + return config + + @classmethod + def _modify_and_save(cls, config_path: str, source: SourceAmazonSellerPartner, config: Mapping[str, Any]) -> Mapping[str, Any]: + # modify the config + migrated_config = cls._populate_with_default_value(config, source) + # save the config + source.write_config(migrated_config, config_path) + # return modified config + return migrated_config + + @classmethod + def _emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None: + # add the Airbyte Control Message to message repo + cls.message_repository.emit_message(create_connector_config_control_message(migrated_config)) + # emit the Airbyte Control Message from message queue to stdout + for message in cls.message_repository.consume_queue(): + print(message.json(exclude_unset=True)) + + @classmethod + def migrate(cls, args: List[str], source: SourceAmazonSellerPartner) -> None: + """ + This method checks the input args, should the config be migrated, + transform if neccessary and emit the CONTROL message. 
+ """ + # get config path + config_path = AirbyteEntrypoint(source).extract_config(args) + # proceed only if `--config` arg is provided + if config_path: + # read the existing config + config = source.read_config(config_path) + # migration check + if cls._should_migrate(config): + cls._emit_control_message( + cls._modify_and_save(config_path, source, config), + ) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json index afcd6279342f..d64b1ee1d86f 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json @@ -7,6 +7,7 @@ "required": [ "aws_environment", "region", + "account_type", "lwa_app_id", "lwa_client_secret", "refresh_token", @@ -59,10 +60,18 @@ "type": "string", "order": 2 }, + "account_type": { + "title": "AWS Seller Partner Account Type", + "description": "Type of the Account you're going to authorize the Airbyte application by", + "enum": ["Seller", "Vendor"], + "default": "Seller", + "type": "string", + "order": 3 + }, "lwa_app_id": { "title": "LWA Client Id", "description": "Your Login with Amazon Client ID.", - "order": 3, + "order": 4, "airbyte_secret": true, "type": "string" }, @@ -70,14 +79,14 @@ "title": "LWA Client Secret", "description": "Your Login with Amazon Client Secret.", "airbyte_secret": true, - "order": 4, + "order": 5, "type": "string" }, "refresh_token": { "title": "Refresh Token", "description": "The Refresh Token obtained via OAuth flow authorization.", "airbyte_secret": true, - "order": 5, + "order": 6, "type": "string" }, "replication_start_date": { @@ -85,7 +94,7 @@ "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2017-01-25T00:00:00Z"], - "order": 6, + "order": 7, "type": "string", "format": "date-time" }, @@ -94,7 +103,7 @@ "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data after this date will not be replicated.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$|^$", "examples": ["2017-01-25T00:00:00Z"], - "order": 7, + "order": 8, "type": "string", "format": "date-time" }, @@ -104,7 +113,7 @@ "description": "Will be used for stream slicing for initial full_refresh sync when no updated state is present for reports that support sliced incremental sync.", "default": 90, "minimum": 1, - "order": 8 + "order": 9 }, "report_options": { "title": "Report Options", @@ -113,7 +122,7 @@ "{\"GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT\": {\"reportPeriod\": \"WEEK\"}}", "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" ], - "order": 9, + "order": 10, "type": "string" }, "max_wait_seconds": { @@ -121,7 +130,7 @@ "description": "Sometimes report can take up to 30 minutes to generate. 
This will set the limit for how long to wait for a successful report.", "default": 500, "examples": ["500", "1980"], - "order": 10, + "order": 11, "minimum": 1, "type": "integer" }, @@ -132,7 +141,7 @@ "{\"GET_SALES_AND_TRAFFIC_REPORT\": {\"availability_sla_days\": 3}}", "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" ], - "order": 11, + "order": 12, "type": "string" } } @@ -142,6 +151,19 @@ "predicate_key": ["auth_type"], "predicate_value": "oauth2.0", "oauth_config_specification": { + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "properties": { + "region": { + "type": "string", + "path_in_connector_config": ["region"] + }, + "account_type": { + "type": "string", + "path_in_connector_config": ["account_type"] + } + } + }, "complete_oauth_output_specification": { "type": "object", "additionalProperties": false, diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py new file mode 100644 index 000000000000..52af77133e47 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +from typing import Any, Mapping + +from airbyte_cdk.models import OrchestratorType, Type +from airbyte_cdk.sources import Source +from source_amazon_seller_partner.config_migrations import MigrateAccountType +from source_amazon_seller_partner.source import SourceAmazonSellerPartner + +CMD = "check" +TEST_NOT_MIGRATED_CONFIG_PATH = "unit_tests/test_migrations/not_migrated_config.json" +TEST_MIGRATED_CONFIG_PATH = "unit_tests/test_migrations/migrated_config.json" +SOURCE: Source = SourceAmazonSellerPartner() + + +def load_config(config_path: str = TEST_NOT_MIGRATED_CONFIG_PATH) -> Mapping[str, Any]: + with open(config_path, "r") as config: + return json.load(config) + + +def test_migrate_config(capsys): + config = load_config(TEST_NOT_MIGRATED_CONFIG_PATH) + assert "acount_type" not in config + migration_instance = MigrateAccountType() + migration_instance.migrate([CMD, "--config", TEST_NOT_MIGRATED_CONFIG_PATH], SOURCE) + control_msg = json.loads(capsys.readouterr().out) + assert control_msg["type"] == Type.CONTROL.value + assert control_msg["control"]["type"] == OrchestratorType.CONNECTOR_CONFIG.value + migrated_config = control_msg["control"]["connectorConfig"]["config"] + assert migrated_config["account_type"] == "Seller" + + +def test_should_not_migrate(): + config = load_config(TEST_MIGRATED_CONFIG_PATH) + assert config["account_type"] + migration_instance = MigrateAccountType() + assert not migration_instance._should_migrate(config) diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json new file mode 100644 index 000000000000..3b65000693d3 --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/migrated_config.json @@ -0,0 +1,9 @@ +{ + "refresh_token": "refresh_token", + "lwa_app_id": "amzn1.application-oa2-client.lwa_app_id", + "lwa_client_secret": "amzn1.oa2-cs.v1.lwa_client_secret", + "replication_start_date": "2022-09-01T00:00:00Z", + "aws_environment": "PRODUCTION", + "account_type": "Vendor", + "region": "US" +} diff --git 
a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json new file mode 100644 index 000000000000..e7f89850ba5b --- /dev/null +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_migrations/not_migrated_config.json @@ -0,0 +1,8 @@ +{ + "refresh_token": "refresh_token", + "lwa_app_id": "amzn1.application-oa2-client.lwa_app_id", + "lwa_client_secret": "amzn1.oa2-cs.v1.lwa_client_secret", + "replication_start_date": "2022-09-01T00:00:00Z", + "aws_environment": "PRODUCTION", + "region": "US" +} diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index c5da26fc4d3d..1023cee1379b 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -4,20 +4,39 @@ This page guides you through the process of setting up the Amazon Seller Partner ## Prerequisites +- Amazon Selling Partner account + + + +**For Airbyte Cloud:** + +- AWS Environment +- AWS Region +- Granted OAuth access +- Replication Start Date + + + + +**For Airbyte Open Source:** + - AWS Environment - AWS Region -- LWA Client ID (LWA App ID)** -- LWA Client Secret** -- Refresh token** - Replication Start Date + -**not required for Airbyte Cloud +## Setup Guide ## Step 1: Set up Amazon Seller Partner -1. [Register](https://developer-docs.amazon.com/sp-api/docs/registering-your-application) Amazon Seller Partner application. + + +**Airbyte Open Source setup steps** + +- [Register](https://developer-docs.amazon.com/sp-api/docs/registering-your-application) Amazon Seller Partner application. - The application must be published as Amazon does not allow external parties such as Airbyte to access draft applications. -2. [Create](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html) IAM user. + + ## Step 2: Set up the source connector in Airbyte @@ -28,7 +47,7 @@ This page guides you through the process of setting up the Amazon Seller Partner 3. On the source setup page, select **Amazon Seller Partner** from the Source type dropdown and enter a name for this connector. 4. Click `Authenticate your account`. 5. Log in and Authorize to your Amazon Seller Partner account. -6. Paste all other data to required fields using your IAM user. +6. Paste all other data to required fields. 7. Click `Set up source`. **For Airbyte Open Source:** @@ -37,7 +56,7 @@ This page guides you through the process of setting up the Amazon Seller Partner 2. Go to local Airbyte page. 3. In the left navigation bar, click **Sources**. In the top-right corner, click **+ new source**. 4. On the Set up the source page, enter the name for the Amazon Seller Partner connector and select **Amazon Seller Partner** from the Source type dropdown. -5. Paste all data to required fields using your IAM user and developer account. +5. Paste all data to required fields. 6. Click `Set up source`. 
## Supported sync modes @@ -124,6 +143,7 @@ So, for any value that exceeds the limit, the `period_in_days` will be automatic | Version | Date | Pull Request | Subject | |:---------|:-----------|:--------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `2.0.1` | 2023-11-16 | [\#32550](https://github.com/airbytehq/airbyte/pull/32550) | Fix the OAuth flow | | `2.0.0` | 2023-11-23 | [\#32355](https://github.com/airbytehq/airbyte/pull/32355) | Remove Brand Analytics from Airbyte Cloud, permanently remove deprecated FBA reports | | `1.6.2` | 2023-11-14 | [\#32508](https://github.com/airbytehq/airbyte/pull/32508) | Do not use AWS signature as it is no longer required by the Amazon API | | `1.6.1` | 2023-11-13 | [\#32457](https://github.com/airbytehq/airbyte/pull/32457) | Fix report decompression | From d475beba195253099e55d09389472fa06d13be08 Mon Sep 17 00:00:00 2001 From: Yevhenii Date: Fri, 17 Nov 2023 12:22:20 +0000 Subject: [PATCH 30/57] =?UTF-8?q?=F0=9F=90=9BCDK:=20Fix=20of=20generate=20?= =?UTF-8?q?the=20error=20message=20using=20=5Ftry=5Fget=5Ferror=20based=20?= =?UTF-8?q?on=20list=20of=20errors=20(#32545)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../sources/declarative/requesters/http_requester.py | 5 ++++- .../sources/declarative/requesters/test_http_requester.py | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py index 445325f77b2d..a3a8ec657090 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -538,7 +538,8 @@ def _try_get_error(value: Any) -> Any: if isinstance(value, str): return value elif isinstance(value, list): - return ", ".join(_try_get_error(v) for v in value) + error_list = [_try_get_error(v) for v in value] + return ", ".join(v for v in error_list if v is not None) elif isinstance(value, dict): new_value = ( value.get("message") @@ -547,6 +548,8 @@ def _try_get_error(value: Any) -> Any: or value.get("errors") or value.get("failures") or value.get("failure") + or value.get("details") + or value.get("detail") ) return _try_get_error(new_value) return None diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py index ecec2379693f..0800ff62d8f6 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_requester.py @@ -686,14 +686,19 @@ def test_raise_on_http_errors(mocker, error): ({"error": {"message": "something broke"}}, "something broke"), ({"error": "err-001", "message": "something broke"}, "something broke"), ({"failure": {"message": "something broke"}}, "something broke"), + ({"detail": {"message": "something broke"}}, "something broke"), ({"error": {"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}}, "one, two, three"), ({"errors": ["one", "two", "three"]}, "one, two, three"), + ({"errors": [None, {}, "third error", 9002.09]}, "third error"), ({"messages": 
["one", "two", "three"]}, "one, two, three"), ({"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), ({"error": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), ({"errors": [{"error": "one"}, {"error": "two"}, {"error": "three"}]}, "one, two, three"), ({"failures": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), + ({"details": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"), + ({"details": ["one", 10087, True]}, "one"), (["one", "two", "three"], "one, two, three"), + ({"detail": False}, None), ([{"error": "one"}, {"error": "two"}, {"error": "three"}], "one, two, three"), ({"error": True}, None), ({"something_else": "hi"}, None), From 03cfe9b9104986e7824a6017572c8997b6617f78 Mon Sep 17 00:00:00 2001 From: yevhenii-ldv Date: Fri, 17 Nov 2023 12:30:09 +0000 Subject: [PATCH 31/57] =?UTF-8?q?=F0=9F=A4=96=20Bump=20patch=20version=20o?= =?UTF-8?q?f=20Python=20CDK?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- airbyte-cdk/python/.bumpversion.cfg | 2 +- airbyte-cdk/python/CHANGELOG.md | 3 +++ airbyte-cdk/python/Dockerfile | 4 ++-- airbyte-cdk/python/setup.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/.bumpversion.cfg b/airbyte-cdk/python/.bumpversion.cfg index d93cc7ac0f15..c1c6f6b90cc0 100644 --- a/airbyte-cdk/python/.bumpversion.cfg +++ b/airbyte-cdk/python/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.53.8 +current_version = 0.53.9 commit = False [bumpversion:file:setup.py] diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index fc944faa64c8..591ed0c349e0 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.53.9 +Fix of generate the error message using _try_get_error based on list of errors + ## 0.53.8 Vector DB CDK: Remove CDC records, File CDK: Update unstructured parser diff --git a/airbyte-cdk/python/Dockerfile b/airbyte-cdk/python/Dockerfile index 21618bcb99ff..6942df8c0eea 100644 --- a/airbyte-cdk/python/Dockerfile +++ b/airbyte-cdk/python/Dockerfile @@ -10,7 +10,7 @@ RUN apk --no-cache upgrade \ && apk --no-cache add tzdata build-base # install airbyte-cdk -RUN pip install --prefix=/install airbyte-cdk==0.53.8 +RUN pip install --prefix=/install airbyte-cdk==0.53.9 # build a clean environment FROM base @@ -32,5 +32,5 @@ ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] # needs to be the same as CDK -LABEL io.airbyte.version=0.53.8 +LABEL io.airbyte.version=0.53.9 LABEL io.airbyte.name=airbyte/source-declarative-manifest diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 8c2f8921ae71..bfb4ec5d274c 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -36,7 +36,7 @@ name="airbyte-cdk", # The version of the airbyte-cdk package is used at runtime to validate manifests. That validation must be # updated if our semver format changes such as using release candidate versions. 
- version="0.53.8", + version="0.53.9", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", From 831a5b01cee662011fc1aecaef3234afde69f4e0 Mon Sep 17 00:00:00 2001 From: Richard Gourley Date: Fri, 17 Nov 2023 13:39:59 +0100 Subject: [PATCH 32/57] Small typo fixed - An JSON -> A JSON (#32637) --- docs/understanding-airbyte/airbyte-protocol.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/understanding-airbyte/airbyte-protocol.md b/docs/understanding-airbyte/airbyte-protocol.md index 17c742722882..66c0bc4f10ed 100644 --- a/docs/understanding-airbyte/airbyte-protocol.md +++ b/docs/understanding-airbyte/airbyte-protocol.md @@ -143,7 +143,7 @@ The `discover` method detects and describes the _structure_ of the data in the d 1. `config` - A configuration JSON object that has been validated using `ConnectorSpecification#connectionSpecification` (see [ActorSpecification](#actor-specification) for information on `connectionSpecification`). 2. `configured catalog` - A `ConfiguredAirbyteCatalog` is built on top of the `catalog` returned by `discover`. The `ConfiguredAirbyteCatalog` specifies HOW the data in the catalog should be replicated. The catalog is documented in the [Catalog Section](#catalog). -3. `state` - An JSON object that represents a checkpoint in the replication. This object is only ever written or read by the source, so it is a JSON blob with whatever information is necessary to keep track of how much of the data source has already been read (learn more in the [State & Checkpointing](#state--checkpointing) Section). +3. `state` - A JSON object that represents a checkpoint in the replication. This object is only ever written or read by the source, so it is a JSON blob with whatever information is necessary to keep track of how much of the data source has already been read (learn more in the [State & Checkpointing](#state--checkpointing) Section). 
#### Output: From 4481be264653708b841baee7bab04e844386078f Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:45:35 +0200 Subject: [PATCH 33/57] =?UTF-8?q?=E2=9C=A8=20Source=20Pinterest:=20Marked?= =?UTF-8?q?=20start=20date=20as=20optional;=20add=20missing=20fields;=20ad?= =?UTF-8?q?d=20suggested=20streams=20(#32592)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../connectors/source-pinterest/metadata.yaml | 13 +++- .../connectors/source-pinterest/setup.py | 2 +- .../source_pinterest/schemas/ad_groups.json | 61 +++++++++++++++++++ .../source_pinterest/schemas/ads.json | 3 + .../source_pinterest/schemas/board_pins.json | 16 +++-- .../schemas/board_section_pins.json | 3 + .../source_pinterest/schemas/campaigns.json | 15 +++++ .../source_pinterest/source.py | 2 +- .../source_pinterest/spec.json | 1 - docs/integrations/sources/pinterest.md | 57 ++++++++--------- 10 files changed, 132 insertions(+), 41 deletions(-) diff --git a/airbyte-integrations/connectors/source-pinterest/metadata.yaml b/airbyte-integrations/connectors/source-pinterest/metadata.yaml index 4763dd34d13a..82a76edc0a4b 100644 --- a/airbyte-integrations/connectors/source-pinterest/metadata.yaml +++ b/airbyte-integrations/connectors/source-pinterest/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 - dockerImageTag: 0.7.2 + dockerImageTag: 0.8.0 dockerRepository: airbyte/source-pinterest connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c @@ -19,6 +19,17 @@ data: oss: enabled: true releaseStage: generally_available + suggestedStreams: + streams: + - campaign_analytics + - ad_account_analytics + - ad_analytics + - campaigns + - ad_accounts + - ads + - user_account_analytics + - ad_group_analytics + - ad_groups documentationUrl: https://docs.airbyte.com/integrations/sources/pinterest tags: - language:python diff --git a/airbyte-integrations/connectors/source-pinterest/setup.py b/airbyte-integrations/connectors/source-pinterest/setup.py index eac9cebacb4b..5da646d8e719 100644 --- a/airbyte-integrations/connectors/source-pinterest/setup.py +++ b/airbyte-integrations/connectors/source-pinterest/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2", "pendulum~=2.1.2"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "pendulum~=2.1.2"] TEST_REQUIREMENTS = [ "pytest~=6.1", diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json index 3f24d99ee067..4ab16b8d9676 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ad_groups.json @@ -116,6 +116,67 @@ }, "updated_time": { "type": ["null", "number"] + }, + "optimization_goal_metadata": { + "type": ["null", "object"], + "properties": { + "conversion_tag_v3_goal_metadata": { + "type": ["null", "object"], + "properties": { + "attribution_windows": { + "type": ["null", "object"], + "properties": { + "click_window_days": { + "type": ["null", "integer"] + }, + "engagement_window_days": { + "type": ["null", "integer"] + }, + "view_window_days": { + "type": ["null", "integer"] + } + } + }, + 
"conversion_event": { + "type": ["null", "string"] + }, + "conversion_tag_id": { + "type": ["null", "string"] + }, + "cpa_goal_value_in_micro_currency": { + "type": ["null", "string"] + }, + "is_roas_optimized": { + "type": ["null", "boolean"] + }, + "learning_mode_type": { + "type": ["null", "string"] + } + } + }, + "frequency_goal_metadata": { + "type": ["null", "object"], + "properties": { + "frequency": { + "type": ["null", "integer"] + }, + "timerange": { + "type": ["null", "string"] + } + } + }, + "scrollup_goal_metadata": { + "type": ["null", "object"], + "properties": { + "scrollup_goal_value_in_micro_currency": { + "type": ["null", "string"] + } + } + } + } + }, + "bid_strategy_type": { + "type": ["null", "string"] } } } diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json index 2b385cce892d..d5f238bd9b10 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/ads.json @@ -91,6 +91,9 @@ "view_tracking_url": { "type": ["null", "string"] }, + "lead_form_id": { + "type": ["null", "string"] + }, "ad_account_id": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json index 2989e890ced2..55a5c52fbd48 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_pins.json @@ -11,15 +11,7 @@ "format": "date-time" }, "creative_type": { - "type": ["null", "string"], - "enum": [ - "REGULAR", - "VIDEO", - "CAROUSEL", - "MAX_VIDEO", - "SHOP_THE_PIN", - "IDEA" - ] + "type": ["null", "string"] }, "is_standard": { "type": ["null", "boolean"] @@ -77,6 +69,12 @@ "type": ["null", "string"] } } + }, + "pin_metrics": { + "type": ["null", "object"] + }, + "has_been_promoted": { + "type": ["null", "boolean"] } } } diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json index 74bdf144cb8a..603145526fa6 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/board_section_pins.json @@ -35,6 +35,9 @@ } } }, + "pin_metrics": { + "type": ["null", "object"] + }, "media": { "type": ["null", "object"], "properties": { diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json index 561bebf0d971..cb91bc3af2d7 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/schemas/campaigns.json @@ -69,6 +69,21 @@ }, "type": { "type": ["null", "string"] + }, + "start_time": { + "type": ["null", "integer"] + }, + "end_time": { + "type": ["null", "integer"] + }, + "summary_status": { + "type": ["null", "string"] + }, + "is_campaign_budget_optimization": { + "type": ["null", "boolean"] + }, + "is_flexible_daily_budgets": { + "type": ["null", "boolean"] } } 
} diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py index 828e07f22b53..b8f9b693a9ab 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py @@ -59,7 +59,7 @@ def _validate_and_transform(self, config: Mapping[str, Any], amount_of_days_allo today = pendulum.today() latest_date_allowed_by_api = today.subtract(days=amount_of_days_allowed_for_lookup) - start_date = config["start_date"] + start_date = config.get("start_date") if not start_date: config["start_date"] = latest_date_allowed_by_api else: diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json index 835d983074c2..c19b80a65901 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json @@ -4,7 +4,6 @@ "$schema": "https://json-schema.org/draft-07/schema#", "title": "Pinterest Spec", "type": "object", - "required": ["start_date"], "additionalProperties": true, "properties": { "start_date": { diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md index aa599ef5389e..5b8f94e40de1 100644 --- a/docs/integrations/sources/pinterest.md +++ b/docs/integrations/sources/pinterest.md @@ -84,32 +84,33 @@ The connector is restricted by the Pinterest [requests limitation](https://devel ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :------------------------------------------------------- |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :------------------------------------------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.8.0 | 2023-11-16 | [32592](https://github.com/airbytehq/airbyte/pull/32592) | Make start_date optional; add suggested streams; add missing fields | | 0.7.2 | 2023-11-08 | [32299](https://github.com/airbytehq/airbyte/pull/32299) | added default `AvailabilityStrategy`, fixed bug which cases duplicated requests, added new streams: Catalogs, CatalogsFeeds, CatalogsProductGroups, Audiences, Keywords, ConversionTags, CustomerLists, CampaignTargetingReport, AdvertizerReport, AdvertizerTargetingReport, AdGroupReport, AdGroupTargetingReport, PinPromotionReport, PinPromotionTargetingReport, ProductGroupReport, ProductGroupTargetingReport, ProductItemReport, KeywordReport | -| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json 
response | -| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | -| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | -| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | -| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | -| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | -| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | -| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | -| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | -| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | -| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | -| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | -| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | -| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | -| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | -| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | -| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | -| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. 
| -| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | -| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | -| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | -| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | -| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | -| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | -| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | +| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | +| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | +| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | +| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | +| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | +| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | +| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | +| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | +| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | +| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | +| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | +| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | +| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | +| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | +| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | +| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | +| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | +| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. 
| +| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | +| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | +| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | +| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | +| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | +| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | +| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | From 9d1a4ba04e8d508c196d45dd2e5e13eb890fa85a Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Fri, 17 Nov 2023 14:51:49 +0100 Subject: [PATCH 34/57] Deprecate langchain destination (#32455) --- .../connectors/destination-langchain/Dockerfile | 2 +- .../connectors/destination-langchain/metadata.yaml | 2 +- .../connectors/destination-langchain/setup.py | 2 +- docs/integrations/destinations/langchain.md | 13 +++++++++++++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-langchain/Dockerfile b/airbyte-integrations/connectors/destination-langchain/Dockerfile index b4b48cdd04c2..30452c2628ac 100644 --- a/airbyte-integrations/connectors/destination-langchain/Dockerfile +++ b/airbyte-integrations/connectors/destination-langchain/Dockerfile @@ -42,5 +42,5 @@ COPY destination_langchain ./destination_langchain ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.version=0.1.2 LABEL io.airbyte.name=airbyte/destination-langchain diff --git a/airbyte-integrations/connectors/destination-langchain/metadata.yaml b/airbyte-integrations/connectors/destination-langchain/metadata.yaml index a76d4126868f..f8db27c1afe0 100644 --- a/airbyte-integrations/connectors/destination-langchain/metadata.yaml +++ b/airbyte-integrations/connectors/destination-langchain/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: database connectorType: destination definitionId: cf98d52c-ba5a-4dfd-8ada-c1baebfa6e73 - dockerImageTag: 0.1.1 + dockerImageTag: 0.1.2 dockerRepository: airbyte/destination-langchain githubIssueLabel: destination-langchain icon: langchain.svg diff --git a/airbyte-integrations/connectors/destination-langchain/setup.py b/airbyte-integrations/connectors/destination-langchain/setup.py index 80f25bd65f1e..5446952fc464 100644 --- a/airbyte-integrations/connectors/destination-langchain/setup.py +++ b/airbyte-integrations/connectors/destination-langchain/setup.py @@ -6,7 +6,7 @@ from setuptools import find_packages, setup MAIN_REQUIREMENTS = [ - "airbyte-cdk", + "airbyte-cdk==0.51.10", "langchain", "openai", "requests", diff --git a/docs/integrations/destinations/langchain.md b/docs/integrations/destinations/langchain.md index 31a9ddcae93d..4ac1fe151906 100644 --- a/docs/integrations/destinations/langchain.md +++ b/docs/integrations/destinations/langchain.md @@ -1,5 +1,17 @@ # Vector Database (powered by LangChain) +:::warning +The vector db destination destination has been split into separate destinations per vector database. 
This destination will not receive any further updates and is not subject to SLAs. The separate destinations support all features of this destination and are actively maintained. Please migrate to the respective destination as soon as possible. + +Please use the respective destination for the vector database you want to use to ensure you receive updates and support. + +To following databases are supported: +* [Pinecone](https://docs.airbyte.com/integrations/destinations/pinecone) +* [Weaviate](https://docs.airbyte.com/integrations/destinations/weaviate) +* [Milvus](https://docs.airbyte.com/integrations/destinations/milvus) +* [Chroma](https://docs.airbyte.com/integrations/destinations/chroma) +* [Qdrant](https://docs.airbyte.com/integrations/destinations/qdrant) +::: ## Overview @@ -140,6 +152,7 @@ Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a M | Version | Date | Pull Request | Subject | |:--------| :--------- |:--------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.2 | 2023-11-13 | [#32455](https://github.com/airbytehq/airbyte/pull/32455) | Fix build | | 0.1.1 | 2023-09-01 | [#30282](https://github.com/airbytehq/airbyte/pull/30282) | Use embedders from CDK | | 0.1.0 | 2023-09-01 | [#30080](https://github.com/airbytehq/airbyte/pull/30080) | Fix bug with potential data loss on append+dedup syncing. 🚨 Streams using append+dedup mode need to be reset after upgrade. | | 0.0.8 | 2023-08-21 | [#29515](https://github.com/airbytehq/airbyte/pull/29515) | Clean up generated schema spec | From 0391d374a0d6f9b49e82f8f3dac11bd67ac98efb Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:58:31 +0100 Subject: [PATCH 35/57] =?UTF-8?q?=F0=9F=90=9B=20Source=20Bing=20Ads:=20Fix?= =?UTF-8?q?=20start=20date=20parsing=20from=20stream=20state=20(#32597)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration_tests/expected_records.jsonl | 4 ++-- .../connectors/source-bing-ads/metadata.yaml | 2 +- .../source_bing_ads/report_streams.py | 2 +- .../source-bing-ads/unit_tests/test_reports.py | 15 ++++++--------- docs/integrations/sources/bing-ads.md | 1 + 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl index 62627cf0aeae..7a61e57866e3 100644 --- a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl @@ -15,8 +15,8 @@ {"stream":"campaign_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"TimePeriod":"2023-11-05","CurrencyCode":"USD","AdDistribution":"Search","DeviceType":"Computer","Network":"Syndicated search partners","DeliveredMatchType":"Exact","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","CampaignType":"Search & 
content","CampaignStatus":"Active","CampaignLabels":null,"Impressions":9,"Clicks":1,"Ctr":11.11,"Spend":0.03,"CostPerConversion":null,"QualityScore":5.0,"AdRelevance":3.0,"LandingPageExperience":1.0,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Assists":0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"CustomParameters":null,"ViewThroughConversions":0,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"AverageCpc":0.03,"AveragePosition":0.0,"AverageCpm":3.33,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":0,"LowQualitySophisticatedClicks":0,"LowQualityConversions":0,"LowQualityConversionRate":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null,"BudgetName":null,"BudgetStatus":null,"BudgetAssociationStatus":"Current","HistoricalQualityScore":5.0,"HistoricalExpectedCtr":2.0,"HistoricalAdRelevance":3.0,"HistoricalLandingPageExperience":1.0},"emitted_at":1699954081143} {"stream":"campaign_impression_performance_report_daily","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-07","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":3.37,"ImpressionLostToBudgetPercent":85.19,"ImpressionLostToRankAggPercent":11.45,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":null,"ClickSharePercent":null,"AbsoluteTopImpressionSharePercent":6.02,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":14.63,"TopImpressionShareLostToBudgetPercent":77.24,"AbsoluteTopImpressionShareLostToRankPercent":15.66,"AbsoluteTopImpressionShareLostToBudgetPercent":78.31,"TopImpressionSharePercent":8.13,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954182626} {"stream":"campaign_impression_performance_report_weekly","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-05","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":10.87,"ImpressionLostToBudgetPercent":17.05,"ImpressionLostToRankAggPercent":72.08,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":29.07,"ClickSharePercent":2.89,"AbsoluteTopImpressionSharePercent":8.88,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":76.51,"TopImpressionShareLostToBudgetPercent":9.99,"AbsoluteTopImpressionShareLostToRankPercent":81.99,"AbsoluteTopImpressionShareLostToBudgetPercent":9.13,"TopImpressionSharePercent":13.5,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954211223} -{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.18,"FirstPageBid":0.51,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700078149400} -{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.18,"FirstPageBid":0.51,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700078299436} +{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.11,"FirstPageBid":0.48,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700167471396} +{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.11,"FirstPageBid":0.48,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700167573859} {"stream":"geographic_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-07","Country":"Australia","CurrencyCode":"USD","DeliveredMatchType":"Broad","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Syndicated search partners","DeviceOS":"Windows","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","MetroArea":null,"State":"New South Wales","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"2000","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":"2000","LocationId":"122395","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699956863587} {"stream":"geographic_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-05","Country":"Argentina","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"Spanish","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","MetroArea":null,"State":"Buenos Aires Province","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"Buenos Aires 
Province","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":null,"LocationId":"141965","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699953673210} {"stream":"age_gender_audience_report_daily","data":{"AccountId":180519267,"AgeGroup":"Unknown","Gender":"Unknown","TimePeriod":"2023-11-07","AllConversions":0,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","CampaignId":531016227,"AdGroupName":"keywords","AdGroupId":1356799861840328,"AdDistribution":"Search","Impressions":3,"Clicks":1,"Conversions":0.0,"Spend":0.79,"Revenue":0.0,"ExtendedCost":0.0,"Assists":0,"Language":"German","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","BaseCampaignId":"531016227","AllRevenue":0.0,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":33.33,"TopImpressionRatePercent":100.0,"ConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0},"emitted_at":1699954406862} diff --git a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml index 1c29083b42ff..8d64cd443fa7 100644 --- a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml @@ -16,7 +16,7 @@ data: connectorSubtype: api connectorType: source definitionId: 47f25999-dd5e-4636-8c39-e7cea2453331 - dockerImageTag: 2.0.0 + dockerImageTag: 2.0.1 dockerRepository: airbyte/source-bing-ads documentationUrl: https://docs.airbyte.com/integrations/sources/bing-ads githubIssueLabel: source-bing-ads diff --git a/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py b/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py index 2be5dd890834..5a0d1c5818a3 100644 --- a/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py +++ b/airbyte-integrations/connectors/source-bing-ads/source_bing_ads/report_streams.py @@ -154,7 +154,7 @@ def request_params( def get_start_date(self, stream_state: Mapping[str, Any] = None, account_id: str = None): if stream_state and account_id: if stream_state.get(account_id, {}).get(self.cursor_field): - return pendulum.parse(self.get_report_record_timestamp(stream_state[account_id][self.cursor_field])) + return pendulum.parse(stream_state[account_id][self.cursor_field]) return self.client.reports_start_date diff --git a/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py b/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py index 44c5329bd83a..a10ba7f94022 
100644 --- a/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py +++ b/airbyte-integrations/connectors/source-bing-ads/unit_tests/test_reports.py @@ -200,7 +200,7 @@ def test_get_report_record_timestamp_hourly(stream_report_hourly_cls): def test_report_get_start_date_wo_stream_state(): expected_start_date = "2020-01-01" - test_report = TestReport() + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = "2020-01-01" stream_state = {} account_id = "123" @@ -209,20 +209,18 @@ def test_report_get_start_date_wo_stream_state(): def test_report_get_start_date_with_stream_state(): expected_start_date = pendulum.parse("2023-04-17T21:29:57") - test_report = TestReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = "2020-01-01" - stream_state = {"123": {"cursor_field": "2023-04-17T21:29:57+00:00"}} + stream_state = {"123": {"TimePeriod": "2023-04-17T21:29:57+00:00"}} account_id = "123" assert expected_start_date == test_report.get_start_date(stream_state, account_id) def test_report_get_start_date_performance_report_with_stream_state(): expected_start_date = pendulum.parse("2023-04-07T21:29:57") - test_report = TestPerformanceReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.config = {"lookback_window": 10} - stream_state = {"123": {"cursor_field": "2023-04-17T21:29:57+00:00"}} + stream_state = {"123": {"TimePeriod": "2023-04-17T21:29:57+00:00"}} account_id = "123" assert expected_start_date == test_report.get_start_date(stream_state, account_id) @@ -230,8 +228,7 @@ def test_report_get_start_date_performance_report_with_stream_state(): def test_report_get_start_date_performance_report_wo_stream_state(): days_to_subtract = 10 reports_start_date = pendulum.parse("2021-04-07T00:00:00") - test_report = TestPerformanceReport() - test_report.cursor_field = "cursor_field" + test_report = GeographicPerformanceReportDaily(client=Mock(), config=TEST_CONFIG) test_report.client.reports_start_date = reports_start_date test_report.config = {"lookback_window": days_to_subtract} stream_state = {} diff --git a/docs/integrations/sources/bing-ads.md b/docs/integrations/sources/bing-ads.md index cab52dc73a5b..a7a972931394 100644 --- a/docs/integrations/sources/bing-ads.md +++ b/docs/integrations/sources/bing-ads.md @@ -208,6 +208,7 @@ The Bing Ads API limits the number of requests for all Microsoft Advertising cli | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.0.1 | 2023-11-16 | [32597](https://github.com/airbytehq/airbyte/pull/32597) | Fix start date parsing from stream state | | 2.0.0 | 2023-11-07 | [31995](https://github.com/airbytehq/airbyte/pull/31995) | Schema update for Accounts, Campaigns and Search Query Performance Report streams. 
Convert `date` and `date-time` fields to standard `RFC3339` | | 1.13.0 | 2023-11-13 | [32306](https://github.com/airbytehq/airbyte/pull/32306) | Add Custom reports and decrease backoff max tries number | | 1.12.1 | 2023-11-10 | [32422](https://github.com/airbytehq/airbyte/pull/32422) | Normalize numeric values in reports | From a745a374023afbab84a37f6fd22603d11a7fe8ac Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Date: Fri, 17 Nov 2023 16:23:20 +0200 Subject: [PATCH 36/57] =?UTF-8?q?=F0=9F=90=9B=20Source=20Instagram:=20Chan?= =?UTF-8?q?ge=20start=5Fdate=20type=20to=20date;=20fix=20docs=20(#32627)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../source-instagram/integration_tests/spec.json | 3 ++- .../connectors/source-instagram/metadata.yaml | 2 +- .../source-instagram/source_instagram/source.py | 2 +- docs/integrations/sources/instagram.md | 13 +++++++------ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json b/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json index cfed60215dc4..f3fbd6e9dc22 100644 --- a/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-instagram/integration_tests/spec.json @@ -10,7 +10,8 @@ "description": "The date from which you'd like to replicate data for User Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date.", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "examples": ["2017-01-25T00:00:00Z"], - "type": "string" + "type": "string", + "format": "date-time" }, "access_token": { "title": "Access Token", diff --git a/airbyte-integrations/connectors/source-instagram/metadata.yaml b/airbyte-integrations/connectors/source-instagram/metadata.yaml index 2e1403daea66..48c76e1a70d2 100644 --- a/airbyte-integrations/connectors/source-instagram/metadata.yaml +++ b/airbyte-integrations/connectors/source-instagram/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: 6acf6b55-4f1e-4fca-944e-1a3caef8aba8 - dockerImageTag: 1.0.15 + dockerImageTag: 1.0.16 dockerRepository: airbyte/source-instagram githubIssueLabel: source-instagram icon: instagram.svg diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/source.py b/airbyte-integrations/connectors/source-instagram/source_instagram/source.py index 73ce1fb5b5f4..4a41d013c1a9 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/source.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/source.py @@ -18,7 +18,7 @@ class ConnectorConfig(BaseModel): class Config: title = "Source Instagram" - start_date: Optional[str] = Field( + start_date: Optional[datetime] = Field( description="The date from which you'd like to replicate data for User Insights, in the format YYYY-MM-DDT00:00:00Z. All data generated after this date will be replicated. 
If left blank, the start date will be set to 2 years before the present date.", pattern="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", examples=["2017-01-25T00:00:00Z"], diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index b392c4737016..307acbd686ac 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -10,9 +10,10 @@ This page contains the setup guide and reference information for the Instagram s - [Meta for Developers account](https://developers.facebook.com) - [Instagram business account](https://www.facebook.com/business/help/898752960195806) to your Facebook page +- [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte - [Instagram Graph API](https://developers.facebook.com/docs/instagram-api/) to your Facebook app -- [Facebook OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) -- [Facebook ad account ID number](https://www.facebook.com/business/help/1492627900875762) (you'll use this to configure Instagram as a source in Airbyte) +- [Facebook Instagram OAuth Reference](https://developers.facebook.com/docs/instagram-basic-display-api/reference) + ## Setup Guide @@ -28,7 +29,7 @@ This page contains the setup guide and reference information for the Instagram s 4. Enter a name for your source. 5. Click **Authenticate your Instagram account**. 6. Log in and authorize the Instagram account. -7. Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If this field is blank, Airbyte will replicate all data. +7. (Optional) Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date. 8. Click **Set up source**. @@ -40,9 +41,8 @@ This page contains the setup guide and reference information for the Instagram s 2. Click **Sources** and then click **+ New source**. 3. On the Set up the source page, select **Instagram** from the **Source type** dropdown. 4. Enter a name for your source. -5. Click **Authenticate your Instagram account**. -6. Log in and authorize the Instagram account. -7. Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If this field is blank, Airbyte will replicate all data. +5. Enter **Access Token** generated using [Graph API Explorer](https://developers.facebook.com/tools/explorer/) or [by using an app you can create on Facebook](https://developers.facebook.com/docs/instagram-api/getting-started) with the required permissions: instagram_basic, instagram_manage_insights, pages_show_list, pages_read_engagement. +7. (Optional) Enter the **Start Date** in YYYY-MM-DDTHH:mm:ssZ format. All data generated after this date will be replicated. If left blank, the start date will be set to 2 years before the present date. 8. Click **Set up source**. 
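For reference, the source configuration produced by these steps is a small JSON object; a hypothetical example using the fields from the connector spec above might look like this (the token value is a placeholder, and `start_date` is optional):

```json
{
  "access_token": "<YOUR_INSTAGRAM_ACCESS_TOKEN>",
  "start_date": "2017-01-25T00:00:00Z"
}
```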
@@ -93,6 +93,7 @@ AirbyteRecords are required to conform to the [Airbyte type](https://docs.airbyt | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------| +| 1.0.16 | 2023-11-17 | [32627](https://github.com/airbytehq/airbyte/pull/32627) | Fix start_date type; fix docs | | 1.0.15 | 2023-11-14 | [32494](https://github.com/airbytehq/airbyte/pull/32494) | Marked start_date as optional; set max retry time to 10 minutes; add suggested streams | | 1.0.14 | 2023-11-13 | [32423](https://github.com/airbytehq/airbyte/pull/32423) | Capture media_product_type column in media and stories stream | | 1.0.13 | 2023-11-10 | [32245](https://github.com/airbytehq/airbyte/pull/32245) | Add skipping reading MediaInsights stream if an error code 10 is received | From cc7f01925397c7d773f8cae33eb19651bf697c85 Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com> Date: Fri, 17 Nov 2023 17:46:37 +0200 Subject: [PATCH 37/57] =?UTF-8?q?=F0=9F=9A=A8=F0=9F=9A=A8=E2=9C=A8=20Sourc?= =?UTF-8?q?e=20Instagram:=20Add=20primary=20keys=20for=20UserLifetimeInsig?= =?UTF-8?q?hts=20and=20UserInsights;=20add=20airbyte=5Ftype=20to=20timesta?= =?UTF-8?q?mp=20fields=20=20(#32500)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration_tests/expected_records.jsonl | 14 ++++---- .../connectors/source-instagram/metadata.yaml | 9 +++++- .../source_instagram/schemas/media.json | 6 ++-- .../source_instagram/schemas/stories.json | 3 +- .../schemas/user_insights.json | 3 +- .../schemas/user_lifetime_insights.json | 3 +- .../source_instagram/streams.py | 31 ++++++++++++++---- .../source-instagram/unit_tests/conftest.py | 1 + .../unit_tests/test_source.py | 3 ++ .../unit_tests/test_streams.py | 32 ++++++++++++++----- .../sources/instagram-migrations.md | 9 ++++++ docs/integrations/sources/instagram.md | 1 + 12 files changed, 88 insertions(+), 27 deletions(-) create mode 100644 docs/integrations/sources/instagram-migrations.md diff --git a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl index 0a60185b5dbf..d90034355c7a 100644 --- a/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-instagram/integration_tests/expected_records.jsonl @@ -1,7 +1,7 @@ -{"stream": "users", "data": {"id": "17841408147298757", "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=uQq3P1OLNOYAX_JjmNN&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBI4aspXBrxU-bYTD-qnPWh7ex05YFFAOl_24u7JxLYrw&oe=6558D73E", "username": "airbytehq", "followers_count": 1253, "name": "Jean Lafleur", "ig_id": 8070063576, "media_count": 258, "follows_count": 14, "website": "https://www.airbyte.io/", "page_id": "144706962067225"}, "emitted_at": 1700004246764} -{"stream": "media", "data": {"id": "17884386203808767", "media_product_type": "REELS", "shortcode": "CtZs0Y3v2lx", "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "media_url": 
"https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfAJ_4aiqzpxj20QL_aetXfmjmA8nRmz27vnAzpiARGK5w&oe=6555EADA&_nc_sid=1d576d", "timestamp": "2023-06-12T19:20:02+0000", "media_type": "VIDEO", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "comments_count": 2, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 9, "ig_id": "3123724930722523505", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfAzQkg0OB_775OS9F7QSmHxKMrjBSNFi8Rx24OISWSTTQ&oe=655888CE", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200656} -{"stream": "media", "data": {"id": "17864256500936159", "media_product_type": "REELS", "shortcode": "CscAR5EsRgA", "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB3i72i3aoV6KoK_SkI7W93z4rQLdbYHatg-KzPo0ADCg&oe=655556A6&_nc_sid=1d576d", "timestamp": "2023-05-19T20:08:33+0000", "media_type": "VIDEO", "caption": "When and why you should be using Rust for Data Engineering! 
\n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 7, "ig_id": "3106359072491902976", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=2ACJfSHiIRkAX8S0ZFU&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDJSGXmZXkQnQZmkrVUi4nadhEddZxH5LUNtELipGu4Dw&oe=655947D9", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "media", "data": {"id": "17964324206288599", "media_product_type": "REELS", "shortcode": "CsUe2iqpQif", "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfB1HP_4v5ndbtfq_6eWT0cxo0vqzO9F6mu5ZS-q4IRDzg&oe=65558FF2&_nc_sid=1d576d", "timestamp": "2023-05-16T22:01:45+0000", "media_type": "VIDEO", "caption": "We've all been there right? \ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "comments_count": 0, "username": "airbytehq", "is_comment_enabled": true, "owner": {"id": "17841408147298757"}, "like_count": 13, "ig_id": "3104241732634871967", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=3He_36rMQuYAX9Pz0NM&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfCAgX5HrHW8grC2x_VzJyCf2lUTViJCmwNy0uStHB-YFg&oe=6559347C", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700004200657} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-14T08:00:00+0000", "value": {"London, England": 8, "Sydney, New South Wales": 19, "Algiers, Algiers Province": 4, "Casablanca, Grand Casablanca": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 7, "Kolkata, West Bengal": 4, "Phoenix, Arizona": 3, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 18, "Skopje, Municipality of Centar (Skopje)": 4, "Ahmedabad, Gujarat": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Delhi, Delhi": 6, "Gurugram, Haryana": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 5, "Istanbul, Istanbul Province": 9, "Abuja, Federal Capital Territory": 5, "Chennai, Tamil Nadu": 6, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, 
"Greater Noida, Uttar Pradesh": 3, "Tehran, Tehran Province": 4, "New York, New York": 13, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700004246978} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-14T08:00:00+0000", "value": {"DE": 31, "HK": 4, "FI": 5, "TW": 5, "RU": 9, "TZ": 8, "FR": 11, "SA": 8, "BR": 64, "SE": 6, "MA": 6, "SG": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, "ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 12, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 5, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700004246980} -{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-14T08:00:00+0000", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 367, "M.35-44": 226, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 43, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700004246980} +{"stream": "users", "data": {"id": "17841408147298757", "website": "https://www.airbyte.io/", "ig_id": 8070063576, "followers_count": 1252, "name": "Jean Lafleur", "media_count": 258, "username": "airbytehq", "follows_count": 14, "biography": "Airbyte is the new open-source data integration platform that consolidates your data into your warehouses.", "profile_picture_url": "https://scontent-iev1-1.xx.fbcdn.net/v/t51.2885-15/153169696_890787328349641_8382928081987798464_n.jpg?_nc_cat=111&_nc_sid=7d201b&_nc_ohc=DFFn_25gYVMAX8nPfUd&_nc_ht=scontent-iev1-1.xx&edm=AL-3X8kEAAAA&oh=00_AfBHQPJ5aiFU1qw88d3gTF5jmg-Rpd5TX_gxAQt3jrSA4g&oe=655CCBBE", "page_id": "144706962067225"}, "emitted_at": 1700230802579} +{"stream": "media", "data": {"id": "17884386203808767", "caption": "Terraform Explained Part 1\n.\n.\n.\n#airbyte #dataengineering #tech #terraform #cloud #cloudengineer #coding #reels", "ig_id": "3123724930722523505", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/B34BFFBB0614049AD69F066D153FDD8C_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=107&vs=986202625710684_1200838240&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CMzRCRkZCQjA2MTQwNDlBRDY5RjA2NkQxNTNGREQ4Q192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dDQm9HQlV3a2JxUWwtY0JBRnZGTnFBUkdQeHpicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJtDf4euHnbtAFQIoAkMzLBdAUBtDlYEGJRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfBPpWnNa8TFbux-TpRO48bJGSkaIKPFOnmXhcv39jLd_A&oe=6559369A&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CtZs0Y3v2lx", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/353022694_609901831117241_2447211336606431614_n.jpg?_nc_cat=100&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=1ZTHPkRhzl8AX-hZcw_&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfBdTKQTru0U2JNSqNnuPN0cWYv1u6o6t6u3EHIFteUV7w&oe=655C7D4E", 
"is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CtZs0Y3v2lx/", "timestamp": "2023-06-12T19:20:02+00:00", "like_count": 9, "comments_count": 2, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757119} +{"stream": "media", "data": {"id": "17864256500936159", "caption": "When and why you should be using Rust for Data Engineering! \n\n#rust #airbyte #coding #programming #tech #dataengineering #data", "ig_id": "3106359072491902976", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/BE4F848CC97FBA35A1AE1B1150B989A7_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNzIwLmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=110&vs=6290041361087047_1877877688&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC9CRTRGODQ4Q0M5N0ZCQTM1QTFBRTFCMTE1MEI5ODlBN192aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dEaE94aFJJdk1BWGZaWURBQXQyS0FLWWxOSlhicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrD%2B6LaRwf1AFQIoAkMzLBdARDmZmZmZmhgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfC6GeTJWR8KJZ3-eb1-faBZ8P8G8AFyswEDdD4gFzmPMg&oe=65594B26&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CscAR5EsRgA", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347441626_604256678433845_716271787932876577_n.jpg?_nc_cat=108&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=jLyY4sWj0v0AX-iadbF&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfA-x6QyIXxT7o_lEwDH0k7tDb_bgCGeP61AseCpluCtPA&oe=655D3C59", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CscAR5EsRgA/", "timestamp": "2023-05-19T20:08:33+00:00", "like_count": 7, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "media", "data": {"id": "17964324206288599", "caption": "We've all been there right? 
\ud83e\udd23\n\n#airbyte #data #dataengineering #datascience #dataanalytics #tech #softwareengineer", "ig_id": "3104241732634871967", "media_url": "https://scontent-iev1-1.cdninstagram.com/o1/v/t16/f1/m82/274503D36EA0F6E79A7CF3797A8D5985_video_dashinit.mp4?efg=eyJ2ZW5jb2RlX3RhZyI6InZ0c192b2RfdXJsZ2VuLmNsaXBzLnVua25vd24tQzMuNTc2LmRhc2hfYmFzZWxpbmVfMV92MSJ9&_nc_ht=scontent-iev1-1.cdninstagram.com&_nc_cat=106&vs=1336282350269744_3931649106&_nc_vs=HBksFQIYT2lnX3hwdl9yZWVsc19wZXJtYW5lbnRfcHJvZC8yNzQ1MDNEMzZFQTBGNkU3OUE3Q0YzNzk3QThENTk4NV92aWRlb19kYXNoaW5pdC5tcDQVAALIAQAVAhg6cGFzc3Rocm91Z2hfZXZlcnN0b3JlL0dQdzNzaFRId3VlSlBFWURBSDFmTjUzcUNhd0JicV9FQUFBRhUCAsgBACgAGAAbAYgHdXNlX29pbAExFQAAJrDwmtqO44lAFQIoAkMzLBdAIewIMSbpeRgSZGFzaF9iYXNlbGluZV8xX3YxEQB1AAA%3D&ccb=9-4&oh=00_AfACHaQfoSJ_vMXbm4Xw3gmWnG_vnJgUsIYUePDdtIUS-w&oe=6558DBB2&_nc_sid=1d576d", "owner": {"id": "17841408147298757"}, "shortcode": "CsUe2iqpQif", "username": "airbytehq", "thumbnail_url": "https://scontent-iev1-1.cdninstagram.com/v/t51.36329-15/347429218_1848940842145573_5975413208994727174_n.jpg?_nc_cat=101&ccb=1-7&_nc_sid=c4dd86&_nc_ohc=Y6VzeGH_9lkAX_wkzpd&_nc_ht=scontent-iev1-1.cdninstagram.com&edm=AM6HXa8EAAAA&oh=00_AfDil0e2W7Iqq0-d7rf9JkdOluS7U2C3nhK17EfQ3c07fw&oe=655D28FC", "is_comment_enabled": true, "permalink": "https://www.instagram.com/reel/CsUe2iqpQif/", "timestamp": "2023-05-16T22:01:45+00:00", "like_count": 13, "comments_count": 0, "media_product_type": "REELS", "media_type": "VIDEO", "page_id": "144706962067225", "business_account_id": "17841408147298757"}, "emitted_at": 1700230757120} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_city", "date": "2023-11-17T08:00:00+00:00", "value": {"London, England": 7, "Sydney, New South Wales": 19, "Atlanta, Georgia": 4, "Algiers, Algiers Province": 4, "Caracas, Capital District": 4, "S\u00e3o Paulo, S\u00e3o Paulo (state)": 14, "Rio de Janeiro, Rio de Janeiro (state)": 5, "Hong Kong, Hong Kong": 4, "Berlin, Berlin": 8, "Kolkata, West Bengal": 5, "Tulsa, Oklahoma": 4, "Lagos, Lagos State": 16, "Dili, Timor-Leste": 3, "Ahmedabad, Gujarat": 4, "Skopje, Municipality of Centar (Skopje)": 4, "Moscow, Moscow": 5, "Karachi, Sindh": 4, "Bogot\u00e1, Distrito Especial": 5, "Dar es Salaam, Dar es Salaam": 7, "Jakarta, Jakarta": 10, "Accra, Greater Accra Region": 4, "Buenos Aires, Ciudad Aut\u00f3noma de Buenos Aires": 9, "Melbourne, Victoria": 7, "Gurugram, Haryana": 6, "Delhi, Delhi": 6, "Kuala Lumpur, Kuala Lumpur": 4, "Los Angeles, California": 5, "Lima, Lima Region": 4, "Istanbul, Istanbul Province": 9, "Chennai, Tamil Nadu": 6, "Abuja, Federal Capital Territory": 7, "Bangkok, Bangkok": 5, "Mexico City, Distrito Federal": 7, "Cape Town, Western Cape": 5, "San Francisco, California": 6, "Tehran, Tehran Province": 4, "New York, New York": 14, "Cairo, Cairo Governorate": 4, "Santiago, Santiago Metropolitan Region": 6, "Dubai, Dubai": 8, "Mumbai, Maharashtra": 8, "Bangalore, Karnataka": 18, "Singapore, Singapore": 6, "Hyderabad, Telangana": 7, "San Diego, California": 6}}, "emitted_at": 1700230802791} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_country", "date": "2023-11-17T08:00:00+00:00", "value": {"DE": 31, "HK": 4, "TW": 5, "FI": 5, "RU": 9, "TZ": 8, "FR": 10, "SA": 8, "BR": 64, "SE": 6, "SG": 6, "MA": 6, "DZ": 6, "ID": 29, "GB": 45, "CA": 24, "US": 264, "GH": 4, "EG": 10, "AE": 9, "CH": 7, "IN": 125, 
"ZA": 16, "IQ": 6, "CL": 9, "IR": 12, "GR": 6, "IT": 19, "MX": 24, "MY": 9, "CO": 11, "ES": 13, "VE": 9, "AR": 23, "AT": 4, "TH": 7, "AU": 35, "PE": 4, "PH": 7, "NG": 30, "TN": 6, "PK": 10, "PL": 5, "TR": 10, "NL": 13}}, "emitted_at": 1700230802792} +{"stream": "user_lifetime_insights", "data": {"page_id": "144706962067225", "business_account_id": "17841408147298757", "metric": "audience_gender_age", "date": "2023-11-17T08:00:00+00:00", "value": {"F.18-24": 11, "F.25-34": 75, "F.35-44": 72, "F.45-54": 17, "F.55-64": 1, "F.65+": 2, "M.13-17": 2, "M.18-24": 50, "M.25-34": 365, "M.35-44": 228, "M.45-54": 83, "M.55-64": 20, "M.65+": 12, "U.18-24": 18, "U.25-34": 67, "U.35-44": 42, "U.45-54": 19, "U.55-64": 5}}, "emitted_at": 1700230802792} diff --git a/airbyte-integrations/connectors/source-instagram/metadata.yaml b/airbyte-integrations/connectors/source-instagram/metadata.yaml index 48c76e1a70d2..13fb9d4bc74a 100644 --- a/airbyte-integrations/connectors/source-instagram/metadata.yaml +++ b/airbyte-integrations/connectors/source-instagram/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: 6acf6b55-4f1e-4fca-944e-1a3caef8aba8 - dockerImageTag: 1.0.16 + dockerImageTag: 2.0.0 dockerRepository: airbyte/source-instagram githubIssueLabel: source-instagram icon: instagram.svg @@ -19,6 +19,13 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 2.0.0: + message: + This release introduces a default primary key for the streams UserLifetimeInsights and UserInsights. + Additionally, the format of timestamp fields has been updated in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. + upgradeDeadline: "2023-12-03" suggestedStreams: streams: - media diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json index 4185de5f66cb..03c77796f5a0 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/media.json @@ -53,7 +53,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] @@ -94,7 +95,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json index 7fc7fa7b40a3..876edf95ea41 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/stories.json @@ -47,7 +47,8 @@ }, "timestamp": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "username": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json index fe98eafcccbf..91bc309d8eb6 100644 --- 
a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "follower_count": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json index eb9bb57fc720..4cb5092f5ace 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/schemas/user_lifetime_insights.json @@ -9,7 +9,8 @@ }, "date": { "type": ["null", "string"], - "format": "date-time" + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" }, "metric": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py index 9c07d98bb083..bf5d39de1e1c 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/streams.py @@ -11,6 +11,7 @@ import pendulum from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import IncrementalMixin, Stream +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer from cached_property import cached_property from facebook_business.adobjects.igmedia import IGMedia from facebook_business.exceptions import FacebookRequestError @@ -19,6 +20,24 @@ from .common import remove_params_from_url +class DatetimeTransformerMixin: + transformer: TypeTransformer = TypeTransformer(TransformConfig.CustomSchemaNormalization) + + @staticmethod + @transformer.registerCustomTransform + def custom_transform_datetime_rfc3339(original_value, field_schema): + """ + Transform datetime string to RFC 3339 format + """ + if original_value and field_schema.get("format") == "date-time" and field_schema.get("airbyte_type") == "timestamp_with_timezone": + # Parse the ISO format timestamp + dt = pendulum.parse(original_value) + + # Convert to RFC 3339 format + return dt.to_rfc3339_string() + return original_value + + class InstagramStream(Stream, ABC): """Base stream class""" @@ -121,10 +140,10 @@ def read_records( yield self.transform(record) -class UserLifetimeInsights(InstagramStream): +class UserLifetimeInsights(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" - primary_key = None + primary_key = ["business_account_id", "metric", "date"] LIFETIME_METRICS = ["audience_city", "audience_country", "audience_gender_age", "audience_locale"] period = "lifetime" @@ -156,7 +175,7 @@ def request_params( return params -class UserInsights(InstagramIncrementalStream): +class UserInsights(DatetimeTransformerMixin, InstagramIncrementalStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/insights""" METRICS_BY_PERIOD = { @@ -176,7 +195,7 @@ class UserInsights(InstagramIncrementalStream): "lifetime": ["online_followers"], } - primary_key = None + primary_key = ["business_account_id", "date"] cursor_field = "date" # For some metrics we can only get 
insights not older than 30 days, it is Facebook policy @@ -295,7 +314,7 @@ def _state_has_legacy_format(self, state: Mapping[str, Any]) -> bool: return False -class Media(InstagramStream): +class Media(DatetimeTransformerMixin, InstagramStream): """Children objects can only be of the media_type == "CAROUSEL_ALBUM". And children object does not support INVALID_CHILDREN_FIELDS fields, so they are excluded when trying to get child objects to avoid the error @@ -403,7 +422,7 @@ def _get_insights(self, item, account_id) -> Optional[MutableMapping[str, Any]]: raise error -class Stories(InstagramStream): +class Stories(DatetimeTransformerMixin, InstagramStream): """Docs: https://developers.facebook.com/docs/instagram-api/reference/ig-user/stories""" def read_records( diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py index 7b9fd1db150e..a065d01b77cf 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py @@ -35,6 +35,7 @@ def config_fixture(): def some_config_fixture(account_id): return {"start_date": "2021-01-23T00:00:00Z", "access_token": "unknown_token"} + @fixture(scope="session", name="some_config_future_date") def some_config_future_date_fixture(account_id): return {"start_date": "2030-01-23T00:00:00Z", "access_token": "unknown_token"} diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py index 2cdca11b4f8f..add26ad1a33f 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py @@ -31,11 +31,14 @@ def test_check_connection_empty_config(api): assert not ok assert error_msg + def test_check_connection_invalid_config_future_date(api, some_config_future_date): ok, error_msg = SourceInstagram().check_connection(logger, config=some_config_future_date) assert not ok assert error_msg + + def test_check_connection_no_date_config(api, some_config): some_config.pop("start_date") ok, error_msg = SourceInstagram().check_connection(logger, config=some_config) diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py index 39fc889e7e48..19470cb9c22b 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py @@ -9,6 +9,7 @@ from airbyte_cdk.models import SyncMode from facebook_business import FacebookAdsApi, FacebookSession from source_instagram.streams import ( + DatetimeTransformerMixin, InstagramStream, Media, MediaInsights, @@ -32,15 +33,11 @@ def test_clear_url(config): def test_state_outdated(api, config): - assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": MagicMock()} - ) + assert UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": MagicMock()}) def test_state_is_not_outdated(api, config): - assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format( - {"state": {}} - ) + assert not UserInsights(api=api, start_date=config["start_date"])._state_has_legacy_format({"state": {}}) def test_media_get_children(api, requests_mock, 
some_config): @@ -208,9 +205,9 @@ def test_user_lifetime_insights_read(api, config, user_insight_data, requests_mo @pytest.mark.parametrize( "values,expected", [ - ({"end_time": "test_end_time", "value": "test_value"}, {"date": "test_end_time", "value": "test_value"}), + ({"end_time": "2020-05-04T07:00:00+0000", "value": "test_value"}, {"date": "2020-05-04T07:00:00+0000", "value": "test_value"}), ({"value": "test_value"}, {"date": None, "value": "test_value"}), - ({"end_time": "test_end_time"}, {"date": "test_end_time", "value": None}), + ({"end_time": "2020-05-04T07:00:00+0000"}, {"date": "2020-05-04T07:00:00+0000", "value": None}), ({}, {"date": None, "value": None}), ], ids=[ @@ -363,3 +360,22 @@ def test_exit_gracefully(api, config, requests_mock, caplog): assert not records assert requests_mock.call_count == 6 # 4 * 1 per `metric_to_period` map + 1 `summary` request + 1 `business_account_id` request assert "Stopping syncing stream 'user_insights'" in caplog.text + + +@pytest.mark.parametrize( + "original_value, field_schema, expected", + [ + ("2020-01-01T12:00:00Z", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-01-01T12:00:00+00:00"), + ("2020-05-04T07:00:00+0000", {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, "2020-05-04T07:00:00+00:00"), + (None, {"format": "date-time", "airbyte_type": "timestamp_with_timezone"}, None), + ("2020-01-01T12:00:00", {"format": "date-time", "airbyte_type": "timestamp_without_timezone"}, "2020-01-01T12:00:00"), + ("2020-01-01T14:00:00", {"format": "date-time"}, "2020-01-01T14:00:00"), + ("2020-02-03T12:00:00", {"type": "string"}, "2020-02-03T12:00:00"), + ], +) +def test_custom_transform_datetime_rfc3339(original_value, field_schema, expected): + # Call the static method + result = DatetimeTransformerMixin.custom_transform_datetime_rfc3339(original_value, field_schema) + + # Assert the result matches the expected output + assert result == expected diff --git a/docs/integrations/sources/instagram-migrations.md b/docs/integrations/sources/instagram-migrations.md new file mode 100644 index 000000000000..f9009b09e3b5 --- /dev/null +++ b/docs/integrations/sources/instagram-migrations.md @@ -0,0 +1,9 @@ +# Instagram Migration Guide + +## Upgrading to 2.0.0 + +This release adds a default primary key for the streams UserLifetimeInsights and UserInsights, and updates the format of timestamp fields in the UserLifetimeInsights, UserInsights, Media and Stories streams to include timezone information. 
+ +To ensure uninterrupted syncs, users should: +- Refresh the source schema +- Reset affected streams \ No newline at end of file diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index 307acbd686ac..7b4999945fd4 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -93,6 +93,7 @@ AirbyteRecords are required to conform to the [Airbyte type](https://docs.airbyt | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------| +| 2.0.0 | 2023-11-17 | [32500](https://github.com/airbytehq/airbyte/pull/32500) | Add primary keys for UserLifetimeInsights and UserInsights; add airbyte_type to timestamp fields | | 1.0.16 | 2023-11-17 | [32627](https://github.com/airbytehq/airbyte/pull/32627) | Fix start_date type; fix docs | | 1.0.15 | 2023-11-14 | [32494](https://github.com/airbytehq/airbyte/pull/32494) | Marked start_date as optional; set max retry time to 10 minutes; add suggested streams | | 1.0.14 | 2023-11-13 | [32423](https://github.com/airbytehq/airbyte/pull/32423) | Capture media_product_type column in media and stories stream | From e69e3decc1f00f8e6ce2673f11dfd5691de2ed89 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Fri, 17 Nov 2023 17:47:12 +0100 Subject: [PATCH 38/57] Source Bing Ads: unpin version (#32643) --- .../source-bing-ads/integration_tests/expected_records.jsonl | 4 ++-- airbyte-integrations/connectors/source-bing-ads/metadata.yaml | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl index 7a61e57866e3..73359a964320 100644 --- a/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-bing-ads/integration_tests/expected_records.jsonl @@ -15,8 +15,8 @@ {"stream":"campaign_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"TimePeriod":"2023-11-05","CurrencyCode":"USD","AdDistribution":"Search","DeviceType":"Computer","Network":"Syndicated search partners","DeliveredMatchType":"Exact","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","CampaignType":"Search & 
content","CampaignStatus":"Active","CampaignLabels":null,"Impressions":9,"Clicks":1,"Ctr":11.11,"Spend":0.03,"CostPerConversion":null,"QualityScore":5.0,"AdRelevance":3.0,"LandingPageExperience":1.0,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Assists":0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"CustomParameters":null,"ViewThroughConversions":0,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"AverageCpc":0.03,"AveragePosition":0.0,"AverageCpm":3.33,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":0,"LowQualitySophisticatedClicks":0,"LowQualityConversions":0,"LowQualityConversionRate":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null,"BudgetName":null,"BudgetStatus":null,"BudgetAssociationStatus":"Current","HistoricalQualityScore":5.0,"HistoricalExpectedCtr":2.0,"HistoricalAdRelevance":3.0,"HistoricalLandingPageExperience":1.0},"emitted_at":1699954081143} {"stream":"campaign_impression_performance_report_daily","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-07","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":3.37,"ImpressionLostToBudgetPercent":85.19,"ImpressionLostToRankAggPercent":11.45,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":null,"ClickSharePercent":null,"AbsoluteTopImpressionSharePercent":6.02,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":14.63,"TopImpressionShareLostToBudgetPercent":77.24,"AbsoluteTopImpressionShareLostToRankPercent":15.66,"AbsoluteTopImpressionShareLostToBudgetPercent":78.31,"TopImpressionSharePercent":8.13,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954182626} {"stream":"campaign_impression_performance_report_weekly","data":{"AccountName":"Airbyte","AccountNumber":"F149MJ18","AccountId":180519267,"TimePeriod":"2023-11-05","CampaignStatus":"Active","CampaignName":"Airbyte test","CampaignId":531016227,"CurrencyCode":"USD","AdDistribution":"Search","Impressions":10,"Clicks":1,"Ctr":10.0,"AverageCpc":0.33,"Spend":0.33,"AveragePosition":0.0,"Conversions":0,"ConversionRate":null,"CostPerConversion":null,"LowQualityClicks":0,"LowQualityClicksPercent":0.0,"LowQualityImpressions":9,"LowQualityImpressionsPercent":47.37,"LowQualityConversions":0,"LowQualityConversionRate":null,"DeviceType":"Computer","ImpressionSharePercent":10.87,"ImpressionLostToBudgetPercent":17.05,"ImpressionLostToRankAggPercent":72.08,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"HistoricalQualityScore":null,"HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"PhoneImpressions":0,"PhoneCalls":0,"Ptr":null,"Network":"Syndicated search partners","Assists":0,"Revenue":0.0,"ReturnOnAdSpend":0.0,"CostPerAssist":null,"RevenuePerConversion":null,"RevenuePerAssist":null,"TrackingTemplate":null,"CustomParameters":null,"AccountStatus":"Active","LowQualityGeneralClicks":0,"LowQualitySophisticatedClicks":0,"CampaignLabels":null,"ExactMatchImpressionSharePercent":29.07,"ClickSharePercent":2.89,"AbsoluteTopImpressionSharePercent":8.88,"FinalUrlSuffix":null,"CampaignType":"Search & 
content","TopImpressionShareLostToRankPercent":76.51,"TopImpressionShareLostToBudgetPercent":9.99,"AbsoluteTopImpressionShareLostToRankPercent":81.99,"AbsoluteTopImpressionShareLostToBudgetPercent":9.13,"TopImpressionSharePercent":13.5,"AbsoluteTopImpressionRatePercent":50.0,"TopImpressionRatePercent":100.0,"BaseCampaignId":531016227,"AllConversions":0,"AllRevenue":0.0,"AllConversionRate":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":0.0,"AllRevenuePerConversion":null,"ViewThroughConversions":0,"AudienceImpressionSharePercent":null,"AudienceImpressionLostToRankPercent":null,"AudienceImpressionLostToBudgetPercent":null,"RelativeCtr":null,"AverageCpm":33.0,"ConversionsQualified":0.0,"LowQualityConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0,"VideoViews":0,"ViewThroughRate":0.0,"AverageCPV":null,"VideoViewsAt25Percent":0,"VideoViewsAt50Percent":0,"VideoViewsAt75Percent":0,"CompletedVideoViews":0,"VideoCompletionRate":null,"TotalWatchTimeInMS":0,"AverageWatchTimePerVideoView":null,"AverageWatchTimePerImpression":0.0,"Sales":0,"CostPerSale":null,"RevenuePerSale":null,"Installs":0,"CostPerInstall":null,"RevenuePerInstall":null},"emitted_at":1699954211223} -{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.11,"FirstPageBid":0.48,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700167471396} -{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.11,"FirstPageBid":0.48,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700167573859} +{"stream":"keyword_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-07","CurrencyCode":"USD","DeliveredMatchType":"Phrase","AdDistribution":"Search","DeviceType":"Computer","Language":"German","Network":"Syndicated search partners","DeviceOS":"Unknown","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte test","AdGroupName":"keywords","KeywordStatus":"Active","HistoricalExpectedCtr":null,"HistoricalAdRelevance":null,"HistoricalLandingPageExperience":null,"HistoricalQualityScore":null,"Impressions":1,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.0,"FirstPageBid":0.43,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700237754157} +{"stream":"keyword_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"KeywordId":84801135055365,"Keyword":"connector","AdId":84800390693061,"TimePeriod":"2023-11-05","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","AccountName":"Airbyte","CampaignName":"Airbyte 
test","AdGroupName":"keywords","KeywordStatus":"Active","Impressions":2,"Clicks":0,"Ctr":0.0,"CurrentMaxCpc":2.27,"Spend":0.0,"CostPerConversion":null,"QualityScore":5.0,"ExpectedCtr":"2","AdRelevance":3.0,"LandingPageExperience":1.0,"QualityImpact":0.0,"Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"CustomParameters":null,"FinalAppUrl":null,"Mainline1Bid":null,"MainlineBid":1.0,"FirstPageBid":0.43,"FinalUrlSuffix":null,"ViewThroughConversions":0,"ViewThroughConversionsQualified":null,"AllCostPerConversion":null,"AllReturnOnAdSpend":null,"Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1700237801690} {"stream":"geographic_performance_report_daily","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-07","Country":"Australia","CurrencyCode":"USD","DeliveredMatchType":"Broad","AdDistribution":"Search","DeviceType":"Computer","Language":"English","Network":"Syndicated search partners","DeviceOS":"Windows","TopVsOther":"Syndicated search partners - Top","BidMatchType":"Broad","MetroArea":null,"State":"New South Wales","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"2000","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":"2000","LocationId":"122395","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699956863587} {"stream":"geographic_performance_report_weekly","data":{"AccountId":180519267,"CampaignId":531016227,"AdGroupId":1356799861840328,"TimePeriod":"2023-11-05","Country":"Argentina","CurrencyCode":"USD","DeliveredMatchType":"Exact","AdDistribution":"Search","DeviceType":"Computer","Language":"Spanish","Network":"Microsoft sites and select traffic","DeviceOS":"Windows","TopVsOther":"Microsoft sites and select traffic - top","BidMatchType":"Broad","MetroArea":null,"State":"Buenos Aires Province","City":null,"AdGroupName":"keywords","Ctr":0.0,"ProximityTargetLocation":null,"Radius":"0","Assists":0,"ReturnOnAdSpend":null,"CostPerAssist":null,"LocationType":"Physical location","MostSpecificLocation":"Buenos Aires 
Province","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","County":null,"PostalCode":null,"LocationId":"141965","BaseCampaignId":"531016227","AllCostPerConversion":null,"AllReturnOnAdSpend":null,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":0.0,"TopImpressionRatePercent":"100.00","AllConversionsQualified":"0.00","ViewThroughConversionsQualified":null,"Neighborhood":null,"ViewThroughRevenue":"0.00","CampaignType":"Search & content","AssetGroupId":null,"AssetGroupName":null,"AssetGroupStatus":null,"Clicks":0,"Spend":0.0,"Impressions":1,"CostPerConversion":null,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","Conversions":0.0,"ConversionRate":null,"ConversionsQualified":0.0,"AverageCpc":0.0,"AveragePosition":0.0,"AverageCpm":0.0,"AllConversions":0,"AllConversionRate":null,"AllRevenue":0.0,"AllRevenuePerConversion":null,"Revenue":0.0,"RevenuePerConversion":null,"RevenuePerAssist":null},"emitted_at":1699953673210} {"stream":"age_gender_audience_report_daily","data":{"AccountId":180519267,"AgeGroup":"Unknown","Gender":"Unknown","TimePeriod":"2023-11-07","AllConversions":0,"AccountName":"Airbyte","AccountNumber":"F149MJ18","CampaignName":"Airbyte test","CampaignId":531016227,"AdGroupName":"keywords","AdGroupId":1356799861840328,"AdDistribution":"Search","Impressions":3,"Clicks":1,"Conversions":0.0,"Spend":0.79,"Revenue":0.0,"ExtendedCost":0.0,"Assists":0,"Language":"German","AccountStatus":"Active","CampaignStatus":"Active","AdGroupStatus":"Active","BaseCampaignId":"531016227","AllRevenue":0.0,"ViewThroughConversions":0,"Goal":null,"GoalType":null,"AbsoluteTopImpressionRatePercent":33.33,"TopImpressionRatePercent":100.0,"ConversionsQualified":0.0,"AllConversionsQualified":0.0,"ViewThroughConversionsQualified":null,"ViewThroughRevenue":0.0},"emitted_at":1699954406862} diff --git a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml index 8d64cd443fa7..102607de6664 100644 --- a/airbyte-integrations/connectors/source-bing-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-bing-ads/metadata.yaml @@ -25,11 +25,9 @@ data: name: Bing Ads registries: cloud: - dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU enabled: true oss: enabled: true - dockerImageTag: 1.13.0 #https://airbytehq-team.slack.com/archives/C0662JB7XPU releaseStage: generally_available releases: breakingChanges: From 4e51b7d645fbe256bacae189612b22c1aef5cba1 Mon Sep 17 00:00:00 2001 From: Christo Grabowski <108154848+ChristoGrab@users.noreply.github.com> Date: Fri, 17 Nov 2023 12:04:54 -0500 Subject: [PATCH 39/57] Source Mailchimp: Handle empty fields in Reports stream (#32543) --- .../connectors/source-mailchimp/metadata.yaml | 2 +- .../source_mailchimp/schemas/reports.json | 2 +- .../source_mailchimp/streams.py | 25 +++++++----- .../unit_tests/test_streams.py | 38 ++++++++++++++++++- docs/integrations/sources/mailchimp.md | 1 + 5 files changed, 56 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml index 11ff4b1bbbcc..ef06fb27bda0 100644 --- a/airbyte-integrations/connectors/source-mailchimp/metadata.yaml +++ b/airbyte-integrations/connectors/source-mailchimp/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: b03a9f3e-22a5-11eb-adc1-0242ac120002 - 
dockerImageTag: 0.8.2 + dockerImageTag: 0.8.3 dockerRepository: airbyte/source-mailchimp documentationUrl: https://docs.airbyte.com/integrations/sources/mailchimp githubIssueLabel: source-mailchimp diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json index fe6dcf6a599b..34e513022879 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/schemas/reports.json @@ -138,7 +138,7 @@ "description": "The number of unique opens divided by the total number of successful deliveries." }, "last_open": { - "type": "string", + "type": ["null", "string"], "format": "date-time", "title": "Last Open", "description": "The date and time of the last recorded open in ISO 8601 format." diff --git a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py index d5239e7f3ecf..27df31f5b05c 100644 --- a/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/streams.py @@ -274,21 +274,28 @@ class Reports(IncrementalMailChimpStream): cursor_field = "send_time" data_field = "reports" + @staticmethod + def remove_empty_datetime_fields(record: Mapping[str, Any]) -> Mapping[str, Any]: + """ + In some cases, the 'clicks.last_click' and 'opens.last_open' fields are returned as an empty string, + which causes validation errors on the `date-time` format. + To avoid this, we remove the fields if they are empty. + """ + clicks = record.get("clicks", {}) + opens = record.get("opens", {}) + if not clicks.get("last_click"): + clicks.pop("last_click", None) + if not opens.get("last_open"): + opens.pop("last_open", None) + return record + def path(self, **kwargs) -> str: return "reports" def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - response = super().parse_response(response, **kwargs) - - # In some cases, the 'last_click' field is returned as an empty string, - # which causes validation errors on the `date-time` format. - # To avoid this, we remove the field if it is empty. 
for record in response: - clicks = record.get("clicks", {}) - if not clicks.get("last_click"): - clicks.pop("last_click", None) - yield record + yield self.remove_empty_datetime_fields(record) class Segments(MailChimpListSubStream): diff --git a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py index 021f50470920..094eb4fe0bf5 100644 --- a/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-mailchimp/unit_tests/test_streams.py @@ -10,7 +10,7 @@ import responses from airbyte_cdk.models import SyncMode from requests.exceptions import HTTPError -from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Segments +from source_mailchimp.streams import Campaigns, EmailActivity, ListMembers, Lists, Reports, Segments from utils import read_full_refresh, read_incremental @@ -413,3 +413,39 @@ def test_403_error_handling( # Handle non-403 error except HTTPError as e: assert e.response.status_code == status_code + +@pytest.mark.parametrize( + "record, expected_return", + [ + ( + {"clicks": {"last_click": ""}, "opens": {"last_open": ""}}, + {"clicks": {}, "opens": {}}, + ), + ( + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": ""}}, + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {}}, + ), + ( + {"clicks": {"last_click": ""}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + {"clicks": {}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + + ), + ( + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + {"clicks": {"last_click": "2023-01-01T00:00:00.000Z"}, "opens": {"last_open": "2023-01-01T00:00:00.000Z"}}, + ), + ], + ids=[ + "last_click and last_open empty", + "last_click empty", + "last_open empty", + "last_click and last_open not empty" + ] +) +def test_reports_remove_empty_datetime_fields(auth, record, expected_return): + """ + Tests that the Reports stream removes the 'clicks' and 'opens' fields from the response + when they are empty strings + """ + stream = Reports(authenticator=auth) + assert stream.remove_empty_datetime_fields(record) == expected_return, f"Expected: {expected_return}, Actual: {stream.remove_empty_datetime_fields(record)}" diff --git a/docs/integrations/sources/mailchimp.md b/docs/integrations/sources/mailchimp.md index d3d2551a8a9c..20523890da5e 100644 --- a/docs/integrations/sources/mailchimp.md +++ b/docs/integrations/sources/mailchimp.md @@ -76,6 +76,7 @@ Now that you have set up the Mailchimp source connector, check out the following | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------| +| 0.8.3 | 2023-11-15 | [32543](https://github.com/airbytehq/airbyte/pull/32543) | Handle empty datetime fields in Reports stream | | 0.8.2 | 2023-11-13 | [32466](https://github.com/airbytehq/airbyte/pull/32466) | Improve error handling during connection check | | 0.8.1 | 2023-11-06 | [32226](https://github.com/airbytehq/airbyte/pull/32226) | Unmute expected records test after data anonymisation | | 0.8.0 | 2023-11-01 | [32032](https://github.com/airbytehq/airbyte/pull/32032) | Add ListMembers stream | From 6d54a5dc725ed98666e31c2cf46fed82b1eab42f Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Fri, 17 Nov 2023 19:18:24 +0100 Subject: 
[PATCH 40/57] Vector DBs: Mention langchain in docs (#32501) --- docs/integrations/destinations/chroma.md | 1 + docs/integrations/destinations/milvus.md | 2 +- docs/integrations/destinations/pinecone.md | 2 +- docs/integrations/destinations/qdrant.md | 1 + docs/integrations/destinations/weaviate.md | 2 +- 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/integrations/destinations/chroma.md b/docs/integrations/destinations/chroma.md index bd7357ffc70f..3e37bebba225 100644 --- a/docs/integrations/destinations/chroma.md +++ b/docs/integrations/destinations/chroma.md @@ -56,6 +56,7 @@ Make sure your Chroma database can be accessed by Airbyte. If your database is w You should now have all the requirements needed to configure Chroma as a destination in the UI. You'll need the following information to configure the Chroma destination: - (Required) **Text fields to embed** +- (Optional) **Text splitter** Options around configuring the chunking process provided by the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). - (Required) **Fields to store as metadata** - (Required) **Collection** The name of the collection in Chroma db to store your data - (Required) Authentication method diff --git a/docs/integrations/destinations/milvus.md b/docs/integrations/destinations/milvus.md index f5de8b2e04ad..2e7c225a0b8c 100644 --- a/docs/integrations/destinations/milvus.md +++ b/docs/integrations/destinations/milvus.md @@ -37,7 +37,7 @@ You'll need the following information to configure the destination: ### Processing -Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. +Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. diff --git a/docs/integrations/destinations/pinecone.md b/docs/integrations/destinations/pinecone.md index 6142e7935fcc..e060cf243bf0 100644 --- a/docs/integrations/destinations/pinecone.md +++ b/docs/integrations/destinations/pinecone.md @@ -46,7 +46,7 @@ All other fields are ignored. ### Processing -Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that meta data fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. 
+Each record will be split into text fields and meta fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that meta data fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. diff --git a/docs/integrations/destinations/qdrant.md b/docs/integrations/destinations/qdrant.md index 549f26ada6d6..648d0b019283 100644 --- a/docs/integrations/destinations/qdrant.md +++ b/docs/integrations/destinations/qdrant.md @@ -45,6 +45,7 @@ Make sure your Qdrant database can be accessed by Airbyte. If your database is w You should now have all the requirements needed to configure Qdrant as a destination in the UI. You'll need the following information to configure the Qdrant destination: - (Required) **Text fields to embed** +- (Optional) **Text splitter** Options around configuring the chunking process provided by the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). - (Required) **Fields to store as metadata** - (Required) **Collection** The name of the collection in Qdrant db to store your data - (Required) **The field in the payload that contains the embedded text** diff --git a/docs/integrations/destinations/weaviate.md b/docs/integrations/destinations/weaviate.md index 0d914d136619..9e36ca07fe57 100644 --- a/docs/integrations/destinations/weaviate.md +++ b/docs/integrations/destinations/weaviate.md @@ -48,7 +48,7 @@ All other fields are serialized into their JSON representation. ### Processing -Each record will be split into text fields and metadata fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Please note that metadata fields can only be used for filtering and not for retrieval and have to be of type string, number, boolean (all other values are ignored). Please note that there's a 40kb limit on the _total_ size of the metadata saved for each entry. +Each record will be split into text fields and metadata fields as configured in the "Processing" section. All text fields are concatenated into a single string and then split into chunks of configured length. If specified, the metadata fields are stored as-is along with the embedded text chunks. Options around configuring the chunking process use the [Langchain Python library](https://python.langchain.com/docs/get_started/introduction). When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. 
`users.*.name` will access all `names` fields in all entries of the `users` array. From ab1490a38cd2823e0f7233db4abf586d19547c42 Mon Sep 17 00:00:00 2001 From: Denys Davydov Date: Fri, 17 Nov 2023 20:27:32 +0200 Subject: [PATCH 41/57] :bug: Source Stripe: update availability strategy (#32638) --- .../integration_tests/expected_records.jsonl | 4 +- .../connectors/source-stripe/metadata.yaml | 2 +- .../source_stripe/availability_strategy.py | 59 ++++++- .../source_stripe/stream_helpers.py | 41 +++++ .../source-stripe/source_stripe/streams.py | 46 ++++-- .../unit_tests/test_availability_strategy.py | 152 ++++++++++++++---- docs/integrations/sources/stripe.md | 3 +- 7 files changed, 261 insertions(+), 46 deletions(-) create mode 100644 airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py diff --git a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl index a84e541f0ecf..da9e7ed4ea18 100644 --- a/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-stripe/integration_tests/expected_records.jsonl @@ -46,7 +46,7 @@ {"stream": "products", "data": {"id": "prod_KouQ5ez86yREmB", "object": "product", "active": true, "attributes": [], "created": 1640124902, "default_price": "price_1K9GbqEcXtiJtvvhJ3lZe4i5", "description": null, "features": [], "images": [], "livemode": false, "metadata": {}, "name": "edgao-test-product", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10000000", "type": "service", "unit_label": null, "updated": 1696839715, "url": null}, "emitted_at": 1697627307635} {"stream": "products", "data": {"id": "prod_NHcKselSHfKdfc", "object": "product", "active": true, "attributes": [], "created": 1675345504, "default_price": "price_1MX364EcXtiJtvvhE3WgTl4O", "description": "Test Product 1 description", "features": [], "images": ["https://files.stripe.com/links/MDB8YWNjdF8xSndub2lFY1h0aUp0dnZofGZsX3Rlc3RfdjBOT09UaHRiNVl2WmJ6clNYRUlmcFFD00cCBRNHnV"], "livemode": false, "metadata": {}, "name": "Test Product 1", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10301000", "type": "service", "unit_label": null, "updated": 1696839789, "url": null}, "emitted_at": 1697627307877} {"stream": "products", "data": {"id": "prod_NCgx1XP2IFQyKF", "object": "product", "active": true, "attributes": [], "created": 1674209524, "default_price": null, "description": null, "features": [], "images": [], "livemode": false, "metadata": {}, "name": "tu", "package_dimensions": null, "shippable": null, "statement_descriptor": null, "tax_code": "txcd_10000000", "type": "service", "unit_label": null, "updated": 1696839225, "url": null}, "emitted_at": 1697627307879} -{"stream": "subscriptions", "data": {"id": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "object": "subscription", "application": null, "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": 1697550676.0, "billing_thresholds": null, "cancel_at": 1705499476.0, "cancel_at_period_end": false, "canceled_at": 1697550676.0, "cancellation_details": {"comment": null, "feedback": null, "reason": "cancellation_requested"}, "collection_method": "charge_automatically", "created": 1697550676, "currency": "usd", "current_period_end": 1700229076.0, "current_period_start": 1697550676, "customer": "cus_NGoTFiJFVbSsvZ", "days_until_due": 
null, "default_payment_method": null, "default_source": null, "default_tax_rates": [], "description": null, "discount": null, "ended_at": null, "items": {"object": "list", "data": [{"id": "si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}], "has_more": false, "total_count": 1.0, "url": "/v1/subscription_items?subscription=sub_1O2Dg0EcXtiJtvvhz7Q4zS0n"}, "latest_invoice": "in_1O2Dg0EcXtiJtvvhLe87VaYL", "livemode": false, "metadata": {}, "next_pending_invoice_item_invoice": null, "on_behalf_of": null, "pause_collection": null, "payment_settings": {"payment_method_options": null, "payment_method_types": null, "save_default_payment_method": null}, "pending_invoice_item_interval": null, "pending_setup_intent": null, "pending_update": null, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "quantity": 1, "schedule": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "start_date": 1697550676, "status": "active", "test_clock": null, "transfer_data": null, "trial_end": null, "trial_settings": {"end_behavior": {"missing_payment_method": "create_invoice"}}, "trial_start": null, "updated": 1697550676}, "emitted_at": 1697627310741} +{"stream": "subscriptions", "data": {"id": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "object": "subscription", "application": null, "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": 1697550676.0, "billing_thresholds": null, "cancel_at": 1705499476.0, "cancel_at_period_end": false, "canceled_at": 1697550676.0, "cancellation_details": {"comment": null, "feedback": null, "reason": "cancellation_requested"}, "collection_method": "charge_automatically", "created": 1697550676, "currency": "usd", "current_period_end": 1702821076.0, "current_period_start": 1700229076, "customer": "cus_NGoTFiJFVbSsvZ", "days_until_due": null, "default_payment_method": null, "default_source": null, "default_tax_rates": [], "description": null, "discount": null, "ended_at": null, "items": {"object": "list", "data": [{"id": 
"si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}], "has_more": false, "total_count": 1.0, "url": "/v1/subscription_items?subscription=sub_1O2Dg0EcXtiJtvvhz7Q4zS0n"}, "latest_invoice": "in_1ODSSHEcXtiJtvvhW5LllxDH", "livemode": false, "metadata": {}, "next_pending_invoice_item_invoice": null, "on_behalf_of": null, "pause_collection": null, "payment_settings": {"payment_method_options": null, "payment_method_types": null, "save_default_payment_method": null}, "pending_invoice_item_interval": null, "pending_setup_intent": null, "pending_update": null, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "quantity": 1, "schedule": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "start_date": 1697550676, "status": "active", "test_clock": null, "transfer_data": null, "trial_end": null, "trial_settings": {"end_behavior": {"missing_payment_method": "create_invoice"}}, "trial_start": null, "updated": 1697550676}, "emitted_at": 1700232971060} {"stream": "subscription_schedule", "data": {"id": "sub_sched_1O2Dg0EcXtiJtvvh7GtbtIhP", "object": "subscription_schedule", "application": null, "canceled_at": null, "completed_at": null, "created": 1697550676, "current_phase": {"end_date": 1705499476, "start_date": 1697550676}, "customer": "cus_NGoTFiJFVbSsvZ", "default_settings": {"application_fee_percent": null, "automatic_tax": {"enabled": false}, "billing_cycle_anchor": "automatic", "billing_thresholds": null, "collection_method": "charge_automatically", "default_payment_method": null, "default_source": null, "description": "Test Test", "invoice_settings": "{'days_until_due': None}", "on_behalf_of": null, "transfer_data": null}, "end_behavior": "cancel", "livemode": false, "metadata": {}, "phases": [{"add_invoice_items": [], "application_fee_percent": null, "automatic_tax": {"enabled": true}, "billing_cycle_anchor": null, "billing_thresholds": null, "collection_method": "charge_automatically", "coupon": null, "currency": "usd", "default_payment_method": null, 
"default_tax_rates": [], "description": "Test Test", "end_date": 1705499476, "invoice_settings": "{'days_until_due': None}", "items": [{"billing_thresholds": null, "metadata": {}, "plan": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "price": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "quantity": 1, "tax_rates": []}], "metadata": {}, "on_behalf_of": null, "proration_behavior": "create_prorations", "start_date": 1697550676, "transfer_data": null, "trial_end": null}], "released_at": null, "released_subscription": null, "renewal_interval": null, "status": "active", "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "test_clock": null, "updated": 1697550676}, "emitted_at": 1697627312079} {"stream": "transfers", "data": {"id": "tr_1NH18zEcXtiJtvvhnd827cNO", "object": "transfer", "amount": 10000, "amount_reversed": 0, "balance_transaction": "txn_1NH190EcXtiJtvvhBO3PeR7p", "created": 1686301085, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NH18zEYmRTj5on1GkCCsqLK", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [], "has_more": false, "total_count": 0.0, "url": "/v1/transfers/tr_1NH18zEcXtiJtvvhnd827cNO/reversals"}, "reversed": false, "source_transaction": null, "source_type": "card", "transfer_group": null, "updated": 1686301085}, "emitted_at": 1697627313262} {"stream": "transfers", "data": {"id": "tr_1NGoaCEcXtiJtvvhjmHtOGOm", "object": "transfer", "amount": 100, "amount_reversed": 100, "balance_transaction": "txn_1NGoaDEcXtiJtvvhsZrNMsdJ", "created": 1686252800, "currency": "usd", "description": null, "destination": "acct_1Jx8unEYmRTj5on1", "destination_payment": "py_1NGoaCEYmRTj5on1LAlAIG3a", "livemode": false, "metadata": {}, "reversals": {"object": "list", "data": [{"id": "trr_1NGolCEcXtiJtvvhOYPck3CP", "object": "transfer_reversal", "amount": 100, "balance_transaction": "txn_1NGolCEcXtiJtvvhZRy4Kd5S", "created": 1686253482, "currency": "usd", "destination_payment_refund": "pyr_1NGolBEYmRTj5on1STal3rmp", "metadata": {}, "source_refund": null, "transfer": "tr_1NGoaCEcXtiJtvvhjmHtOGOm"}], "has_more": false, "total_count": 1.0, "url": "/v1/transfers/tr_1NGoaCEcXtiJtvvhjmHtOGOm/reversals"}, "reversed": true, "source_transaction": null, "source_type": "card", "transfer_group": "ORDER10", "updated": 1686252800}, "emitted_at": 1697627313264} @@ -69,4 +69,4 @@ {"stream": "invoice_line_items", "data": {"id": "il_1MX2yfEcXtiJtvvhiunY2j1x", "object": "line_item", "amount": 25200, "amount_excluding_tax": 25200, "currency": "usd", "description": "edgao-test-product", "discount_amounts": [{"amount": 2520, "discount": "di_1MX2ysEcXtiJtvvh8ORqRVKm"}], "discountable": true, "discounts": ["di_1MX2ysEcXtiJtvvh8ORqRVKm"], "invoice_item": "ii_1MX2yfEcXtiJtvvhfhyOG7SP", "livemode": false, "metadata": {}, "period": {"end": 1675345045, "start": 1675345045}, "plan": null, "price": {"id": "price_1K9GbqEcXtiJtvvhJ3lZe4i5", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1640124902, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_KouQ5ez86yREmB", "recurring": null, "tax_behavior": "inclusive", "tiers_mode": null, "transform_quantity": null, "type": "one_time", "unit_amount": 12600, "unit_amount_decimal": "12600"}, "proration": false, "proration_details": {"credited_items": null}, "quantity": 2, "subscription": null, "tax_amounts": [{"amount": 0, "inclusive": true, "tax_rate": "txr_1MX2yfEcXtiJtvvhVcMEMTRj", "taxability_reason": 
"not_collecting", "taxable_amount": 0}], "tax_rates": [], "type": "invoiceitem", "unit_amount_excluding_tax": "12600", "invoice_id": "in_1MX2yFEcXtiJtvvhMXhUCgKx"}, "emitted_at": 1697627336449} {"stream": "subscription_items", "data": {"id": "si_OptSP2o3XZUBpx", "object": "subscription_item", "billing_thresholds": null, "created": 1697550677, "metadata": {}, "plan": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "plan", "active": true, "aggregate_usage": null, "amount": 600, "amount_decimal": "600", "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "interval": "month", "interval_count": 1, "livemode": false, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "tiers_mode": null, "transform_usage": null, "trial_period_days": null, "usage_type": "licensed"}, "price": {"id": "price_1MSHZoEcXtiJtvvh6O8TYD8T", "object": "price", "active": true, "billing_scheme": "per_unit", "created": 1674209524, "currency": "usd", "custom_unit_amount": null, "livemode": false, "lookup_key": null, "metadata": {}, "nickname": null, "product": "prod_NCgx1XP2IFQyKF", "recurring": {"aggregate_usage": null, "interval": "month", "interval_count": 1, "trial_period_days": null, "usage_type": "licensed"}, "tax_behavior": "exclusive", "tiers_mode": null, "transform_quantity": null, "type": "recurring", "unit_amount": 600, "unit_amount_decimal": "600"}, "quantity": 1, "subscription": "sub_1O2Dg0EcXtiJtvvhz7Q4zS0n", "tax_rates": []}, "emitted_at": 1697627337431} {"stream": "transfer_reversals", "data": {"id": "trr_1NGolCEcXtiJtvvhOYPck3CP", "object": "transfer_reversal", "amount": 100, "balance_transaction": "txn_1NGolCEcXtiJtvvhZRy4Kd5S", "created": 1686253482, "currency": "usd", "destination_payment_refund": "pyr_1NGolBEYmRTj5on1STal3rmp", "metadata": {}, "source_refund": null, "transfer": "tr_1NGoaCEcXtiJtvvhjmHtOGOm"}, "emitted_at": 1697627338960} -{"stream": "usage_records", "data": {"id": "sis_1O4gIOEcXtiJtvvhmsoeBHkP", "object": "usage_record_summary", "invoice": null, "livemode": false, "period": {"end": null, "start": 1697550676}, "subscription_item": "si_OptSP2o3XZUBpx", "total_usage": 1}, "emitted_at": 1697627340175} +{"stream": "usage_records", "data": {"id": "sis_1ODTdwEcXtiJtvvhZChEVsbN", "object": "usage_record_summary", "invoice": null, "livemode": false, "period": {"end": null, "start": 1700229076}, "subscription_item": "si_OptSP2o3XZUBpx", "total_usage": 1}, "emitted_at": 1700233660884} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-stripe/metadata.yaml b/airbyte-integrations/connectors/source-stripe/metadata.yaml index f521fb172278..11fc73a40128 100644 --- a/airbyte-integrations/connectors/source-stripe/metadata.yaml +++ b/airbyte-integrations/connectors/source-stripe/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: e094cb9a-26de-4645-8761-65c0c425d1de - dockerImageTag: 5.0.0 + dockerImageTag: 5.0.1 dockerRepository: airbyte/source-stripe documentationUrl: https://docs.airbyte.com/integrations/sources/stripe githubIssueLabel: source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py b/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py index 9906c21a525a..6226ffc12ea9 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/availability_strategy.py @@ -3,13 +3,16 @@ # import logging -from 
typing import Optional, Tuple +from typing import Any, Mapping, Optional, Tuple +from airbyte_cdk.models import SyncMode from airbyte_cdk.sources import Source from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy from requests import HTTPError +from .stream_helpers import get_first_record_for_slice, get_first_stream_slice + STRIPE_ERROR_CODES = { "more_permissions_required": "This is most likely due to insufficient permissions on the credentials in use. " "Try to grant required permissions/scopes or re-authenticate", @@ -20,6 +23,60 @@ class StripeAvailabilityStrategy(HttpAvailabilityStrategy): + def _check_availability_for_sync_mode( + self, + stream: Stream, + sync_mode: SyncMode, + logger: logging.Logger, + source: Optional["Source"], + stream_state: Optional[Mapping[str, Any]], + ) -> Tuple[bool, Optional[str]]: + try: + # Some streams need a stream slice to read records (e.g. if they have a SubstreamPartitionRouter) + # Streams that don't need a stream slice will return `None` as their first stream slice. + stream_slice = get_first_stream_slice(stream, sync_mode, stream_state) + except StopIteration: + # If stream_slices has no `next()` item (Note - this is different from stream_slices returning [None]!) + # This can happen when a substream's `stream_slices` method does a `for record in parent_records: yield ` + # without accounting for the case in which the parent stream is empty. + reason = f"Cannot attempt to connect to stream {stream.name} - no stream slices were found, likely because the parent stream is empty." + return False, reason + except HTTPError as error: + is_available, reason = self.handle_http_error(stream, logger, source, error) + if not is_available: + reason = f"Unable to get slices for {stream.name} stream, because of error in parent stream. {reason}" + return is_available, reason + + try: + get_first_record_for_slice(stream, sync_mode, stream_slice, stream_state) + return True, None + except StopIteration: + logger.info(f"Successfully connected to stream {stream.name}, but got 0 records.") + return True, None + except HTTPError as error: + is_available, reason = self.handle_http_error(stream, logger, source, error) + if not is_available: + reason = f"Unable to read {stream.name} stream. {reason}" + return is_available, reason + + def check_availability(self, stream: Stream, logger: logging.Logger, source: Optional["Source"]) -> Tuple[bool, Optional[str]]: + """ + Check stream availability by attempting to read the first record of the + stream. + + :param stream: stream + :param logger: source logger + :param source: (optional) source + :return: A tuple of (boolean, str). If boolean is true, then the stream + is available, and no str is required. Otherwise, the stream is unavailable + for some reason and the str should describe what went wrong and how to + resolve the unavailability, if possible. 
+ """ + is_available, reason = self._check_availability_for_sync_mode(stream, SyncMode.full_refresh, logger, source, None) + if not is_available or not stream.supports_incremental: + return is_available, reason + return self._check_availability_for_sync_mode(stream, SyncMode.incremental, logger, source, {stream.cursor_field: 0}) + def handle_http_error( self, stream: Stream, logger: logging.Logger, source: Optional["Source"], error: HTTPError ) -> Tuple[bool, Optional[str]]: diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py b/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py new file mode 100644 index 000000000000..dad073ae485b --- /dev/null +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/stream_helpers.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import Any, Mapping, Optional + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams.core import Stream, StreamData + + +def get_first_stream_slice(stream, sync_mode, stream_state) -> Optional[Mapping[str, Any]]: + """ + Gets the first stream_slice from a given stream's stream_slices. + :param stream: stream + :param sync_mode: sync_mode + :param stream_state: stream_state + :raises StopIteration: if there is no first slice to return (the stream_slices generator is empty) + :return: first stream slice from 'stream_slices' generator (`None` is a valid stream slice) + """ + # We wrap the return output of stream_slices() because some implementations return types that are iterable, + # but not iterators such as lists or tuples + slices = iter(stream.stream_slices(sync_mode=sync_mode, cursor_field=stream.cursor_field, stream_state=stream_state)) + return next(slices) + + +def get_first_record_for_slice( + stream: Stream, sync_mode: SyncMode, stream_slice: Optional[Mapping[str, Any]], stream_state: Optional[Mapping[str, Any]] +) -> StreamData: + """ + Gets the first record for a stream_slice of a stream. 
+ :param stream: stream + :param sync_mode: sync_mode + :param stream_slice: stream_slice + :param stream_state: stream_state + :raises StopIteration: if there is no first record to return (the read_records generator is empty) + :return: StreamData containing the first record in the slice + """ + # We wrap the return output of read_records() because some implementations return types that are iterable, + # but not iterators such as lists or tuples + records_for_slice = iter(stream.read_records(sync_mode=sync_mode, stream_slice=stream_slice, stream_state=stream_state)) + return next(records_for_slice) diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py index a8b7feaef07f..f47f34d26bc6 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py @@ -15,6 +15,7 @@ from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream +from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer from source_stripe.availability_strategy import StripeAvailabilityStrategy, StripeSubStreamAvailabilityStrategy @@ -491,12 +492,9 @@ class CustomerBalanceTransactions(StripeStream): API docs: https://stripe.com/docs/api/customer_balance_transactions/list """ - def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): - return f"customers/{stream_slice['id']}/balance_transactions" - - @property - def customers(self) -> IncrementalStripeStream: - return IncrementalStripeStream( + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.parent = IncrementalStripeStream( name="customers", path="customers", use_cache=USE_CACHE, @@ -506,13 +504,19 @@ def customers(self) -> IncrementalStripeStream: start_date=self.start_date, ) + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): + return f"customers/{stream_slice['id']}/balance_transactions" + + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() + def stream_slices( self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: - parent_stream = self.customers - slices = parent_stream.stream_slices(sync_mode=SyncMode.full_refresh) + slices = self.parent.stream_slices(sync_mode=SyncMode.full_refresh) for _slice in slices: - for customer in parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice): + for customer in self.parent.read_records(sync_mode=SyncMode.full_refresh, stream_slice=_slice): # we use `get` here because some attributes may not be returned by some API versions if customer.get("next_invoice_sequence") == 1 and customer.get("balance") == 0: # We're making this check in order to speed up a sync. 
if a customer's balance is 0 and there are no @@ -547,6 +551,12 @@ def __init__(self, **kwargs): def path(self, **kwargs) -> str: return "setup_attempts" + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + # we use the default http availability strategy here because parent stream may lack data in the incremental stream mode + # and this stream would be marked inaccessible which is not actually true + return HttpAvailabilityStrategy() + def stream_slices( self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: @@ -586,6 +596,10 @@ def __init__(self, *args, **kwargs): parent = StripeStream(*args, name="accounts", path="accounts", use_cache=USE_CACHE, **kwargs) super().__init__(*args, parent=parent, **kwargs) + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() + def path(self, stream_slice: Mapping[str, Any] = None, **kwargs): return f"accounts/{stream_slice['parent']['id']}/persons" @@ -597,7 +611,9 @@ def stream_slices( class StripeSubStream(StripeStream, HttpSubStream): - pass + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + return StripeSubStreamAvailabilityStrategy() class StripeLazySubStream(StripeStream, HttpSubStream): @@ -800,10 +816,6 @@ def stream_slices( def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: return {self.cursor_field: max(current_stream_state.get(self.cursor_field, 0), latest_record[self.cursor_field])} - @property - def availability_strategy(self) -> Optional[AvailabilityStrategy]: - return StripeSubStreamAvailabilityStrategy() - @property def raise_on_http_errors(self) -> bool: return False @@ -822,3 +834,9 @@ def parse_response(self, response: requests.Response, *args, **kwargs) -> Iterab ) return [] response.raise_for_status() + + @property + def availability_strategy(self) -> Optional[AvailabilityStrategy]: + # we use the default http availability strategy here because parent stream may lack data in the incremental stream mode + # and this stream would be marked inaccessible which is not actually true + return HttpAvailabilityStrategy() diff --git a/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py b/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py index 0f747acac434..ee41b71dd049 100644 --- a/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py +++ b/airbyte-integrations/connectors/source-stripe/unit_tests/test_availability_strategy.py @@ -3,34 +3,45 @@ # import logging +import urllib.parse +import pytest from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy from source_stripe.availability_strategy import STRIPE_ERROR_CODES, StripeSubStreamAvailabilityStrategy from source_stripe.streams import IncrementalStripeStream, StripeLazySubStream -def test_traverse_over_substreams(mocker): +@pytest.fixture() +def stream_mock(mocker): + def _mocker(): + return mocker.Mock(stream_slices=mocker.Mock(return_value=[{}]), read_records=mocker.Mock(return_value=[{}])) + return _mocker + + +def test_traverse_over_substreams(stream_mock, mocker): # Mock base HttpAvailabilityStrategy to capture all the check_availability method calls - check_availability_mock = mocker.MagicMock() - check_availability_mock.return_value = (True, None) + 
check_availability_mock = mocker.MagicMock(return_value=(True, None)) + cdk_check_availability_mock = mocker.MagicMock(return_value=(True, None)) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock + ) + mocker.patch( + "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", cdk_check_availability_mock ) - # Prepare tree of nested objects - root = mocker.Mock() + root = stream_mock() root.availability_strategy = HttpAvailabilityStrategy() root.parent = None - child_1 = mocker.Mock() + child_1 = stream_mock() child_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1.parent = root - child_1_1 = mocker.Mock() + child_1_1 = stream_mock() child_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1.parent = child_1 - child_1_1_1 = mocker.Mock() + child_1_1_1 = stream_mock() child_1_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1_1.parent = child_1_1 @@ -38,39 +49,38 @@ def test_traverse_over_substreams(mocker): is_available, reason = child_1_1_1.availability_strategy.check_availability(child_1_1_1, mocker.Mock(), mocker.Mock()) assert is_available and reason is None - # Check availability strategy was called once for every nested object - assert check_availability_mock.call_count == 4 + assert check_availability_mock.call_count == 3 + assert cdk_check_availability_mock.call_count == 1 # Check each availability strategy was called with proper instance argument - assert id(check_availability_mock.call_args_list[0].args[0]) == id(root) - assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1) - assert id(check_availability_mock.call_args_list[2].args[0]) == id(child_1_1) - assert id(check_availability_mock.call_args_list[3].args[0]) == id(child_1_1_1) + assert id(cdk_check_availability_mock.call_args_list[0].args[0]) == id(root) + assert id(check_availability_mock.call_args_list[0].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1_1) + assert id(check_availability_mock.call_args_list[2].args[0]) == id(child_1_1_1) -def test_traverse_over_substreams_failure(mocker): +def test_traverse_over_substreams_failure(stream_mock, mocker): # Mock base HttpAvailabilityStrategy to capture all the check_availability method calls - check_availability_mock = mocker.MagicMock() - check_availability_mock.side_effect = [(True, None), (False, "child_1")] + check_availability_mock = mocker.MagicMock(side_effect=[(True, None), (False, "child_1")]) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) # Prepare tree of nested objects - root = mocker.Mock() + root = stream_mock() root.availability_strategy = HttpAvailabilityStrategy() root.parent = None - child_1 = mocker.Mock() + child_1 = stream_mock() child_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1.parent = root - child_1_1 = mocker.Mock() + child_1_1 = stream_mock() child_1_1.availability_strategy = StripeSubStreamAvailabilityStrategy() child_1_1.parent = child_1 - child_1_1_1 = mocker.Mock() + child_1_1_1 = stream_mock() child_1_1_1.availability_strategy = 
StripeSubStreamAvailabilityStrategy() child_1_1_1.parent = child_1_1 @@ -83,15 +93,15 @@ def test_traverse_over_substreams_failure(mocker): assert check_availability_mock.call_count == 2 # Check each availability strategy was called with proper instance argument - assert id(check_availability_mock.call_args_list[0].args[0]) == id(root) - assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[0].args[0]) == id(child_1) + assert id(check_availability_mock.call_args_list[1].args[0]) == id(child_1_1) def test_substream_availability(mocker, stream_by_name): check_availability_mock = mocker.MagicMock() check_availability_mock.return_value = (True, None) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) stream = stream_by_name("invoice_line_items") is_available, reason = stream.availability_strategy.check_availability(stream, mocker.Mock(), mocker.Mock()) @@ -106,7 +116,7 @@ def test_substream_availability_no_parent(mocker, stream_by_name): check_availability_mock = mocker.MagicMock() check_availability_mock.return_value = (True, None) mocker.patch( - "airbyte_cdk.sources.streams.http.availability_strategy.HttpAvailabilityStrategy.check_availability", check_availability_mock + "source_stripe.availability_strategy.StripeAvailabilityStrategy.check_availability", check_availability_mock ) stream = stream_by_name("invoice_line_items") stream.parent = None @@ -125,3 +135,91 @@ def test_403_error_handling(stream_by_name, requests_mock): available, message = stream.check_availability(logger) assert not available assert STRIPE_ERROR_CODES[error_code] in message + + +@pytest.mark.parametrize( + "stream_name, endpoints, expected_calls", + ( + ( + "accounts", + { + "/v1/accounts": {"data": []} + }, + 1 + ), + ( + "refunds", + { + "/v1/refunds": {"data": []} + }, + 2 + ), + ( + "credit_notes", + { + "/v1/credit_notes": {"data": []}, "/v1/events": {"data": []} + }, + 2 + ), + ( + "charges", + { + "/v1/charges": {"data": []}, "/v1/events": {"data": []} + }, + 2 + ), + ( + "subscription_items", + { + "/v1/subscriptions": {"data": [{"id": 1}]}, + "/v1/events": {"data": []} + }, + 3 + ), + ( + "bank_accounts", + { + "/v1/customers": {"data": [{"id": 1}]}, + "/v1/events": {"data": []} + }, + 2 + ), + ( + "customer_balance_transactions", + { + "/v1/events": {"data": [{"data":{"object": {"id": 1}}, "created": 1, "type": "customer.updated"}]}, + "/v1/customers": {"data": [{"id": 1}]}, + "/v1/customers/1/balance_transactions": {"data": []} + }, + 4 + ), + ( + "transfer_reversals", + { + "/v1/transfers": {"data": [{"id": 1}]}, + "/v1/events": {"data": [{"data":{"object": {"id": 1}}, "created": 1, "type": "transfer.updated"}]}, + "/v1/transfers/1/reversals": {"data": []} + }, + 4 + ), + ( + "persons", + { + "/v1/accounts": {"data": [{"id": 1}]}, + "/v1/events": {"data": []}, + "/v1/accounts/1/persons": {"data": []} + }, + 4 + ) + ) +) +def test_availability_strategy_visits_endpoints(stream_by_name, stream_name, endpoints, expected_calls, requests_mock, mocker, config): + for endpoint, data in endpoints.items(): + requests_mock.get(endpoint, json=data) + stream = stream_by_name(stream_name, config) + is_available, reason = stream.check_availability(mocker.Mock(), mocker.Mock()) + assert (is_available, reason) == (True, None) + assert 
len(requests_mock.request_history) == expected_calls + + for call in requests_mock.request_history: + assert urllib.parse.urlparse(call.url).path in endpoints.keys() diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md index 3f949c6847e6..c873eae04833 100644 --- a/docs/integrations/sources/stripe.md +++ b/docs/integrations/sources/stripe.md @@ -216,7 +216,8 @@ Each record is marked with `is_deleted` flag when the appropriate event happens | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 5.0.0 | 2023-11-14 | [32286](https://github.com/airbytehq/airbyte/pull/32286/) | Fix multiple issues regarding usage of the incremental sync mode for the `Refunds`, `CheckoutSessions`, `CheckoutSessionsLineItems` streams. Fix schemas for the streams: `Invoices`, `Subscriptions`, `SubscriptionSchedule` | +| 5.0.1 | 2023-11-17 | [32638](https://github.com/airbytehq/airbyte/pull/32638/) | Availability stretegy: check availability of both endpoints (if applicable) - common API + events API | +| 5.0.0 | 2023-11-16 | [32286](https://github.com/airbytehq/airbyte/pull/32286/) | Fix multiple issues regarding usage of the incremental sync mode for the `Refunds`, `CheckoutSessions`, `CheckoutSessionsLineItems` streams. Fix schemas for the streams: `Invoices`, `Subscriptions`, `SubscriptionSchedule` | | 4.5.4 | 2023-11-16 | [32284](https://github.com/airbytehq/airbyte/pull/32284/) | Enable client-side rate limiting | | 4.5.3 | 2023-11-14 | [32473](https://github.com/airbytehq/airbyte/pull/32473/) | Have all full_refresh stream syncs be concurrent | | 4.5.2 | 2023-11-03 | [32146](https://github.com/airbytehq/airbyte/pull/32146/) | Fix multiple BankAccount issues | From 428a68167fffa1b94c9e5e04346a854071e77401 Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Fri, 17 Nov 2023 10:48:03 -0800 Subject: [PATCH 42/57] JDBC sources: improve timestamptz handling (#32616) Co-authored-by: edgao --- .../source/redshift/RedshiftSource.java | 12 ++-- .../redshift/RedshiftSourceOperations.java | 25 +++++++ .../sources/RedshiftSourceOperationsTest.java | 69 +++++++++++++++++++ docs/integrations/sources/redshift.md | 1 + 4 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java index e7258bc07e8b..d80a2558ef1b 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSource.java @@ -47,10 +47,7 @@ public JsonNode toDatabaseConfig(final JsonNode redshiftConfig) { final ImmutableMap.Builder builder = ImmutableMap.builder() .put(JdbcUtils.USERNAME_KEY, redshiftConfig.get(JdbcUtils.USERNAME_KEY).asText()) .put(JdbcUtils.PASSWORD_KEY, redshiftConfig.get(JdbcUtils.PASSWORD_KEY).asText()) 
- .put(JdbcUtils.JDBC_URL_KEY, String.format(DatabaseDriver.REDSHIFT.getUrlFormatString(), - redshiftConfig.get(JdbcUtils.HOST_KEY).asText(), - redshiftConfig.get(JdbcUtils.PORT_KEY).asInt(), - redshiftConfig.get(JdbcUtils.DATABASE_KEY).asText())); + .put(JdbcUtils.JDBC_URL_KEY, getJdbcUrl(redshiftConfig)); if (redshiftConfig.has(JdbcUtils.SCHEMAS_KEY) && redshiftConfig.get(JdbcUtils.SCHEMAS_KEY).isArray()) { schemas = new ArrayList<>(); @@ -75,6 +72,13 @@ public JsonNode toDatabaseConfig(final JsonNode redshiftConfig) { .build()); } + public static String getJdbcUrl(final JsonNode redshiftConfig) { + return String.format(DatabaseDriver.REDSHIFT.getUrlFormatString(), + redshiftConfig.get(JdbcUtils.HOST_KEY).asText(), + redshiftConfig.get(JdbcUtils.PORT_KEY).asInt(), + redshiftConfig.get(JdbcUtils.DATABASE_KEY).asText()); + } + private void addSsl(final List additionalProperties) { additionalProperties.add("ssl=true"); additionalProperties.add("sslfactory=com.amazon.redshift.ssl.NonValidatingFactory"); diff --git a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java index 487fe6da1c29..2f3b9f169ee3 100644 --- a/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java +++ b/airbyte-integrations/connectors/source-redshift/src/main/java/io/airbyte/integrations/source/redshift/RedshiftSourceOperations.java @@ -14,6 +14,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; +import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; import org.slf4j.Logger; @@ -23,6 +24,19 @@ public class RedshiftSourceOperations extends JdbcSourceOperations { private static final Logger LOGGER = LoggerFactory.getLogger(RedshiftSourceOperations.class); + @Override + public void copyToJsonField(final ResultSet resultSet, final int colIndex, final ObjectNode json) throws SQLException { + if ("timestamptz".equalsIgnoreCase(resultSet.getMetaData().getColumnTypeName(colIndex))) { + // Massive hack. Sometimes the JDBCType is TIMESTAMP (i.e. without timezone) + // even though it _should_ be TIMESTAMP_WITH_TIMEZONE. + // Check for this case explicitly. 
+ final String columnName = resultSet.getMetaData().getColumnName(colIndex); + putTimestampWithTimezone(json, columnName, resultSet, colIndex); + } else { + super.copyToJsonField(resultSet, colIndex, json); + } + } + @Override protected void putTime(final ObjectNode node, final String columnName, @@ -44,6 +58,17 @@ protected void setTimestamp(final PreparedStatement preparedStatement, final int preparedStatement.setTimestamp(parameterIndex, Timestamp.valueOf(date)); } + @Override + protected void putTimestampWithTimezone(final ObjectNode node, final String columnName, final ResultSet resultSet, final int index) + throws SQLException { + try { + super.putTimestampWithTimezone(node, columnName, resultSet, index); + } catch (final Exception e) { + final Instant instant = resultSet.getTimestamp(index).toInstant(); + node.put(columnName, instant.toString()); + } + } + @Override protected void setDate(final PreparedStatement preparedStatement, final int parameterIndex, final String value) throws SQLException { final LocalDate date = LocalDate.parse(value); diff --git a/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java new file mode 100644 index 000000000000..856f6e9d1e6e --- /dev/null +++ b/airbyte-integrations/connectors/source-redshift/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/RedshiftSourceOperationsTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.io.airbyte.integration_tests.sources; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.source.jdbc.JdbcDataSourceUtils; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.redshift.RedshiftSource; +import io.airbyte.integrations.source.redshift.RedshiftSourceOperations; +import java.nio.file.Path; +import java.sql.SQLException; +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.util.List; +import javax.sql.DataSource; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class RedshiftSourceOperationsTest { + + private JdbcDatabase database; + + @BeforeEach + void setup() { + final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/config.json"))); + + final DataSource dataSource = DataSourceFactory.create( + config.get("username").asText(), + config.get("password").asText(), + DatabaseDriver.REDSHIFT.getDriverClassName(), + RedshiftSource.getJdbcUrl(config), + JdbcDataSourceUtils.getConnectionProperties(config)); + database = new DefaultJdbcDatabase(dataSource, new RedshiftSourceOperations()); + } + + @Test + void testTimestampWithTimezone() throws SQLException { + // CURRENT_TIMESTAMP is converted to a string by queryJsons. + // CAST(CURRENT_TIMESTAMP AS VARCHAR) does the timestamp -> string conversion on the server side. 
+ // If queryJsons is implemented correctly, both timestamps should be the same. + final List result = database.queryJsons("SELECT CURRENT_TIMESTAMP, CAST(CURRENT_TIMESTAMP AS VARCHAR)"); + + final Instant clientSideParse = Instant.parse(result.get(0).get("timestamptz").asText()); + // Redshift's default timestamp format is "2023-11-17 17:50:36.746606+00", which Instant.parse() + // can't handle. Build a custom datetime formatter. + // (Redshift supports server-side timestamp formatting, but it doesn't provide a way to force + // HH:MM offsets, which are required by Instant.parse) + final Instant serverSideParse = new DateTimeFormatterBuilder() + .append(DateTimeFormatter.ISO_DATE) + .appendLiteral(' ') + .append(DateTimeFormatter.ISO_LOCAL_TIME) + // "X" represents a +/-HH offset + .appendPattern("X") + .toFormatter() + .parse(result.get(0).get("varchar").asText(), Instant::from); + assertEquals(serverSideParse, clientSideParse); + } + +} diff --git a/docs/integrations/sources/redshift.md b/docs/integrations/sources/redshift.md index dafe396d2684..f7d84b6e06d2 100644 --- a/docs/integrations/sources/redshift.md +++ b/docs/integrations/sources/redshift.md @@ -56,6 +56,7 @@ All Redshift connections are encrypted using SSL | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------- | +| (none) | 2023-11-17 | [32616](https://github.com/airbytehq/airbyte/pull/32616) | Improve timestamptz handling | | 0.4.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | | 0.3.17 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | | 0.3.16 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | From b7afdbe65089f680b902ff6f75a78715dde78a35 Mon Sep 17 00:00:00 2001 From: Alexandre Cuoci Date: Fri, 17 Nov 2023 15:06:51 -0500 Subject: [PATCH 43/57] Update implementation-guide.md (#32625) --- docs/enterprise-setup/self-managed/README.md | 16 ++++++++++++---- .../self-managed/implementation-guide.md | 4 ++-- docs/enterprise-setup/self-managed/sso.md | 1 - docusaurus/sidebars.js | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/enterprise-setup/self-managed/README.md b/docs/enterprise-setup/self-managed/README.md index 30d1e7e05598..21d5fedf047d 100644 --- a/docs/enterprise-setup/self-managed/README.md +++ b/docs/enterprise-setup/self-managed/README.md @@ -1,9 +1,17 @@ # Airbyte Self-Managed -[Airbyte Self-Managed](https://airbyte.com/solutions/airbyte-enterprise) is a self-hosted version of Airbyte with additional features for enterprise customers. Airbyte Enterprise is in an early access stage for select priority users. +[Airbyte Self-Managed](https://airbyte.com/product/airbyte-enterprise) is the best way to run Airbyte yourself. You get all 300+ pre-built connectors, data never leaves your environment, and Airbyte becomes self-serve in your organization with new tools to manage multiple users, and multiple teams using Airbyte all in one place. -A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. +A valid license key is required to get started with Airbyte Self-Managed. 
[Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. The following pages outline how to: -1. [Deploy Airbyte Enterprise using Kubernetes](./implementation-guide.md) -2. [Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise](./sso.md) +1. [Deploy Airbyte Self-Managed using Kubernetes](./implementation-guide.md) +2. [Configure Okta for Single Sign-On (SSO) with Airbyte Self-Managed](./sso.md) + +| Feature | Description | +|---------------------------|--------------------------------------------------------------------------------------------------------------| +| Premium Support | [Priority assistance](https://docs.airbyte.com/operator-guides/contact-support/#airbyte-enterprise-self-hosted-support) with deploying, managing and upgrading Airbyte or troubleshooting any connection issues. | +| User Management | [Okta SSO](./sso.md) to extend each Airbyte workspace to multiple users | +| Multiple Workspaces | Ability to create + manage multiple workspaces on one Airbyte instance | +| Role-Based Access Control | Isolate workspaces from one another with users roles scoped to individual workspaces | + diff --git a/docs/enterprise-setup/self-managed/implementation-guide.md b/docs/enterprise-setup/self-managed/implementation-guide.md index 3b5c2da9c5d2..882a024436bb 100644 --- a/docs/enterprise-setup/self-managed/implementation-guide.md +++ b/docs/enterprise-setup/self-managed/implementation-guide.md @@ -60,7 +60,7 @@ cp configs/airbyte.sample.yml configs/airbyte.yml 3. Add your Airbyte Enterprise license key to your `airbyte.yml`. -4. Add your [auth details](/airbyte-enterprise#single-sign-on-sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding. +4. Add your [auth details](/enterprise-setup/self-managed/sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding.
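A minimal sketch of what the `auth` section of `airbyte.yml` might look like for an Okta app integration, to make step 4 concrete. The authoritative key names live in `configs/airbyte.sample.yml` and the SSO guide linked above, so treat the field names and placeholder values below as illustrative assumptions rather than the exact schema:

```yaml
# Illustrative sketch only — confirm key names against configs/airbyte.sample.yml and the SSO guide.
auth:
  identity-providers:
    - type: okta
      domain: $OKTA_DOMAIN                  # e.g. dev-123456.okta.com (assumed placeholder)
      app-name: $OKTA_APP_INTEGRATION_NAME  # name of the Okta app integration
      client-id: $OKTA_CLIENT_ID
      client-secret: $OKTA_CLIENT_SECRET
```

Each value corresponds to the Okta app integration created while following the SSO configuration guide; double-check them before installing, since auth settings are hard to change after Airbyte is deployed.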
Configuring auth in your airbyte.yml file @@ -100,4 +100,4 @@ After specifying your own configuration, run the following command: ```text ./tools/bin/install_airbyte_pro_on_helm.sh --values path/to/values.yaml $RELEASE_NAME airbyte/airbyte -``` \ No newline at end of file +``` diff --git a/docs/enterprise-setup/self-managed/sso.md b/docs/enterprise-setup/self-managed/sso.md index a7295e60ecf5..55d7053736f7 100644 --- a/docs/enterprise-setup/self-managed/sso.md +++ b/docs/enterprise-setup/self-managed/sso.md @@ -46,7 +46,6 @@ _Example values_ `` should point to where your Airbyte instance will be available, including the http/https protocol. - ## Deploying Airbyte Self-Managed with Okta Once your Okta app is set up, you're ready to deploy Airbyte with SSO. Take note of the following configuration values, as you will need them to configure Airbyte to use your new Okta SSO app integration: diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 61e5dde2146a..55f4497d1e22 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -420,7 +420,7 @@ const deployAirbyte = { const airbyteSelfManaged = { type: "category", - label: "Airbyte Self Managed", + label: "Airbyte Self-Managed", link: { type: "doc", id: "enterprise-setup/self-managed/README", From 8a678fb931fb524fa4150a0babd72a12e1e31039 Mon Sep 17 00:00:00 2001 From: midavadim Date: Fri, 17 Nov 2023 22:44:01 +0200 Subject: [PATCH 44/57] :tada: Source Pinterest add custom resports (#32601) Co-authored-by: midavadim --- .../acceptance-test-config.yml | 3 +- .../config_custom_report.json | 18 ++ .../configured_catalog_custom_report.json | 15 ++ .../integration_tests/expected_records.jsonl | 3 +- .../connectors/source-pinterest/metadata.yaml | 2 +- .../source_pinterest/reports/reports.py | 51 +++++ .../source_pinterest/source.py | 55 ++++- .../source_pinterest/spec.json | 214 ++++++++++++++++++ .../unit_tests/test_reports.py | 61 +++++ .../unit_tests/test_source.py | 2 +- docs/integrations/sources/pinterest.md | 53 ++--- 11 files changed, 438 insertions(+), 39 deletions(-) create mode 100644 airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json create mode 100644 airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json diff --git a/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml b/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml index 768d1d550502..4eab013a5fad 100644 --- a/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-pinterest/acceptance-test-config.yml @@ -5,7 +5,8 @@ acceptance_tests: tests: - spec_path: source_pinterest/spec.json backward_compatibility_tests_config: - disable_for_version: "0.7.0" # removed non-working token based auth method + disable_for_version: "0.7.3" # added custom report + # disable_for_version: "0.7.0" # removed non-working token based auth method # disable_for_version: "0.5.0" # Add Pattern for "start_date" connection: tests: diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json b/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json new file mode 100644 index 000000000000..c8991f049ddf --- /dev/null +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/config_custom_report.json @@ -0,0 +1,18 @@ +{ + "client_id": "1111111", + "client_secret": "XXXX", + "refresh_token": 
"XXXXX" + "start_date": "2023-01-08", + "custom_reports": [{ + "name": "vadim_report", + "level": "AD_GROUP", + "granularity": "MONTH", + "click_window_days": 30, + "engagement_window_days": 30, + "view_window_days": 30, + "conversion_report_time": "TIME_OF_CONVERSION", + "attribution_types": ["INDIVIDUAL", "HOUSEHOLD"], + "columns": ["ADVERTISER_ID", "AD_ACCOUNT_ID", "AD_GROUP_ID", "CTR", "IMPRESSION_2"], + "start_date": "2023-01-08" + }] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json b/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json new file mode 100644 index 000000000000..645099b98d0e --- /dev/null +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/configured_catalog_custom_report.json @@ -0,0 +1,15 @@ +{ + "streams": [ + { + "stream": { + "name": "custom_vadim_report", + "json_schema": {}, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append" + } + ] +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl index dbba2254d507..fb45fd3c024b 100644 --- a/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-pinterest/integration_tests/expected_records.jsonl @@ -20,4 +20,5 @@ {"stream": "ad_group_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "TOTAL_IMPRESSION_FREQUENCY": 1.0, "TOTAL_IMPRESSION_USER": 1.0, "DATE": "2023-10-29"}, "emitted_at": 1699895043538} {"stream": "ad_group_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "TARGETING_VALUE": "TWOCOLUMN_FEED", "TARGETING_TYPE": "FEED_TYPE", "DATE": "2023-10-29"}, "emitted_at": 1699895106949} {"stream": "pin_promotion_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TOTAL_IMPRESSION_FREQUENCY": 1.0, "TOTAL_IMPRESSION_USER": 1.0, "DATE": "2023-10-29"}, "emitted_at": 1699895200157} -{"stream": "pin_promotion_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, 
"CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TARGETING_VALUE": "Education > Subjects > Science > Applied Science > Technology", "TARGETING_TYPE": "TARGETED_INTEREST", "DATE": "2023-10-29"}, "emitted_at": 1699895289749} \ No newline at end of file +{"stream": "pin_promotion_targeting_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ENTITY_STATUS": "ACTIVE", "AD_GROUP_ID": "2680068678993", "AD_ID": "687218400210", "CAMPAIGN_DAILY_SPEND_CAP": 25000000.0, "CAMPAIGN_ENTITY_STATUS": "ACTIVE", "CAMPAIGN_ID": 626744128982.0, "CAMPAIGN_LIFETIME_SPEND_CAP": 0.0, "CAMPAIGN_NAME": "2021-06-08 09:08 UTC | Brand awareness", "IMPRESSION_2": 1.0, "PIN_ID": 6.66743919837295e+17, "PIN_PROMOTION_ID": 687218400210.0, "TARGETING_VALUE": "Education > Subjects > Science > Applied Science > Technology", "TARGETING_TYPE": "TARGETED_INTEREST", "DATE": "2023-10-29"}, "emitted_at": 1699895289749} +{"stream": "custom_vadim_report", "data": {"ADVERTISER_ID": 549761668032.0, "AD_ACCOUNT_ID": "549761668032", "AD_GROUP_ID": "2680068678993", "IMPRESSION_2": 11.0, "DATE_RANGE": "2023-10-01 - 2023-10-31"}, "emitted_at": 1700158289892} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-pinterest/metadata.yaml b/airbyte-integrations/connectors/source-pinterest/metadata.yaml index 82a76edc0a4b..a0da1eb5a704 100644 --- a/airbyte-integrations/connectors/source-pinterest/metadata.yaml +++ b/airbyte-integrations/connectors/source-pinterest/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: 5cb7e5fe-38c2-11ec-8d3d-0242ac130003 - dockerImageTag: 0.8.0 + dockerImageTag: 0.8.1 dockerRepository: airbyte/source-pinterest connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py index 98e4809bb296..04e85473ff21 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/reports/reports.py @@ -8,6 +8,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional from urllib.parse import urljoin +import airbyte_cdk.sources.utils.casing as casing import backoff import requests from airbyte_cdk.models import SyncMode @@ -260,3 +261,53 @@ class KeywordReport(PinterestAnalyticsTargetingReportStream): @property def level(self): return "KEYWORD" + + +class CustomReport(PinterestAnalyticsTargetingReportStream): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + self._custom_class_name = f"Custom_{self.config['name']}" + self._level = self.config["level"] + self.granularity = self.config["granularity"] + self.click_window_days = self.config["click_window_days"] + self.engagement_window_days = self.config["engagement_window_days"] + self.view_window_days = self.config["view_window_days"] + self.conversion_report_time = self.config["conversion_report_time"] + self.attribution_types = self.config["attribution_types"] + self.columns = self.config["columns"] + + @property + def level(self): + return self._level + + @property 
+ def name(self) -> str: + """We override stream name to let the user change it via configuration.""" + name = self._custom_class_name or self.__class__.__name__ + return casing.camel_to_snake(name) + + def request_body_json(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Optional[Mapping]: + """Return the body of the API request in JSON format.""" + return { + "start_date": stream_slice["start_date"], + "end_date": stream_slice["end_date"], + "level": self.level, + "granularity": self.granularity, + "click_window_days": self.click_window_days, + "engagement_window_days": self.engagement_window_days, + "view_window_days": self.view_window_days, + "conversion_report_time": self.conversion_report_time, + "attribution_types": self.attribution_types, + "columns": self.columns, + } + + @property + def window_in_days(self): + """Docs: https://developers.pinterest.com/docs/api/v5/#operation/analytics/get_report""" + if self.granularity == "HOUR": + return 2 + elif self.level == "PRODUCT_ITEM": + return 31 + else: + return 185 diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py index b8f9b693a9ab..ea5af593ebf8 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/source.py @@ -3,8 +3,9 @@ # import copy +import logging from base64 import standard_b64encode -from typing import Any, List, Mapping, Tuple +from typing import Any, List, Mapping, Tuple, Type import pendulum import requests @@ -21,6 +22,7 @@ AdvertizerReport, AdvertizerTargetingReport, CampaignTargetingReport, + CustomReport, KeywordReport, PinPromotionReport, PinPromotionTargetingReport, @@ -52,6 +54,8 @@ UserAccountAnalytics, ) +logger = logging.getLogger("airbyte") + class SourcePinterest(AbstractSource): def _validate_and_transform(self, config: Mapping[str, Any], amount_of_days_allowed_for_lookup: int = 89): @@ -60,20 +64,25 @@ def _validate_and_transform(self, config: Mapping[str, Any], amount_of_days_allo latest_date_allowed_by_api = today.subtract(days=amount_of_days_allowed_for_lookup) start_date = config.get("start_date") - if not start_date: - config["start_date"] = latest_date_allowed_by_api - else: + + # transform to datetime + if start_date and isinstance(start_date, str): try: - config["start_date"] = pendulum.from_format(config["start_date"], "YYYY-MM-DD") + config["start_date"] = pendulum.from_format(start_date, "YYYY-MM-DD") except ValueError: - message = "Entered `Start Date` does not match format YYYY-MM-DD" + message = f"Entered `Start Date` {start_date} does not match format YYYY-MM-DD" raise AirbyteTracedException( message=message, internal_message=message, failure_type=FailureType.config_error, ) - if (today - config["start_date"]).days > amount_of_days_allowed_for_lookup: - config["start_date"] = latest_date_allowed_by_api + + if not start_date or config["start_date"] < latest_date_allowed_by_api: + logger.info( + f"Current start_date: {start_date} does not meet API report requirements. 
Resetting start_date to: {latest_date_allowed_by_api}" + ) + config["start_date"] = latest_date_allowed_by_api + return config @staticmethod @@ -154,4 +163,32 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: ProductGroupTargetingReport(ad_accounts, config=report_config), KeywordReport(ad_accounts, config=report_config), ProductItemReport(ad_accounts, config=report_config), - ] + ] + self.get_custom_report_streams(ad_accounts, config=report_config) + + def get_custom_report_streams(self, parent, config: dict) -> List[Type[Stream]]: + """return custom report streams""" + custom_streams = [] + for report_config in config.get("custom_reports", []): + report_config["authenticator"] = config["authenticator"] + + # https://developers.pinterest.com/docs/api/v5/#operation/analytics/get_report + if report_config.get("granularity") == "HOUR": + # Otherwise: Response Code: 400 {"code":1,"message":"HOURLY request must be less than 3 days"} + amount_of_days_allowed_for_lookup = 2 + elif report_config.get("level") == "PRODUCT_ITEM": + amount_of_days_allowed_for_lookup = 91 + else: + amount_of_days_allowed_for_lookup = 913 + + start_date = report_config.get("start_date") + if not start_date: + report_config["start_date"] = config.get("start_date") + + report_config = self._validate_and_transform(report_config, amount_of_days_allowed_for_lookup) + + stream = CustomReport( + parent=parent, + config=report_config, + ) + custom_streams.append(stream) + return custom_streams diff --git a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json index c19b80a65901..ad385a664b48 100644 --- a/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json +++ b/airbyte-integrations/connectors/source-pinterest/source_pinterest/spec.json @@ -60,6 +60,220 @@ } } ] + }, + "custom_reports": { + "title": "Custom Reports", + "description": "A list which contains ad statistics entries, each entry must have a name and can contains fields, breakdowns or action_breakdowns. 
Click on \"add\" to fill this field.", + "type": "array", + "items": { + "title": "ReportConfig", + "description": "Config for custom report", + "type": "object", + "required": ["name", "level", "granularity", "columns"], + "properties": { + "name": { + "title": "Name", + "description": "The name value of report", + "type": "string", + "order": 0 + }, + "level": { + "title": "Level", + "description": "Chosen level for API", + "default": "ADVERTISER", + "enum": ["ADVERTISER", "ADVERTISER_TARGETING", "CAMPAIGN", "CAMPAIGN_TARGETING", "AD_GROUP", "AD_GROUP_TARGETING", "PIN_PROMOTION", "PIN_PROMOTION_TARGETING", "KEYWORD", "PRODUCT_GROUP", "PRODUCT_GROUP_TARGETING", "PRODUCT_ITEM"], + "type": "string", + "order": 1 + }, + "granularity": { + "title": "Granularity", + "description": "Chosen granularity for API", + "default": "TOTAL", + "enum": ["TOTAL", "DAY", "HOUR", "WEEK", "MONTH"], + "type": "string", + "order": 2 + }, + "columns": { + "title": "Columns", + "description": "A list of chosen columns", + "default": [], + "type": "array", + "order": 3, + "items": { + "title": "ValidEnums", + "description": "An enumeration.", + "enum": [ + "ADVERTISER_ID", + "AD_ACCOUNT_ID", + "AD_GROUP_ENTITY_STATUS", + "AD_GROUP_ID", + "AD_ID", + "CAMPAIGN_DAILY_SPEND_CAP", + "CAMPAIGN_ENTITY_STATUS", + "CAMPAIGN_ID", + "CAMPAIGN_LIFETIME_SPEND_CAP", + "CAMPAIGN_NAME", + "CHECKOUT_ROAS", + "CLICKTHROUGH_1", + "CLICKTHROUGH_1_GROSS", + "CLICKTHROUGH_2", + "CPC_IN_MICRO_DOLLAR", + "CPM_IN_DOLLAR", + "CPM_IN_MICRO_DOLLAR", + "CTR", + "CTR_2", + "ECPCV_IN_DOLLAR", + "ECPCV_P95_IN_DOLLAR", + "ECPC_IN_DOLLAR", + "ECPC_IN_MICRO_DOLLAR", + "ECPE_IN_DOLLAR", + "ECPM_IN_MICRO_DOLLAR", + "ECPV_IN_DOLLAR", + "ECTR", + "EENGAGEMENT_RATE", + "ENGAGEMENT_1", + "ENGAGEMENT_2", + "ENGAGEMENT_RATE", + "IDEA_PIN_PRODUCT_TAG_VISIT_1", + "IDEA_PIN_PRODUCT_TAG_VISIT_2", + "IMPRESSION_1", + "IMPRESSION_1_GROSS", + "IMPRESSION_2", + "INAPP_CHECKOUT_COST_PER_ACTION", + "OUTBOUND_CLICK_1", + "OUTBOUND_CLICK_2", + "PAGE_VISIT_COST_PER_ACTION", + "PAGE_VISIT_ROAS", + "PAID_IMPRESSION", + "PIN_ID", + "PIN_PROMOTION_ID", + "REPIN_1", + "REPIN_2", + "REPIN_RATE", + "SPEND_IN_DOLLAR", + "SPEND_IN_MICRO_DOLLAR", + "TOTAL_CHECKOUT", + "TOTAL_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CLICKTHROUGH", + "TOTAL_CLICK_ADD_TO_CART", + "TOTAL_CLICK_CHECKOUT", + "TOTAL_CLICK_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CLICK_LEAD", + "TOTAL_CLICK_SIGNUP", + "TOTAL_CLICK_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_CONVERSIONS", + "TOTAL_CUSTOM", + "TOTAL_ENGAGEMENT", + "TOTAL_ENGAGEMENT_CHECKOUT", + "TOTAL_ENGAGEMENT_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_ENGAGEMENT_LEAD", + "TOTAL_ENGAGEMENT_SIGNUP", + "TOTAL_ENGAGEMENT_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_IDEA_PIN_PRODUCT_TAG_VISIT", + "TOTAL_IMPRESSION_FREQUENCY", + "TOTAL_IMPRESSION_USER", + "TOTAL_LEAD", + "TOTAL_OFFLINE_CHECKOUT", + "TOTAL_PAGE_VISIT", + "TOTAL_REPIN_RATE", + "TOTAL_SIGNUP", + "TOTAL_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_VIDEO_3SEC_VIEWS", + "TOTAL_VIDEO_AVG_WATCHTIME_IN_SECOND", + "TOTAL_VIDEO_MRC_VIEWS", + "TOTAL_VIDEO_P0_COMBINED", + "TOTAL_VIDEO_P100_COMPLETE", + "TOTAL_VIDEO_P25_COMBINED", + "TOTAL_VIDEO_P50_COMBINED", + "TOTAL_VIDEO_P75_COMBINED", + "TOTAL_VIDEO_P95_COMBINED", + "TOTAL_VIEW_ADD_TO_CART", + "TOTAL_VIEW_CHECKOUT", + "TOTAL_VIEW_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_VIEW_LEAD", + "TOTAL_VIEW_SIGNUP", + "TOTAL_VIEW_SIGNUP_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_CHECKOUT", + "TOTAL_WEB_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_CLICK_CHECKOUT", + 
"TOTAL_WEB_CLICK_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_ENGAGEMENT_CHECKOUT", + "TOTAL_WEB_ENGAGEMENT_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "TOTAL_WEB_SESSIONS", + "TOTAL_WEB_VIEW_CHECKOUT", + "TOTAL_WEB_VIEW_CHECKOUT_VALUE_IN_MICRO_DOLLAR", + "VIDEO_3SEC_VIEWS_2", + "VIDEO_LENGTH", + "VIDEO_MRC_VIEWS_2", + "VIDEO_P0_COMBINED_2", + "VIDEO_P100_COMPLETE_2", + "VIDEO_P25_COMBINED_2", + "VIDEO_P50_COMBINED_2", + "VIDEO_P75_COMBINED_2", + "VIDEO_P95_COMBINED_2", + "WEB_CHECKOUT_COST_PER_ACTION", + "WEB_CHECKOUT_ROAS", + "WEB_SESSIONS_1", + "WEB_SESSIONS_2" + ] + } + }, + "click_window_days": { + "title": "Click window days", + "description": "Number of days to use as the conversion attribution window for a pin click action.", + "default": 30, + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 4 + }, + "engagement_window_days": { + "title": "Engagement window days", + "description": "Number of days to use as the conversion attribution window for an engagement action.", + "default": [30], + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 5 + }, + "view_window_days": { + "title": "View window days", + "description": "Number of days to use as the conversion attribution window for a view action.", + "default": [30], + "enum": [0, 1, 7, 14, 30, 60], + "type": "integer", + "order": 6 + }, + "conversion_report_time": { + "title": "Conversion report time", + "description": "The date by which the conversion metrics returned from this endpoint will be reported. There are two dates associated with a conversion event: the date that the user interacted with the ad, and the date that the user completed a conversion event..", + "default": "TIME_OF_AD_ACTION", + "enum": ["TIME_OF_AD_ACTION", "TIME_OF_CONVERSION"], + "type": "string", + "order": 7 + }, + "attribution_types": { + "title": "Attribution types", + "description": "List of types of attribution for the conversion report", + "default": ["INDIVIDUAL", "HOUSEHOLD"], + "type": "array", + "items": { + "title": "ValidEnums", + "description": "An enumeration.", + "enum": ["INDIVIDUAL", "HOUSEHOLD"] + }, + "order": 8 + }, + "start_date": { + "type": "string", + "title": "Start Date", + "description": "A date in the format YYYY-MM-DD. If you have not set a date, it would be defaulted to latest allowed date by report api (913 days from today).", + "format": "date", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "pattern_descriptor": "YYYY-MM-DD", + "examples": ["2022-07-28"], + "order": 9 + } + } + } } } }, diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py index 8cc1a4f96057..61ba1f1c61f0 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_reports.py @@ -1,12 +1,31 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +import copy +import os +from unittest.mock import MagicMock +import pytest import responses from source_pinterest import SourcePinterest +from source_pinterest.reports import CampaignAnalyticsReport +from source_pinterest.reports.reports import ( + AdGroupReport, + AdGroupTargetingReport, + AdvertizerReport, + AdvertizerTargetingReport, + CampaignTargetingReport, + KeywordReport, + PinPromotionReport, + PinPromotionTargetingReport, + ProductGroupReport, + ProductGroupTargetingReport, + ProductItemReport, +) from source_pinterest.utils import get_analytics_columns from unit_tests.test_source import setup_responses +os.environ["REQUEST_CACHE_PATH"] = '/tmp' @responses.activate def test_request_body_json(analytics_report_stream, date_range): @@ -62,3 +81,45 @@ def test_streams(test_config): streams = source.streams(test_config) expected_streams_number = 32 assert len(streams) == expected_streams_number + +@responses.activate +def test_custom_streams(test_config): + config = copy.deepcopy(test_config) + config['custom_reports'] = [{ + "name": "vadim_report", + "level": "AD_GROUP", + "granularity": "MONTH", + "click_window_days": 30, + "engagement_window_days": 30, + "view_window_days": 30, + "conversion_report_time": "TIME_OF_CONVERSION", + "attribution_types": ["INDIVIDUAL", "HOUSEHOLD"], + "columns": ["ADVERTISER_ID", "AD_ACCOUNT_ID", "AD_GROUP_ID", "CTR", "IMPRESSION_2"], + "start_date": "2023-01-08" + }] + setup_responses() + source = SourcePinterest() + streams = source.streams(config) + expected_streams_number = 33 + assert len(streams) == expected_streams_number + +@pytest.mark.parametrize( + "report_name, expected_level", + [ + [CampaignAnalyticsReport, 'CAMPAIGN'], + [CampaignTargetingReport, 'CAMPAIGN_TARGETING'], + [AdvertizerReport, 'ADVERTISER'], + [AdvertizerTargetingReport, 'ADVERTISER_TARGETING'], + [AdGroupReport, 'AD_GROUP'], + [AdGroupTargetingReport, 'AD_GROUP_TARGETING'], + [PinPromotionReport, 'PIN_PROMOTION'], + [PinPromotionTargetingReport, 'PIN_PROMOTION_TARGETING'], + [ProductGroupReport, 'PRODUCT_GROUP'], + [ProductGroupTargetingReport, 'PRODUCT_GROUP_TARGETING'], + [ProductItemReport, 'PRODUCT_ITEM'], + [KeywordReport, 'KEYWORD'] + ], +) +def test_level(test_config, report_name, expected_level): + assert report_name(parent=None, config=MagicMock()).level == expected_level + diff --git a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py index d86620fad40c..2fd50933d8e7 100644 --- a/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-pinterest/unit_tests/test_source.py @@ -36,7 +36,7 @@ def test_check_wrong_date_connection(wrong_date_config): logger_mock = MagicMock() with pytest.raises(AirbyteTracedException) as e: source.check_connection(logger_mock, wrong_date_config) - assert e.value.message == "Entered `Start Date` does not match format YYYY-MM-DD" + assert e.value.message == "Entered `Start Date` wrong_date_format does not match format YYYY-MM-DD" @responses.activate diff --git a/docs/integrations/sources/pinterest.md b/docs/integrations/sources/pinterest.md index 5b8f94e40de1..1aae30167248 100644 --- a/docs/integrations/sources/pinterest.md +++ b/docs/integrations/sources/pinterest.md @@ -86,31 +86,32 @@ The connector is restricted by the Pinterest [requests limitation](https://devel | Version | Date | Pull Request | Subject | |:--------|:-----------| 
:------------------------------------------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.8.1 | 2023-11-16 | [32601](https://github.com/airbytehq/airbyte/pull/32601) | added ability to create custom reports | | 0.8.0 | 2023-11-16 | [32592](https://github.com/airbytehq/airbyte/pull/32592) | Make start_date optional; add suggested streams; add missing fields | | 0.7.2 | 2023-11-08 | [32299](https://github.com/airbytehq/airbyte/pull/32299) | added default `AvailabilityStrategy`, fixed bug which cases duplicated requests, added new streams: Catalogs, CatalogsFeeds, CatalogsProductGroups, Audiences, Keywords, ConversionTags, CustomerLists, CampaignTargetingReport, AdvertizerReport, AdvertizerTargetingReport, AdGroupReport, AdGroupTargetingReport, PinPromotionReport, PinPromotionTargetingReport, ProductGroupReport, ProductGroupTargetingReport, ProductItemReport, KeywordReport | -| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | -| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | -| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | -| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | -| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | -| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | -| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | -| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | -| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | -| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | -| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | -| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | -| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | -| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | -| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | -| 0.1.9 | 2022-09-06 | 
[15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based on statuses | -| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | -| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | -| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | -| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | -| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | -| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | -| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | -| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | -| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | +| 0.7.1 | 2023-11-01 | [32078](https://github.com/airbytehq/airbyte/pull/32078) | handle non json response | +| 0.7.0 | 2023-10-25 | [31876](https://github.com/airbytehq/airbyte/pull/31876) | Migrated to base image, removed token based authentication mthod becuase access_token is valid for 1 day only | +| 0.6.0 | 2023-07-25 | [28672](https://github.com/airbytehq/airbyte/pull/28672) | Add report stream for `CAMPAIGN` level | +| 0.5.3 | 2023-07-05 | [27964](https://github.com/airbytehq/airbyte/pull/27964) | Add `id` field to `owner` field in `ad_accounts` stream | +| 0.5.2 | 2023-06-02 | [26949](https://github.com/airbytehq/airbyte/pull/26949) | Update `BoardPins` stream with `note` property | +| 0.5.1 | 2023-05-11 | [25984](https://github.com/airbytehq/airbyte/pull/25984) | Add pattern for start_date | +| 0.5.0 | 2023-05-17 | [26188](https://github.com/airbytehq/airbyte/pull/26188) | Add `product_tags` field to the `BoardPins` stream | +| 0.4.0 | 2023-05-16 | [26112](https://github.com/airbytehq/airbyte/pull/26112) | Add `is_standard` field to the `BoardPins` stream | +| 0.3.0 | 2023-05-09 | [25915](https://github.com/airbytehq/airbyte/pull/25915) | Add `creative_type` field to the `BoardPins` stream | +| 0.2.6 | 2023-04-26 | [25548](https://github.com/airbytehq/airbyte/pull/25548) | Fix `format` issue for `boards` stream schema for fields with `date-time` | +| 0.2.5 | 2023-04-19 | [00000](https://github.com/airbytehq/airbyte/pull/00000) | Update `AMOUNT_OF_DAYS_ALLOWED_FOR_LOOKUP` to 89 days | +| 0.2.4 | 2023-02-25 | [23457](https://github.com/airbytehq/airbyte/pull/23457) | Add missing columns for analytics streams for pinterest source | +| 0.2.3 | 2023-03-01 | [23649](https://github.com/airbytehq/airbyte/pull/23649) | Fix for `HTTP - 400 Bad Request` when requesting data >= 90 days | +| 0.2.2 | 2023-01-27 | [22020](https://github.com/airbytehq/airbyte/pull/22020) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.2.1 | 2022-12-15 | [20532](https://github.com/airbytehq/airbyte/pull/20532) | Bump CDK version | +| 0.2.0 | 2022-12-13 | [20242](https://github.com/airbytehq/airbyte/pull/20242) | Add data-type normalization up to the schemas declared | +| 0.1.9 | 2022-09-06 | [15074](https://github.com/airbytehq/airbyte/pull/15074) | Add filter based 
on statuses | +| 0.1.8 | 2022-10-21 | [18285](https://github.com/airbytehq/airbyte/pull/18285) | Fix type of `start_date` | +| 0.1.7 | 2022-09-29 | [17387](https://github.com/airbytehq/airbyte/pull/17387) | Set `start_date` dynamically based on API restrictions. | +| 0.1.6 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Use CDK 0.1.89 | +| 0.1.5 | 2022-09-16 | [16799](https://github.com/airbytehq/airbyte/pull/16799) | Migrate to per-stream state | +| 0.1.4 | 2022-09-06 | [16161](https://github.com/airbytehq/airbyte/pull/16161) | Add ability to handle `429 - Too Many Requests` error with respect to `Max Rate Limit Exceeded Error` | +| 0.1.3 | 2022-09-02 | [16271](https://github.com/airbytehq/airbyte/pull/16271) | Add support of `OAuth2.0` authentication method | +| 0.1.2 | 2021-12-22 | [10223](https://github.com/airbytehq/airbyte/pull/10223) | Fix naming of `AD_ID` and `AD_ACCOUNT_ID` fields | +| 0.1.1 | 2021-12-22 | [9043](https://github.com/airbytehq/airbyte/pull/9043) | Update connector fields title/description | +| 0.1.0 | 2021-10-29 | [7493](https://github.com/airbytehq/airbyte/pull/7493) | Release Pinterest CDK Connector | From f0d68ba895932427b1cc96e23870d38cbc85881d Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants <36314070+artem1205@users.noreply.github.com> Date: Fri, 17 Nov 2023 22:30:22 +0100 Subject: [PATCH 45/57] =?UTF-8?q?=F0=9F=90=9B=20Source=20Amazon=20Seller?= =?UTF-8?q?=20Partner:=20remove=20`max=5Fwait=5Fseconds`=20from=20spec=20(?= =?UTF-8?q?#32462)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration_tests/spec.json | 200 ------------------ .../metadata.yaml | 2 +- .../source-amazon-seller-partner/setup.py | 2 +- .../source_amazon_seller_partner/source.py | 1 - .../source_amazon_seller_partner/spec.json | 9 - .../source_amazon_seller_partner/streams.py | 15 +- .../unit_tests/test_finance_streams.py | 2 - .../unit_tests/test_order_items_stream.py | 1 - .../test_reports_stream_sales_and_traffic.py | 1 - .../test_reports_streams_settlement_report.py | 1 - .../unit_tests/test_transform_function.py | 1 - .../sources/amazon-seller-partner.md | 1 + 12 files changed, 8 insertions(+), 228 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json b/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json deleted file mode 100644 index 9c8e32370a3e..000000000000 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/integration_tests/spec.json +++ /dev/null @@ -1,200 +0,0 @@ -{ - "documentationUrl": "https://docs.airbyte.com/integrations/sources/amazon-seller-partner", - "changelogUrl": "https://docs.airbyte.com/integrations/sources/amazon-seller-partner", - "connectionSpecification": { - "title": "Amazon Seller Partner Spec", - "type": "object", - "required": [ - "aws_environment", - "region", - "lwa_app_id", - "lwa_client_secret", - "refresh_token", - "replication_start_date" - ], - "additionalProperties": true, - "properties": { - "auth_type": { - "title": "Auth Type", - "const": "oauth2.0", - "order": 0, - "type": "string" - }, - "aws_environment": { - "title": "AWS Environment", - "description": "Select the AWS Environment.", - "enum": ["PRODUCTION", "SANDBOX"], - "default": "PRODUCTION", - "type": "string", - "order": 1 - }, - "region": { - "title": "AWS Region", - "description": 
"Select the AWS Region.", - "enum": [ - "AE", - "AU", - "BE", - "BR", - "CA", - "DE", - "EG", - "ES", - "FR", - "GB", - "IN", - "IT", - "JP", - "MX", - "NL", - "PL", - "SA", - "SE", - "SG", - "TR", - "UK", - "US" - ], - "default": "US", - "type": "string", - "order": 2 - }, - "aws_access_key": { - "title": "AWS Access Key", - "description": "Specifies the AWS access key used as part of the credentials to authenticate the user.", - "airbyte_secret": true, - "order": 3, - "type": "string" - }, - "aws_secret_key": { - "title": "AWS Secret Access Key", - "description": "Specifies the AWS secret key used as part of the credentials to authenticate the user.", - "airbyte_secret": true, - "order": 4, - "type": "string" - }, - "role_arn": { - "title": "Role ARN", - "description": "Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. (Needs permission to 'Assume Role' STS).", - "airbyte_secret": true, - "order": 5, - "type": "string" - }, - "lwa_app_id": { - "title": "LWA Client Id", - "description": "Your Login with Amazon Client ID.", - "order": 6, - "airbyte_secret": true, - "type": "string" - }, - "lwa_client_secret": { - "title": "LWA Client Secret", - "description": "Your Login with Amazon Client Secret.", - "airbyte_secret": true, - "order": 7, - "type": "string" - }, - "refresh_token": { - "title": "Refresh Token", - "description": "The Refresh Token obtained via OAuth flow authorization.", - "airbyte_secret": true, - "order": 8, - "type": "string" - }, - "replication_start_date": { - "title": "Start Date", - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated.", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", - "examples": ["2017-01-25T00:00:00Z"], - "order": 9, - "type": "string" - }, - "replication_end_date": { - "title": "End Date", - "description": "UTC date and time in the format 2017-01-25T00:00:00Z. Any data after this date will not be replicated.", - "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$|^$", - "examples": ["2017-01-25T00:00:00Z"], - "order": 10, - "type": "string" - }, - "period_in_days": { - "title": "Period In Days", - "type": "integer", - "description": "Will be used for stream slicing for initial full_refresh sync when no updated state is present for reports that support sliced incremental sync.", - "default": 90, - "order": 11 - }, - "report_options": { - "title": "Report Options", - "description": "Additional information passed to reports. This varies by report type. Must be a valid json string.", - "examples": [ - "{\"GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT\": {\"reportPeriod\": \"WEEK\"}}", - "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" - ], - "order": 12, - "type": "string" - }, - "max_wait_seconds": { - "title": "Max wait time for reports (in seconds)", - "description": "Sometimes report can take up to 30 minutes to generate. This will set the limit for how long to wait for a successful report.", - "default": 500, - "examples": ["500", "1980"], - "order": 13, - "type": "integer" - }, - "advanced_stream_options": { - "title": "Advanced Stream Options", - "description": "Additional information to configure report options. This varies by report type, not every report implement this kind of feature. 
Must be a valid json string.", - "examples": [ - "{\"GET_SALES_AND_TRAFFIC_REPORT\": {\"availability_sla_days\": 3}}", - "{\"GET_SOME_REPORT\": {\"custom\": \"true\"}}" - ], - "order": 14, - "type": "string" - } - } - }, - "advanced_auth": { - "auth_flow_type": "oauth2.0", - "predicate_key": ["auth_type"], - "predicate_value": "oauth2.0", - "oauth_config_specification": { - "complete_oauth_output_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "refresh_token": { - "type": "string", - "path_in_connector_config": ["refresh_token"] - } - } - }, - "complete_oauth_server_input_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "lwa_app_id": { - "type": "string" - }, - "lwa_client_secret": { - "type": "string" - } - } - }, - "complete_oauth_server_output_specification": { - "type": "object", - "additionalProperties": false, - "properties": { - "lwa_app_id": { - "type": "string", - "path_in_connector_config": ["lwa_app_id"] - }, - "lwa_client_secret": { - "type": "string", - "path_in_connector_config": ["lwa_client_secret"] - } - } - } - } - } -} diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml index 3f39e4aae628..80b56d5d014b 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/metadata.yaml @@ -7,7 +7,7 @@ data: connectorSubtype: api connectorType: source definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460 - dockerImageTag: 2.0.1 + dockerImageTag: 2.0.2 dockerRepository: airbyte/source-amazon-seller-partner documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner githubIssueLabel: source-amazon-seller-partner diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py b/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py index af80eec8c453..9b4396a6c472 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/setup.py @@ -5,7 +5,7 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk", "pendulum~=2.1", "pycryptodome~=3.10", "xmltodict~=0.12"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "xmltodict~=0.12"] TEST_REQUIREMENTS = [ "requests-mock~=1.9.3", diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py index 5a565c21ad71..dfe04d11d35f 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/source.py @@ -81,7 +81,6 @@ def _get_stream_kwargs(self, config: Mapping[str, Any]) -> Mapping[str, Any]: "marketplace_id": marketplace_id, "period_in_days": config.get("period_in_days", 90), "report_options": config.get("report_options"), - "max_wait_seconds": config.get("max_wait_seconds", 500), "replication_end_date": config.get("replication_end_date"), "advanced_stream_options": config.get("advanced_stream_options"), } diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json 
b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json index d64b1ee1d86f..f0f37d084ff1 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/spec.json @@ -125,15 +125,6 @@ "order": 10, "type": "string" }, - "max_wait_seconds": { - "title": "Max wait time for reports (in seconds)", - "description": "Sometimes report can take up to 30 minutes to generate. This will set the limit for how long to wait for a successful report.", - "default": 500, - "examples": ["500", "1980"], - "order": 11, - "minimum": 1, - "type": "integer" - }, "advanced_stream_options": { "title": "Advanced Stream Options", "description": "Additional information to configure report options. This varies by report type, not every report implement this kind of feature. Must be a valid json string.", diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py index 401332714252..ac7ff0485a74 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/source_amazon_seller_partner/streams.py @@ -38,7 +38,6 @@ def __init__( period_in_days: Optional[int], report_options: Optional[str], advanced_stream_options: Optional[str], - max_wait_seconds: Optional[int], replication_end_date: Optional[str], *args, **kwargs, @@ -132,6 +131,7 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late class ReportsAmazonSPStream(HttpStream, ABC): + max_wait_seconds = 3600 """ API docs: https://github.com/amzn/selling-partner-api-docs/blob/main/references/reports-api/reports_2020-09-04.md API model: https://github.com/amzn/selling-partner-api-models/blob/main/models/reports-api-model/reports_2020-09-04.json @@ -163,7 +163,6 @@ def __init__( marketplace_id: str, period_in_days: Optional[int], report_options: Optional[str], - max_wait_seconds: Optional[int], replication_end_date: Optional[str], advanced_stream_options: Optional[str], *args, @@ -176,7 +175,6 @@ def __init__( self.marketplace_id = marketplace_id self.period_in_days = max(period_in_days, self.replication_start_date_limit_in_days) # ensure old configs work as well self._report_options = report_options or "{}" - self.max_wait_seconds = max_wait_seconds self._advanced_stream_options = dict() self._http_method = "GET" if advanced_stream_options is not None: @@ -250,11 +248,11 @@ def _retrieve_report(self, report_id: str) -> Mapping[str, Any]: return report_payload @default_backoff_handler(factor=5, max_tries=5) - def download_and_decompress_report_document(self, url, payload): + def download_and_decompress_report_document(self, payload: dict) -> str: """ Unpacks a report document """ - report = requests.get(url) + report = requests.get(payload.get("url")) report.raise_for_status() if "compressionAlgorithm" in payload: return gzip.decompress(report.content).decode("iso-8859-1") @@ -265,7 +263,7 @@ def parse_response( ) -> Iterable[Mapping]: payload = response.json() - document = self.download_and_decompress_report_document(payload.get("url"), payload) + document = self.download_and_decompress_report_document(payload) document_records = self.parse_document(document) yield from document_records 
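For reference, the consolidated `download_and_decompress_report_document` helper above now takes only the report payload, fetches the document from its `url` field, and gunzips it when a `compressionAlgorithm` key is present. A minimal standalone sketch of that download-and-decompress pattern (the function name here is illustrative and not part of the connector):

```python
import gzip

import requests


def fetch_report_document(payload: dict) -> str:
    """Download a report document and decompress it if the payload marks it as compressed."""
    response = requests.get(payload["url"])
    response.raise_for_status()
    if "compressionAlgorithm" in payload:
        # Gzip-compressed documents are decoded as ISO-8859-1, matching the connector code above.
        return gzip.decompress(response.content).decode("iso-8859-1")
    return response.content.decode("iso-8859-1")
```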
@@ -902,10 +900,7 @@ def parse_response( payload = response.json() - document = self.decompress_report_document( - payload.get("url"), - payload, - ) + document = self.download_and_decompress_report_document(payload) document_records = self.parse_document(document) # Not all (partial) responses include the request date, so adding it manually here diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py index 7621df97a0f2..5e84a2cf47f3 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_finance_streams.py @@ -104,7 +104,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream @@ -123,7 +122,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py index 0c4a80a1bcf4..7b7d5c016a9a 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_order_items_stream.py @@ -39,7 +39,6 @@ def _internal(): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py index c255aa92eee6..106b5b543785 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_stream_sales_and_traffic.py @@ -18,7 +18,6 @@ def test_stream_uses_advanced_options(): period_in_days=0, report_options=None, advanced_stream_options='{"GET_SALES_AND_TRAFFIC_REPORT":{"availability_sla_days": 3}}', - max_wait_seconds=500, ) assert stream.availability_sla_days == 3 diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py index a1f64cf10b64..77a21ef4bca8 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_reports_streams_settlement_report.py @@ -86,7 +86,6 @@ def _internal(start_date: str = START_DATE_1, end_date: str = END_DATE_1): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=500, ) return stream diff --git a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py index 
27d7ca2d3b36..9b2aab17a298 100644 --- a/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py +++ b/airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_transform_function.py @@ -16,7 +16,6 @@ def reports_stream(marketplace_id): period_in_days=0, report_options=None, advanced_stream_options=None, - max_wait_seconds=0, ) return stream diff --git a/docs/integrations/sources/amazon-seller-partner.md b/docs/integrations/sources/amazon-seller-partner.md index 1023cee1379b..cfc3348ab1ba 100644 --- a/docs/integrations/sources/amazon-seller-partner.md +++ b/docs/integrations/sources/amazon-seller-partner.md @@ -143,6 +143,7 @@ So, for any value that exceeds the limit, the `period_in_days` will be automatic | Version | Date | Pull Request | Subject | |:---------|:-----------|:--------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `2.0.2` | 2023-11-17 | [\#32462](https://github.com/airbytehq/airbyte/pull/32462) | Remove Max time option from specification; set default waiting time for reports to 1 hour | | `2.0.1` | 2023-11-16 | [\#32550](https://github.com/airbytehq/airbyte/pull/32550) | Fix the OAuth flow | | `2.0.0` | 2023-11-23 | [\#32355](https://github.com/airbytehq/airbyte/pull/32355) | Remove Brand Analytics from Airbyte Cloud, permanently remove deprecated FBA reports | | `1.6.2` | 2023-11-14 | [\#32508](https://github.com/airbytehq/airbyte/pull/32508) | Do not use AWS signature as it is no longer required by the Amazon API | From e94501b87be88127f000c6206ae309afaba2e963 Mon Sep 17 00:00:00 2001 From: Natalie Kwong <38087517+nataliekwong@users.noreply.github.com> Date: Fri, 17 Nov 2023 14:29:32 -0800 Subject: [PATCH 46/57] Update Cloud Docs (#32539) --- docs/cloud/core-concepts.md | 107 +++---------- .../getting-started-with-airbyte-cloud.md | 146 +++++++----------- .../configuring-connections.md | 19 +-- .../manage-airbyte-cloud-notifications.md | 7 +- .../manage-airbyte-cloud-workspace.md | 4 +- .../manage-connection-state.md | 2 +- .../managing-airbyte-cloud/manage-credits.md | 4 +- .../manage-schema-changes.md | 64 ++++---- .../review-sync-history.md | 35 ++--- .../understand-airbyte-cloud-limits.md | 2 +- docs/integrations/destinations/firestore.md | 31 +++- 11 files changed, 175 insertions(+), 246 deletions(-) diff --git a/docs/cloud/core-concepts.md b/docs/cloud/core-concepts.md index 9383c6ffd036..c3c949599ee8 100644 --- a/docs/cloud/core-concepts.md +++ b/docs/cloud/core-concepts.md @@ -1,6 +1,6 @@ # Core Concepts -Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, what format the data is written to in the destination, and if the data is stored in raw tables format or basic normalized (or JSON) format. +Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, and how the data is written to in the destination. This page describes the concepts you need to know to use Airbyte. @@ -18,49 +18,15 @@ An Airbyte component which pulls data from a source or pushes data to a destinat ## Connection -A connection is an automated data pipeline that replicates data from a source to a destination. 
- -Setting up a connection involves configuring the following parameters: - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parameter - Description -
Sync schedule - When should a data sync be triggered? -
Destination Namespace and stream names - Where should the replicated data be written? -
Catalog selection - What data should be replicated from the source to the destination? -
Sync mode - How should the streams be replicated (read and written)? -
Optional transformations - How should Airbyte protocol messages (raw JSON blob) data be converted into other data representations? -
+A connection is an automated data pipeline that replicates data from a source to a destination. Setting up a connection enables configuration of the following parameters: +| Concept | Description | +|---------------------|---------------------------------------------------------------------------------------------------------------------| +| Replication Frequency | When should a data sync be triggered? | +| Destination Namespace and Stream Prefix | Where should the replicated data be written? | +| Catalog Selection | What data (streams and columns) should be replicated from the source to the destination? | +| Sync Mode | How should the streams be replicated (read and written)? | +| Schema Propagation | How should Airbyte handle schema drift in sources? | ## Stream A stream is a group of related records. @@ -82,49 +48,26 @@ Examples of fields: ## Namespace -Namespace is a group of streams in a source or destination. Common use cases for namespaces are enforcing permissions, segregating test and production data, and general data organization. - -A schema in a relational database system is an example of a namespace. - -In a source, the namespace is the location from where the data is replicated to the destination. - -In a destination, the namespace is the location where the replicated data is stored in the destination. Airbyte supports the following configuration options for destination namespaces: - - - - - - - - - - - - - - - - - - -
Configuration - Description -
Mirror source structure - Some sources (for example, databases) provide namespace information for a stream. If a source provides the namespace information, the destination will reproduce the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. -
Destination default - All streams will be replicated and stored in the default namespace defined on the destination settings page. For settings for popular destinations, see ​​Destination Connector Settings -
Custom format - All streams will be replicated and stored in a user-defined custom format. See Custom format for more details. -
+Namespace is a method of grouping streams in a source or destination. Namespaces are used to generally organize data, segregate tests and production data, and enforce permissions. In a relational database system, this is known as a schema. + +In a source, the namespace is the location from where the data is replicated to the destination. In a destination, the namespace is the location where the replicated data is stored in the destination. + +Airbyte supports the following configuration options for a connection: + + | Destination Namepsace | Description | +| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | +| Destination default | All streams will be replicated to the single default namespace defined by the Destination. For more details, see ​​Destination Connector Settings | +| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. | +| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details | ## Connection sync modes A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. -- **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it. -- **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. -- **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. -- **Incremental Sync | Append + Deduped:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. +- **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it each time. +- **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. This creates a historical copy of all records each sync. +- **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. This enables efficient historical tracking over time of data. +- **Incremental Sync | Append + Deduped:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. This is the most common replication use case. 
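To make the difference between the incremental modes concrete, here is a small illustrative sketch in plain Python (it only mimics the behaviour conceptually; it is not the SQL Airbyte generates in the destination):

```python
# Records synced from the source across several runs; "id" is the primary key.
records = [
    {"id": 1, "name": "Alice", "updated_at": "2023-11-01"},
    {"id": 2, "name": "Bob", "updated_at": "2023-11-10"},
    {"id": 1, "name": "Alice Smith", "updated_at": "2023-11-15"},  # newer version of id 1
]

# Incremental | Append keeps every synced version, building up history.
append_history = list(records)  # 3 rows

# Incremental | Append + Deduped also maintains a view with one row per primary key,
# holding only the latest version of each record.
deduped_view = {}
for record in sorted(records, key=lambda r: r["updated_at"]):
    deduped_view[record["id"]] = record

print(len(append_history), len(deduped_view))  # 3 2
```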
## Normalization @@ -132,8 +75,6 @@ Normalization is the process of structuring data from the source into a format a Note that normalization is only relevant for the following relational database & warehouse destinations: -- BigQuery -- Snowflake - Redshift - Postgres - Oracle diff --git a/docs/cloud/getting-started-with-airbyte-cloud.md b/docs/cloud/getting-started-with-airbyte-cloud.md index 0c4cc4d284cc..2fecf212572f 100644 --- a/docs/cloud/getting-started-with-airbyte-cloud.md +++ b/docs/cloud/getting-started-with-airbyte-cloud.md @@ -11,7 +11,7 @@ To use Airbyte Cloud: Airbyte Cloud offers a 14-day free trial that begins after your first successful sync. For more information, see [Pricing](https://airbyte.com/pricing). :::note - If you are invited to a workspace, you cannot use your Google login to create a new Airbyte account. + If you are invited to a workspace, you currently cannot use your Google login to create a new Airbyte account. ::: 2. If you signed up using your email address, Airbyte will send you an email with a verification link. On clicking the link, you'll be taken to your new workspace. @@ -28,16 +28,8 @@ A source is an API, file, database, or data warehouse that you want to ingest da To set up a source: -:::note - -Set your [default data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) before creating a new source to ensure your data is processed in the correct region. - -::: - -1. On the Airbyte Cloud dashboard, click **Sources** and then click **+ New source**. -2. On the Set up the source page, select the source you want to set up from the **Source** catalog. - - The fields relevant to your source are displayed. The Setup Guide provides information to help you fill out the fields for your selected source. +1. On the Airbyte Cloud dashboard, click **Sources**. +2. On the Set up the source page, select the source you want to set up from the **Source catalog**. Airbyte currently offers more than 200 source connectors in Cloud to choose from. Once you've selected the source, a Setup Guide will lead you through the authentication and setup of the source. 3. Click **Set up source**. @@ -49,11 +41,8 @@ A destination is a data warehouse, data lake, database, or an analytics tool whe To set up a destination: -1. On the Airbyte Cloud dashboard, click **Destinations** and then click **+ New destination**. -2. On the Set up the destination page, select the destination you want to set up from the **Destination** catalog. - - The fields relevant to your destination are displayed. The Setup Guide provides information to help you fill out the fields for your selected destination. - +1. On the Airbyte Cloud dashboard, click **Destinations**. +2. On the Set up the Destination page, select the destination you want to set up from the **Destination catalog**. Airbyte currently offers more than 38 destination connectors in Cloud to choose from. Once you've selected the destination, a Setup Guide will lead you through the authentication and setup of the source. 3. Click **Set up destination**. 
## Set up a connection @@ -64,96 +53,73 @@ A connection is an automated data pipeline that replicates data from a source to Setting up a connection involves configuring the following parameters: -| Parameter | Description | +| Replication Setting | Description | | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| Replication frequency | How often should the data sync? | -| [Data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? | -| Destination Namespace and stream prefix | Where should the replicated data be written? | -| Catalog selection | Which streams and fields should be replicated from the source to the destination? | -| Sync mode | How should the streams be replicated (read and written)? | - -For more information, see [Connections and Sync Modes](../understanding-airbyte/connections/README.md) and [Namespaces](../understanding-airbyte/namespaces.md) - -If you need to use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): +| [Destination Namespace](../understanding-airbyte/namespaces.md) and stream prefix | Where should the replicated data be written to? | +| Replication Frequency | How often should the data sync? | +| [Data Residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? | +| [Schema Propagation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes) | Should schema drift be automated? | -1. In the **Replication Frequency** dropdown, click **Cron**. -2. Enter a cron expression and choose a time zone to create a sync schedule. - -:::note - -- Only one sync per connection can run at a time. -- If cron schedules a sync to run before the last one finishes, the scheduled sync will start after the last sync completes. +After configuring the connection settings, you will then define specifically what data will be synced. +:::info +A connection's schema consists of one or many streams. Each stream is most commonly associated with a database table or an API endpoint. Within a stream, there can be one or many fields or columns. ::: +| Catalog Selection | Description | +| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | +| Stream Selection | Which streams should be replicated from the source to the destination? | +| Column Selection | Which fields should be included in the sync? | +| [Sync Mode](../understanding-airbyte/connections/README.md) | How should the streams be replicated (read and written)? | + To set up a connection: -:::note +:::tip Set your [default data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) before creating a new connection to ensure your data is processed in the correct region. ::: 1. On the Airbyte Cloud dashboard, click **Connections** and then click **+ New connection**. -2. On the New connection page, select a source: +2. 
Select a source: - - To use an existing source, select your desired source from the **Sources**. Click the source to use it. - - To set up a new source, select "+ New source". Select a destination from the catalog. The fields relevant to your source are displayed. The Setup Guide provides information to help you fill out the fields for your selected source. Click **Set up source**. + - To use a data source you've already set up with Airbyte, select from the list of existing sources. Click the source to use it. + - To set up a new source, select **Set up a new source** and fill out the fields relevant to your source using the Setup Guide. 3. Select a destination: - - To use an existing destination, select your desired destination from the existing destinations. Click the destination to use it. - - To set up a new destination, select "+ New destination". Select a destination from the catalog. The fields relevant to your destination are displayed. The Setup Guide provides information to help you fill out the fields for your selected destination. Click **Set up destination**. - - The Set up the connection page is displayed. - -4. From the **Replication frequency** dropdown, select how often you want the data to sync from the source to the destination. - - **Note:** The default replication frequency is **Every 24 hours**. - -5. From the **Destination Namespace** dropdown, select the format in which you want to store the data in the destination: - - **Note:** The default configuration is **Mirror source structure**. - - - - - - - - - - - - - - - - - - -
Configuration - Description -
Mirror source structure - Some sources (for example, databases) provide namespace information for a stream. If a source provides the namespace information, the destination will reproduce the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option -
Destination default - All streams will be replicated and stored in the default namespace defined on the Destination Settings page. For more information, see ​​Destination Connector Settings -
Custom format - All streams will be replicated and stored in a custom format. See Custom format for more details -
+   - To use a data source you've already set up with Airbyte, select from the list of existing destinations. Click the destination to use it.
+   - To set up a new destination, select **Set up a new destination** and fill out the fields relevant to your destination using the Setup Guide.
+
+   Airbyte will scan the schema of the source, and then display the **Connection Configuration** page.
+
+4. From the **Replication frequency** dropdown, select how often you want the data to sync from the source to the destination. The default replication frequency is **Every 24 hours**. You can also set up [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html).
+
+   Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour.
+
+5. From the **Destination Namespace** dropdown, select the format in which you want to store the data in the destination. Note: The default configuration is **Destination default**.
+
+| Destination Namespace | Description |
+| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
+| Destination default | All streams will be replicated to the single default namespace defined by the Destination. For more details, see Destination Connector Settings |
+| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. |
+| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details |

 :::tip
-To better understand the destination namespace configurations, see [Destination Namespace example](../understanding-airbyte/namespaces.md#examples)
+To ensure your data is synced correctly, see our examples of how to use the [Destination Namespace](../understanding-airbyte/namespaces.md#examples)
 :::

-6. (Optional) In the **Destination Stream Prefix (Optional)** field, add a prefix to stream names (for example, adding a prefix `airbyte_` renames `projects` to `airbyte_projects`).
-7. Activate the streams you want to sync:
-   - (Optional) If your source has many tables, type the name of the stream you want to enable in the **Search stream name** search box.
-8. Configure the sync settings:
+6. (Optional) In the **Destination Stream Prefix (Optional)** field, add a prefix to stream names. For example, adding a prefix `airbyte_` renames the stream `projects` to `airbyte_projects`. This is helpful if you are sending multiple connections to the same Destination Namespace to ensure connections do not conflict when writing to the destination.
+
+7. In the **Detect and propagate schema changes** dropdown, select whether Airbyte should propagate schema changes. See more details about how we handle [schema changes](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes).
+
+
+8. Activate the streams you want to sync by toggling the **Sync** button on. Use the **Search stream name** search box to find streams quickly. If you want to sync all streams, bulk toggle to enable all streams.

-   1. Toggle the **Sync** button to enable sync for the stream.
-   2. **Source stream name**: The table name in the source
-   3. **Sync mode**: Select how you want the data to be replicated from the source to the destination:
+9. Configure the stream settings:
+   1. **Data Destination**: Where the data will land in the destination
+   2. **Stream**: The table name in the source
+   3. **Sync mode**: How the data will be replicated from the source to the destination.

    For the source:

@@ -165,23 +131,23 @@ To better understand the destination namespace configurations, see [Destination

    - Select **Overwrite** to erase the old data and replace it completely
    - Select **Append** to capture changes to your table
     **Note:** This creates duplicate records
-   - Select **Append + Deduped** to mirror your source while keeping records unique
+   - Select **Append + Deduped** to mirror your source while keeping records unique (most common)
     **Note:** Some sync modes may not yet be available for the source or destination.

    4. **Cursor field**: Used in **Incremental** sync mode to determine which records to sync. Airbyte pre-selects the cursor field for you (example: updated date). If you have multiple cursor fields, select the one you want.
    5. **Primary key**: Used in **Append + Deduped** sync mode to determine the unique identifier.
-   6. Choose which fields to sync. By default, all fields are synced.
+   6. Choose which fields or columns to sync. By default, all fields are synced.

 10. Click **Set up connection**.
-11. Airbyte tests the connection. If the sync is successful, the Connection page is displayed.
+11. Airbyte tests the connection setup. If the test is successful, Airbyte will save the configuration. If the Replication Frequency uses a preset schedule or CRON, your first sync will immediately begin!

-## Verify the connection
+## Verify the sync

-Verify the sync by checking the logs:
+Once the first sync has completed, you can verify it by checking in Airbyte Cloud and in your destination.

 1. On the Airbyte Cloud dashboard, click **Connections**. The list of connections is displayed. Click on the connection you just set up.
-2. The Sync History is displayed. Click on the first log in the sync history to view the log details.
+2. The **Job History** tab shows each sync run, along with the sync summary of data and rows moved. You can also manually trigger syncs or view detailed logs for each sync here.
 3. Check the data at your destination. If you added a Destination Stream Prefix while setting up the connection, make sure to search for the stream name with the prefix.

 ## Allowlist IP addresses

diff --git a/docs/cloud/managing-airbyte-cloud/configuring-connections.md b/docs/cloud/managing-airbyte-cloud/configuring-connections.md
index 49e6fd43bbaf..4e95bac58714 100644
--- a/docs/cloud/managing-airbyte-cloud/configuring-connections.md
+++ b/docs/cloud/managing-airbyte-cloud/configuring-connections.md
@@ -1,8 +1,8 @@
 # Configuring connections

-After you have created a connection, you can change how your data syncs to the destination by modifying the [configuration settings](#configure-connection-settings) and the [stream settings](#modify-streams-in-your-connection).
+A connection links a source to a destination and defines how your data will sync. After you have created a connection, you can modify any of the [configuration settings](#configure-connection-settings) or [stream settings](#modify-streams-in-your-connection).
-## Configure connection settings +## Configure Connection Settings Configuring the connection settings allows you to manage various aspects of the sync, such as how often data syncs and where data is written. @@ -12,9 +12,7 @@ To configure these settings: 2. Click the **Replication** tab. -3. Click the **Configuration** dropdown. - -You can configure the following settings: +3. Click the **Configuration** dropdown to expand the options. :::note @@ -22,12 +20,15 @@ These settings apply to all streams in the connection. ::: +You can configure the following settings: + | Setting | Description | |--------------------------------------|-------------------------------------------------------------------------------------| | Replication frequency | How often the data syncs | | Destination namespace | Where the replicated data is written | | Destination stream prefix | How you identify streams from different connectors | | [Detect and propagate schema changes](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes/#review-non-breaking-schema-changes) | How Airbyte handles syncs when it detects schema changes in the source | +| Connection Data Residency | Where data will be processed | To use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): @@ -39,13 +40,13 @@ To use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2. * Only one sync per connection can run at a time. * If a sync is scheduled to run before the previous sync finishes, the scheduled sync will start after the completion of the previous sync. -* Reach out to [Sales](https://airbyte.com/company/talk-to-sales) to enable syncs more frequently than once per hour. +* Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour. ::: ## Modify streams in your connection -In the **Activate the streams you want to sync** table, you can choose which streams to sync and how they are loaded to the destination. +In the **Activate the streams you want to sync** table, you choose which streams to sync and how they are loaded to the destination. :::info A connection's schema consists of one or many streams. Each stream is most commonly associated with a database table or an API endpoint. Within a stream, there can be one or many fields or columns. @@ -71,9 +72,9 @@ Source-defined cursors and primary keys are selected automatically and cannot be ::: -3. Click on a stream to display the stream details panel. +3. Click on a stream to display the stream details panel. You'll see each column we detect from the source. -4. Toggle individual fields to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. +4. Toggle individual fields or columns to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. :::info diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md index 49a663b451c9..160b28d5f47e 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md +++ b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md @@ -10,8 +10,11 @@ This page provides guidance on how to manage notifications for Airbyte Cloud, al | Successful Syncs | A sync from any of your connections succeeds. 
Note that if sync runs frequently or if there are many syncs in the workspace these types of events can be noisy | Automated Connection Updates | A connection is updated automatically (ex. a source schema is automatically updated) | | Connection Updates Requiring Action | A connection update requires you to take action (ex. a breaking schema change is detected) | -| Sync Disabled Warning | A connection will be disabled soon due to repeated failures. It has failed 50 times consecutively or there were only failed jobs in the past 7 days | -| Sync Disabled | A connection was automatically disabled due to repeated failures. It will be disabled when it has failed 100 times consecutively or has been failing for 14 days in a row | +| Warning - Repeated Failures | A connection will be disabled soon due to repeated failures. It has failed 50 times consecutively or there were only failed jobs in the past 7 days | +| Sync Disabled - Repeated Failures | A connection was automatically disabled due to repeated failures. It will be disabled when it has failed 100 times consecutively or has been failing for 14 days in a row | +| Warning - Upgrade Required (email only) | A new connector version is available and requires manual upgrade | +| Sync Disabled - Upgrade Required (email only) | One or more connections were automatically disabled due to a connector upgrade deadline passing +| ## Configure Notification Settings diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md index 1db3697191a5..40336d7b9273 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md +++ b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md @@ -80,6 +80,4 @@ To switch between workspaces: 1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click the current workspace name under the Airbyte logo in the navigation bar. -2. Click **View all workspaces**. - -3. Click the name of the workspace you want to switch to. +2. Search for the workspace or click the name of the workspace you want to switch to. diff --git a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md index 929a56834534..321c3753e7b8 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md +++ b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md @@ -11,7 +11,7 @@ To review the connection state: **Connection State** displays. -To edit the connection state: +Editing the connection state allows the sync to start from any date in the past. If the state is edited, Airbyte will start syncing incrementally from the new date. This is helpful if you do not want to fully resync your data. To edit the connection state: :::warning Updates to connection state should be handled with extreme care. Updates may break your syncs, requiring a reset to fix. Make changes only as directed by the Airbyte team. diff --git a/docs/cloud/managing-airbyte-cloud/manage-credits.md b/docs/cloud/managing-airbyte-cloud/manage-credits.md index 040a083e58d5..7ed15c0ed76f 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-credits.md +++ b/docs/cloud/managing-airbyte-cloud/manage-credits.md @@ -18,11 +18,11 @@ To buy credits: Purchase limits: * Minimum: 20 credits - * Maximum: 2,500 credits + * Maximum: 6,000 credits ::: - To buy more credits or a custom plan, reach out to [Sales](https://airbyte.com/talk-to-sales). 
+    To buy more credits or discuss a custom plan, reach out to [Sales](https://airbyte.com/talk-to-sales).

 5. Fill out the payment information.

diff --git a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md
index c6113b461c65..1e76e5f6ff58 100644
--- a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md
+++ b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md
@@ -2,30 +2,38 @@
 You can specify for each connection how Airbyte should handle any change of schema in the source. This process helps ensure accurate and efficient data syncs, minimizing errors and saving you time and effort in managing your data pipelines.

-Airbyte checks for any changes in your source schema before syncing, at most once every 24 hours.
+Airbyte checks for any changes in your source schema immediately before syncing, at most once every 24 hours.

-Based on your configured settings for "Detect and propagate schema changes", Airbyte can automatically sync those changes or ignore them:
-* **Propagate all changes** automatically propagates stream changes (additions or deletions) or column changes (additions or deletions) detected in the source
-* **Propagate column changes only** automatically propagates column changes detected in the source
-* **Ignore** any schema change, in which case the schema you’ve set up will not change even if the source schema changes until you approve the changes manually
-* **Pause connection** disables the connection from syncing further once a change is detected
+Based on your configured settings for **Detect and propagate schema changes**, Airbyte will automatically sync those changes or ignore them:

-When a new column is detected and propagated, values for that column will be filled in for the updated rows. If you are missing values for rows not updated, a backfill can be done by completing a full refresh.
+| Setting | Description |
+|---------------------|---------------------------------------------------------------------------------------------------------------------|
+| Propagate all changes | All new tables and column changes from the source will automatically be propagated and reflected in the destination. This includes stream changes (additions or deletions), column changes (additions or deletions) and data type changes
+| Propagate column changes only (default) | Only column changes will be propagated
+| Ignore | Schema changes will be detected, but not propagated. Syncs will continue running with the schema you've set up. To propagate the detected schema changes, you will need to approve the changes manually |
+| Pause Connection | Connections will be automatically disabled as soon as any schema changes are detected |

-When a column is deleted, the values for that column will stop updating for the updated rows and be filled with Null values.
+When propagation is enabled, your data in the destination will automatically shift to bring in the new changes.

-When a new stream is detected and propagated, the first sync will fill all data in as if it is a historical sync. When a stream is deleted from the source, the stream will stop updating, and we leave any existing data in the destination. The rest of the enabled streams will continue syncing.
+| Type of Schema Change | Propagation Behavior |
+|---------------------|---------------------------------------------------------------------------------------------------------------------|
+| New Column | The new column will be created in the destination. Values for the column will be filled in for the updated rows. If you are missing values for rows not updated, a backfill can be done by completing a full resync.
+| Removal of column | The old column will be removed from the destination.
+| New stream | The first sync will create the new stream in the destination and fill all data in as if it is a historical sync. |
+| Removal of stream | The stream will stop updating, and any existing data in the destination will remain. |
+| Column data type changes | The data in the destination will remain the same. Any new or updated rows with incompatible data types will result in a row error in the raw Airbyte tables. You will need to refresh the schema and do a full resync to ensure the data types are consistent.

-In all cases, if a breaking change is detected, the connection will be paused for manual review to prevent future syncs from failing. Breaking schema changes occur when:
+In all cases, if a breaking schema change is detected, the connection will be paused immediately for manual review to prevent future syncs from failing. Breaking schema changes occur when:

 * An existing primary key is removed from the source
 * An existing cursor is removed from the source

-See "Fix breaking schema changes" to understand how to resolve these types of changes.
+To re-enable the streams, ensure the correct **Primary Key** and **Cursor** are selected for each stream and save the connection.

 ## Review non-breaking schema changes

-To review non-breaking schema changes:
-1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with non-breaking changes (indicated by a **yellow exclamation mark** icon).
+If the connection is set to **Ignore** any schema changes, Airbyte continues syncing according to your last saved schema. You need to manually approve any detected schema changes for the schema in the destination to change.
+
+1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**. Select a connection and navigate to the **Replication** tab. If schema changes are detected, you'll see a blue "i" icon next to the Replication tab.

 2. Click **Review changes**.

@@ -35,41 +43,31 @@

 5. Scroll to the bottom of the page and click **Save changes**.

-:::note
-
- By default, Airbyte ignores non-breaking changes and continues syncing. You can configure how Airbyte handles syncs when it detects non-breaking changes by [editing the stream configuration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/edit-stream-configuration).
-
-:::
-
-## Resolve breaking changes
+## Resolving breaking changes

 Breaking changes require your attention to resolve. They may immediately cause the connection to be disabled, or you can upgrade the connector manually within a time period once reviewing the changes.

-A connection will automatically be disabled if:
-* An existing primary key is removed
-* An existing cursor field is removed
+A connection will always automatically be disabled if an existing primary key or cursor field is removed. You must review and fix the changes before editing the connection or resuming syncs.

-If the breaking change is due to a new version, the connection will alert you of a breaking change but continue to sync until the cutoff date. On the cutoff date, the connection will automatically be disabled on that date to prevent failure or unexpected behavior. 
These breaking changes include: +Breaking changes can also occur when a new version of the connector is released. In these cases, the connection will alert you of a breaking change but continue to sync until the cutoff date for upgrade. On the cutoff date, the connection will automatically be disabled on that date to prevent failure or unexpected behavior. It is **highly recommended** to upgrade before the cutoff date to ensure you continue syncing without interruption. + +A major version upgrade will include a breaking change if any of these apply: | Type of Change | Description | |------------------|---------------------------------------------------------------------------------------------------------------------| -| Spec Change | The configuration required by users of this connector has been changed and syncs will fail until users reconfigure or re-authenticate. | -| Schema Change | The type of property previously present within a record has changed +| Connector Spec Change | The configuration has been changed and syncs will fail until users reconfigure or re-authenticate. | +| Schema Change | The type of property previously present within a record has changed and a refresh of the source schema is required. | Stream or Property Removal | Data that was previously being synced is no longer going to be synced | -| Destination Format / Normalization Change | The way the destination writes the final data or how normalization cleans that data is changing in a way that requires a full refresh | +| Destination Format / Normalization Change | The way the destination writes the final data or how Airbyte cleans that data is changing in a way that requires a full refresh | | State Changes | The format of the source’s state has changed, and the full dataset will need to be re-synced | To review and fix breaking schema changes: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with breaking changes (indicated by a **red exclamation mark** icon). +1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with breaking changes. -2. Review the description of what has changed. The breaking change will require you to upgrade your source or destination to a new version. +2. Review the description of what has changed in the new version. The breaking change will require you to upgrade your source or destination to a new version by a specific cutoff date. 3. Update the source or destination to the new version to continue syncing. -:::note -If a connection’s source schema has breaking changes (an existing cursor or primary key is removed), it will stop syncing immediately. You must review and fix the changes before editing the connection or resuming syncs. -::: - ### Manually refresh the source schema In addition to Airbyte Cloud’s automatic schema change detection, you can manually refresh the source schema to stay up to date with changes in your schema. diff --git a/docs/cloud/managing-airbyte-cloud/review-sync-history.md b/docs/cloud/managing-airbyte-cloud/review-sync-history.md index b5a1f06ba903..0bb5cf2290f5 100644 --- a/docs/cloud/managing-airbyte-cloud/review-sync-history.md +++ b/docs/cloud/managing-airbyte-cloud/review-sync-history.md @@ -2,34 +2,19 @@ The job history displays information about synced data, such as the amount of data moved, the number of records read and committed, and the total sync time. 
Reviewing this summary can help you monitor the sync performance and identify any potential issues.

-To review the sync history:
-1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**.
-
-2. Click a connection in the list to view its sync history. Sync History displays the sync status or [reset](https://docs.airbyte.com/operator-guides/reset/) status. The sync status is defined as:
+To review the sync history, click a connection in the list. Sync History displays the sync status or [reset](https://docs.airbyte.com/operator-guides/reset/) status. The sync status is defined as:

 | Status | Description |
 |---------------------|---------------------------------------------------------------------------------------------------------------------|
 | Succeeded | 100% of the data has been extracted and loaded to the destination |
-| Partially Succeeded | a subset of the data has been loaded to the destination
-| Failed | none of the data has been loaded to the destination |
-| Cancelled | the sync was cancelled manually before finishing |
-| Running | the sync is currently running |
-
-:::note
-
-In the event of a failure, Airbyte will make several attempts to sync your data before waiting for the next sync to retry. The latest rules can be read about [here](../../understanding-airbyte/jobs.md#retry-rules).
-
-:::
-
-3. To view the full sync log, click the three grey dots next to any sync job. Select "View logs" to open the logs in the browser.
-
-4. To find a link to the job, click the three grey dots next to any sync job. Select "Copy link to job" to copy the link to your clipboard.
-
-5. To download a copy of the logs locally, click the three grey dots next to any sync job. Select "Donwload logs".
+| Partially Succeeded | A subset of the data has been loaded to the destination
+| Failed | None of the data has been loaded to the destination |
+| Cancelled | The sync was cancelled manually before finishing |
+| Running | The sync is currently running |

 ## Sync summary

-Each sync shows the time the sync was initiated and additional metadata.
+Each sync shows the time the sync was initiated and additional metadata. This information can help in understanding sync performance over time.

 | Data | Description |
 |------------------------------------------|--------------------------------------------------------------------------------------|
@@ -38,3 +23,11 @@ Each sync shows the time the sync was initiated and additional metadata.
 | x loaded records | Number of records the destination confirmed it received. |
 | xh xm xs | Total time (hours, minutes, seconds) for the sync to complete |

+
+:::note
+
+In the event of a failure, Airbyte will make several attempts to sync your data before waiting for the next sync to retry. The latest rules can be read about [here](../../understanding-airbyte/jobs.md#retry-rules).
+
+:::
+
+On this page, you can also view the complete logs and find any relevant errors, find a link to the job to share with Support, or download a copy of the logs locally.
\ No newline at end of file
diff --git a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md
index bbc2211fd2e6..9d8a429eab9e 100644
--- a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md
+++ b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md
@@ -10,7 +10,7 @@ Understanding the following limitations will help you more effectively manage Ai
 * Max number of streams that can be returned by a source in a discover call: 1K
 * Max number of streams that can be configured to sync in a single connection: 1K
 * Size of a single record: 20MB
-* Shortest sync schedule: Every 60 min
+* Shortest sync schedule: Every 60 min (Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour)
 * Schedule accuracy: +/- 30 min

 *Limits on workspaces, sources, and destinations do not apply to customers of [Powered by Airbyte](https://airbyte.com/solutions/powered-by-airbyte). To learn more [contact us](https://airbyte.com/talk-to-sales)!

diff --git a/docs/integrations/destinations/firestore.md b/docs/integrations/destinations/firestore.md
index c82a9f12068e..94a6002a70c4 100644
--- a/docs/integrations/destinations/firestore.md
+++ b/docs/integrations/destinations/firestore.md
@@ -1,6 +1,35 @@
 # Firestore

-The Firestore destination for Airbyte
+This destination writes data to Google Firestore.
+
+Google Firestore, officially known as Cloud Firestore, is a flexible, scalable database for mobile, web, and server development from Firebase and Google Cloud. It is commonly used for developing applications as a NoSQL database that provides real-time data syncing across user devices.
+
+## Getting started
+
+### Requirements
+
+- An existing GCP project
+- A role with permissions to create a Service Account Key in GCP
+
+### Step 1: Create a Service Account
+1. Log in to the Google Cloud Console. Select the project where your Firestore database is located.
+2. Navigate to "IAM & Admin" and select "Service Accounts". Create a Service Account and assign appropriate roles. Ensure “Cloud Datastore User” or “Firebase Rules System” are enabled.
+3. Navigate to the service account and generate the JSON key. Download and copy the contents to the configuration.
+
+## Sync overview
+
+### Output schema
+
+Each stream will be output into a Firestore collection.
+
+#### Features
+
+| Feature | Supported?\(Yes/No\) | Notes |
+| :----------------------------- | :------------------- | :---- |
+| Full Refresh Sync | ✅ | |
+| Incremental - Append Sync | ✅ | |
+| Incremental - Append + Deduped | ✅ | |
+| Namespaces | ✅ | |

 ## Changelog

From 9dae0d3c2102a65ce258e3c7215b4da671d46d41 Mon Sep 17 00:00:00 2001
From: Natalie Kwong <38087517+nataliekwong@users.noreply.github.com>
Date: Fri, 17 Nov 2023 14:29:53 -0800
Subject: [PATCH 47/57] Reinstate Pardot source connector doc (#32579)

---
 docs/integrations/sources/pardot.md | 55 +++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/docs/integrations/sources/pardot.md b/docs/integrations/sources/pardot.md
index f8f304797a39..c4304a8abe0e 100644
--- a/docs/integrations/sources/pardot.md
+++ b/docs/integrations/sources/pardot.md
@@ -1,7 +1,62 @@
 # Pardot

+## Overview
+
 The Airbyte Source for [Salesforce Pardot](https://www.pardot.com/)

+The Pardot source supports full refresh syncs.
+
+### Output schema
+
+Several output streams are available from this source:
+
+* [Campaigns](https://developer.salesforce.com/docs/marketing/pardot/guide/campaigns-v4.html)
+* [EmailClicks](https://developer.salesforce.com/docs/marketing/pardot/guide/batch-email-clicks-v4.html)
+* [ListMembership](https://developer.salesforce.com/docs/marketing/pardot/guide/list-memberships-v4.html)
+* [Lists](https://developer.salesforce.com/docs/marketing/pardot/guide/lists-v4.html)
+* [ProspectAccounts](https://developer.salesforce.com/docs/marketing/pardot/guide/prospect-accounts-v4.html)
+* [Prospects](https://developer.salesforce.com/docs/marketing/pardot/guide/prospects-v4.html)
+* [Users](https://developer.salesforce.com/docs/marketing/pardot/guide/users-v4.html)
+* [VisitorActivities](https://developer.salesforce.com/docs/marketing/pardot/guide/visitor-activities-v4.html)
+* [Visitors](https://developer.salesforce.com/docs/marketing/pardot/guide/visitors-v4.html)
+* [Visits](https://developer.salesforce.com/docs/marketing/pardot/guide/visits-v4.html)
+
+If there are more endpoints you'd like Airbyte to support, please [create an issue.](https://github.com/airbytehq/airbyte/issues/new/choose)
+
+### Features
+
+| Feature | Supported? |
+| :--- | :--- |
+| Full Refresh Sync | Yes |
+| Incremental Sync | No |
+| SSL connection | No |
+| Namespaces | No |
+
+### Performance considerations
+
+The Pardot connector should not run into Pardot API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully.
+
+## Getting started
+
+### Requirements
+
+* Pardot Account
+* Pardot Business Unit ID
+* Client ID
+* Client Secret
+* Refresh Token
+* Start Date
+* Is Sandbox environment?
+
+### Setup guide
+
+* `pardot_business_unit_id`: Pardot Business ID, can be found at Setup > Pardot > Pardot Account Setup
+* `client_id`: The Consumer Key that can be found when viewing your app in Salesforce
+* `client_secret`: The Consumer Secret that can be found when viewing your app in Salesforce
+* `refresh_token`: Salesforce Refresh Token used for Airbyte to access your Salesforce account. If you don't know what this is, follow [this guide](https://medium.com/@bpmmendis94/obtain-access-refresh-tokens-from-salesforce-rest-api-a324fe4ccd9b) to retrieve it.
+* `start_date`: UTC date and time in the format 2017-01-25T00:00:00Z. Any data before this date will not be replicated. Leave blank to skip this filter
+* `is_sandbox`: Whether or not the app is in a Salesforce sandbox. If you do not know what this is, assume it is false.
+
 ## Changelog

 | Version | Date | Pull Request | Subject |

From a41d11b4a024c9715cf4387802bc88c5c918453c Mon Sep 17 00:00:00 2001
From: Anatolii Yatsuk <35109939+tolik0@users.noreply.github.com>
Date: Tue, 21 Nov 2023 13:10:16 +0200
Subject: [PATCH 48/57] =?UTF-8?q?=F0=9F=90=9B=20Source=20S3:=20Fix=20disco?=
 =?UTF-8?q?very=20for=20zip=20file=20(#32677)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Alexandre Girard

---
 airbyte-integrations/connectors/source-s3/metadata.yaml | 2 +-
 .../connectors/source-s3/source_s3/v4/stream_reader.py  | 2 +-
 docs/integrations/sources/s3.md                          | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml
index 27c20f59f30f..4c99fb3cc67a 100644
--- a/airbyte-integrations/connectors/source-s3/metadata.yaml
+++ b/airbyte-integrations/connectors/source-s3/metadata.yaml
@@ -10,7 +10,7 @@ data:
   connectorSubtype: file
   connectorType: source
   definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2
-  dockerImageTag: 4.2.1
+  dockerImageTag: 4.2.2
   dockerRepository: airbyte/source-s3
   documentationUrl: https://docs.airbyte.com/integrations/sources/s3
   githubIssueLabel: source-s3
diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py
index 01f67fad69b2..d8bfbd5b16bc 100644
--- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py
+++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py
@@ -157,7 +157,7 @@ def _page(
             break

     def _handle_file(self, file):
-        if file["Key"].endswith("zip"):
+        if file["Key"].endswith(".zip"):
             yield from self._handle_zip_file(file)
         else:
             yield self._handle_regular_file(file)
diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md
index bc5652a0c934..a0e2e8ec9d70 100644
--- a/docs/integrations/sources/s3.md
+++ b/docs/integrations/sources/s3.md
@@ -256,7 +256,8 @@ To perform the text extraction from PDF and Docx files, the connector uses the [

 | Version | Date       | Pull Request                                                                                                      | Subject                                                                                                                |
 |:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------|
-| 4.2.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema |
+| 4.2.2 | 2023-11-20 | [32677](https://github.com/airbytehq/airbyte/pull/32677) | Only read files with ".zip" extension as zipped files |
+| 4.2.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema |
 | 4.2.0 | 2023-11-02 | [32109](https://github.com/airbytehq/airbyte/pull/32109) | Fix docs; add HTTPS validation for S3 endpoint; fix coverage |
 | 4.1.4 | 2023-10-30 | [31904](https://github.com/airbytehq/airbyte/pull/31904) | Update CDK |
 | 4.1.3 | 2023-10-25 | [31654](https://github.com/airbytehq/airbyte/pull/31654) | Reduce image size |

From 6fce769ce3a5e131b04ec60e4a9c5de5330b2806 Mon Sep 17 00:00:00 2001
From: Daryna Ishchenko <80129833+darynaishchenko@users.noreply.github.com>
Date: Thu, 23 Nov 2023 15:06:38 +0200
Subject: [PATCH 49/57] :bug: Source Github: return AirbyteMessage 
if max retry exeeded for 202 status code (#32679) Co-authored-by: darynaishchenko --- .../integration_tests/expected_records.jsonl | 10 ++-- .../connectors/source-github/metadata.yaml | 2 +- .../source-github/source_github/streams.py | 12 +++-- .../source-github/unit_tests/test_stream.py | 49 +++++++++++++++---- docs/integrations/sources/github.md | 3 +- 5 files changed, 56 insertions(+), 20 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl index 7ccbbee8037c..9b65df5c424e 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl @@ -21,10 +21,10 @@ {"stream":"projects_v2","data":{"closed":false,"created_at":"2023-09-25T18:34:52Z","closed_at":null,"updated_at":"2023-09-25T18:35:45Z","creator":{"avatarUrl":"https://avatars.githubusercontent.com/u/92915184?u=e53c87d81ec6fb0596bc0f75e12e84e8f0df8d83&v=4","login":"airbyteio","resourcePath":"/airbyteio","url":"https://github.com/airbyteio"},"node_id":"PVT_kwDOA4_XW84AV7NS","id":5747538,"number":58,"public":false,"readme":"# Title\nintegration test project","short_description":"integration test project description","template":false,"title":"integration test project","url":"https://github.com/orgs/airbytehq/projects/58","viewerCanClose":true,"viewerCanReopen":true,"viewerCanUpdate":true,"owner_id":"MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3","repository":"airbytehq/integration-test"},"emitted_at":1695666959656} {"stream":"pull_request_comment_reactions","data":{"node_id":"MDMyOlB1bGxSZXF1ZXN0UmV2aWV3Q29tbWVudFJlYWN0aW9uMTI3MDUxNDM4","id":127051438,"content":"HEART","created_at":"2021-09-06T11:37:25Z","user":{"node_id":"MDQ6VXNlcjM0MTAzMTI1","id":34103125,"login":"yevhenii-ldv","avatar_url":"https://avatars.githubusercontent.com/u/34103125?u=3e49bb73177a9f70896e3d49b34656ab659c70a5&v=4","html_url":"https://github.com/yevhenii-ldv","site_admin":false,"type":"User"},"repository":"airbytehq/integration-test","comment_id":699253726},"emitted_at":1677668755106} {"stream":"pull_request_commits","data":{"sha":"00a74695eb754865a552196ee158a87f0b9dcff7","node_id":"MDY6Q29tbWl0NDAwMDUyMjEzOjAwYTc0Njk1ZWI3NTQ4NjVhNTUyMTk2ZWUxNThhODdmMGI5ZGNmZjc=","commit":{"author":{"name":"Arthur Galuza","email":"a.galuza@exaft.com","date":"2021-08-27T15:41:11Z"},"committer":{"name":"Arthur Galuza","email":"a.galuza@exaft.com","date":"2021-08-27T15:41:11Z"},"message":"commit number 
0","tree":{"sha":"3f2a52f90f9acc30359b00065e5b989267fef1f5","url":"https://api.github.com/repos/airbytehq/integration-test/git/trees/3f2a52f90f9acc30359b00065e5b989267fef1f5"},"url":"https://api.github.com/repos/airbytehq/integration-test/git/commits/00a74695eb754865a552196ee158a87f0b9dcff7","comment_count":0,"verification":{"verified":false,"reason":"unsigned","signature":null,"payload":null}},"url":"https://api.github.com/repos/airbytehq/integration-test/commits/00a74695eb754865a552196ee158a87f0b9dcff7","html_url":"https://github.com/airbytehq/integration-test/commit/00a74695eb754865a552196ee158a87f0b9dcff7","comments_url":"https://api.github.com/repos/airbytehq/integration-test/commits/00a74695eb754865a552196ee158a87f0b9dcff7/comments","author":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"committer":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"parents":[{"sha":"978753aeb56f7b49872279d1b491411a6235aa90","url":"https://api.github.com/repos/airbytehq/integration-test/commits/978753aeb56f7b49872279d1b491411a6235aa90","html_url":"https://github.com/airbytehq/integration-test/commit/978753aeb56f7b49872279d1b491411a6235aa90"}],"repository":"airbytehq/integration-test","pull_number":5},"emitted_at":1677668756160} -{"stream":"pull_request_stats","data":{"node_id":"MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2","id":721435506,"number":5,"updated_at":"2021-08-27T15:53:14Z","changed_files":5,"deletions":0,"additions":5,"merged":false,"mergeable":"MERGEABLE","can_be_rebased":true,"maintainer_can_modify":false,"merge_state_status":"BLOCKED","comments":0,"commits":5,"review_comments":0,"merged_by":null,"repository":"airbytehq/integration-test"},"emitted_at":1677668759962} -{"stream": "pull_requests", "data": {"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5", "id": 721435506, "node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "html_url": 
"https://github.com/airbytehq/integration-test/pull/5", "diff_url": "https://github.com/airbytehq/integration-test/pull/5.diff", "patch_url": "https://github.com/airbytehq/integration-test/pull/5.patch", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5", "number": 5, "state": "open", "locked": false, "title": "New PR from feature/branch_4", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "body": null, "created_at": "2021-08-27T15:43:40Z", "updated_at": "2021-08-27T15:53:14Z", "closed_at": null, "merged_at": null, "merge_commit_sha": "191309e3da8b36705156348ae73f4dca836533f9", "assignee": null, "assignees": [], "requested_reviewers": [], "requested_teams": [], "labels": [{"id": 3295756566, "node_id": "MDU6TGFiZWwzMjk1NzU2NTY2", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/bug", "name": "bug", "color": "d73a4a", "default": true, "description": "Something isn't working"}, {"id": 3300346197, "node_id": "MDU6TGFiZWwzMzAwMzQ2MTk3", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/critical", "name": "critical", "color": "ededed", "default": false, "description": null}], "milestone": null, "draft": false, "commits_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits", "review_comments_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments", "review_comment_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "head": {"label": "airbytehq:feature/branch_4", "ref": "feature/branch_4", "sha": "31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", 
"received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo_id": 400052213}, "base": {"label": "airbytehq:master", "ref": "master", "sha": "978753aeb56f7b49872279d1b491411a6235aa90", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": 
"https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments", "created_at": "2021-08-26T05:32:43Z", "updated_at": "2022-07-08T01:27:13Z", "pushed_at": "2023-05-03T16:40:56Z", "git_url": "git://github.com/airbytehq/integration-test.git", "ssh_url": "git@github.com:airbytehq/integration-test.git", "clone_url": "https://github.com/airbytehq/integration-test.git", "svn_url": "https://github.com/airbytehq/integration-test", "homepage": null, "size": 11, "stargazers_count": 4, "watchers_count": 4, "language": null, "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": false, "has_discussions": false, "forks_count": 2, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 10, "license": null, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [], "visibility": "public", "forks": 2, "open_issues": 10, "watchers": 4, "default_branch": "master"}, "repo_id": null}, "_links": {"self": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5"}, "html": {"href": "https://github.com/airbytehq/integration-test/pull/5"}, "issue": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5"}, 
"comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments"}, "review_comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments"}, "review_comment": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}"}, "commits": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits"}, "statuses": {"href": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083"}}, "author_association": "CONTRIBUTOR", "auto_merge": null, "active_lock_reason": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1685698519242} +{"stream": "pull_request_stats", "data": {"node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "id": 721435506, "number": 5, "updated_at": "2023-11-16T14:38:58Z", "changed_files": 5, "deletions": 0, "additions": 5, "merged": false, "mergeable": "MERGEABLE", "can_be_rebased": false, "maintainer_can_modify": false, "merge_state_status": "BLOCKED", "comments": 0, "commits": 5, "review_comments": 0, "merged_by": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700557306144} +{"stream": "pull_requests", "data": {"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5", "id": 721435506, "node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "html_url": "https://github.com/airbytehq/integration-test/pull/5", "diff_url": "https://github.com/airbytehq/integration-test/pull/5.diff", "patch_url": "https://github.com/airbytehq/integration-test/pull/5.patch", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5", "number": 5, "state": "closed", "locked": false, "title": "New PR from feature/branch_4", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "body": null, "created_at": "2021-08-27T15:43:40Z", "updated_at": "2023-11-16T14:38:58Z", "closed_at": "2023-11-16T14:38:58Z", "merged_at": null, "merge_commit_sha": "191309e3da8b36705156348ae73f4dca836533f9", "assignee": null, "assignees": [], "requested_reviewers": [], "requested_teams": [], "labels": [{"id": 3295756566, "node_id": "MDU6TGFiZWwzMjk1NzU2NTY2", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/bug", "name": "bug", "color": "d73a4a", "default": true, "description": "Something isn't working"}, {"id": 3300346197, "node_id": "MDU6TGFiZWwzMzAwMzQ2MTk3", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/critical", "name": "critical", "color": "ededed", "default": false, "description": null}], "milestone": null, "draft": false, "commits_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits", 
"review_comments_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments", "review_comment_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "head": {"label": "airbytehq:feature/branch_4", "ref": "feature/branch_4", "sha": "31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo_id": 400052213}, "base": {"label": "airbytehq:master", "ref": "master", "sha": "978753aeb56f7b49872279d1b491411a6235aa90", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": 
"https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", 
"releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments", "created_at": "2021-08-26T05:32:43Z", "updated_at": "2023-11-16T14:48:53Z", "pushed_at": "2023-05-03T16:40:56Z", "git_url": "git://github.com/airbytehq/integration-test.git", "ssh_url": "git@github.com:airbytehq/integration-test.git", "clone_url": "https://github.com/airbytehq/integration-test.git", "svn_url": "https://github.com/airbytehq/integration-test", "homepage": null, "size": 11, "stargazers_count": 4, "watchers_count": 4, "language": null, "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": false, "has_discussions": false, "forks_count": 2, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 6, "license": null, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [], "visibility": "public", "forks": 2, "open_issues": 6, "watchers": 4, "default_branch": "master"}, "repo_id": null}, "_links": {"self": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5"}, "html": {"href": "https://github.com/airbytehq/integration-test/pull/5"}, "issue": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5"}, "comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments"}, "review_comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments"}, "review_comment": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}"}, "commits": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits"}, "statuses": {"href": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083"}}, "author_association": "CONTRIBUTOR", "auto_merge": null, "active_lock_reason": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700585060024} {"stream":"releases","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586","assets_url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586/assets","upload_url":"https://uploads.github.com/repos/airbytehq/integration-test/releases/48581586/assets{?name,label}","html_url":"https://github.com/airbytehq/integration-test/releases/tag/dev-0.9","id":48581586,"author":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"node_id":"MDc6UmVsZWFzZTQ4NTgxNTg2","tag_name":"dev-0.9","target_commitish":"master","name":"9 global 
release","draft":false,"prerelease":false,"created_at":"2021-08-27T07:03:09Z","published_at":"2021-08-27T15:43:53Z","assets":[],"tarball_url":"https://api.github.com/repos/airbytehq/integration-test/tarball/dev-0.9","zipball_url":"https://api.github.com/repos/airbytehq/integration-test/zipball/dev-0.9","body":"","repository":"airbytehq/integration-test"},"emitted_at":1677668760424} -{"stream":"repositories","data":{"id":283046497,"node_id":"MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=","name":"airbyte","full_name":"airbytehq/airbyte","private":false,"owner":{"login":"airbytehq","id":59758427,"node_id":"MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3","avatar_url":"https://avatars.githubusercontent.com/u/59758427?v=4","gravatar_id":"","url":"https://api.github.com/users/airbytehq","html_url":"https://github.com/airbytehq","followers_url":"https://api.github.com/users/airbytehq/followers","following_url":"https://api.github.com/users/airbytehq/following{/other_user}","gists_url":"https://api.github.com/users/airbytehq/gists{/gist_id}","starred_url":"https://api.github.com/users/airbytehq/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/airbytehq/subscriptions","organizations_url":"https://api.github.com/users/airbytehq/orgs","repos_url":"https://api.github.com/users/airbytehq/repos","events_url":"https://api.github.com/users/airbytehq/events{/privacy}","received_events_url":"https://api.github.com/users/airbytehq/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/airbytehq/airbyte","description":"Data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes.","fork":false,"url":"https://api.github.com/repos/airbytehq/airbyte","forks_url":"https://api.github.com/repos/airbytehq/airbyte/forks","keys_url":"https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}","collaborators_url":"https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/airbytehq/airbyte/teams","hooks_url":"https://api.github.com/repos/airbytehq/airbyte/hooks","issue_events_url":"https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}","events_url":"https://api.github.com/repos/airbytehq/airbyte/events","assignees_url":"https://api.github.com/repos/airbytehq/airbyte/assignees{/user}","branches_url":"https://api.github.com/repos/airbytehq/airbyte/branches{/branch}","tags_url":"https://api.github.com/repos/airbytehq/airbyte/tags","blobs_url":"https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}","trees_url":"https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}","statuses_url":"https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}","languages_url":"https://api.github.com/repos/airbytehq/airbyte/languages","stargazers_url":"https://api.github.com/repos/airbytehq/airbyte/stargazers","contributors_url":"https://api.github.com/repos/airbytehq/airbyte/contributors","subscribers_url":"https://api.github.com/repos/airbytehq/airbyte/subscribers","subscription_url":"https://api.github.com/repos/airbytehq/airbyte/subscription","commits_url":"https://api.github.com/repos/airbytehq/airbyte/commits{/sha}","git_commits_url":"https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}","comments_url":"https://api.github.com/repos/airbytehq/airbyte/comments{/number}","issue_comment_url":"https://api.github.com
/repos/airbytehq/airbyte/issues/comments{/number}","contents_url":"https://api.github.com/repos/airbytehq/airbyte/contents/{+path}","compare_url":"https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}","merges_url":"https://api.github.com/repos/airbytehq/airbyte/merges","archive_url":"https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/airbytehq/airbyte/downloads","issues_url":"https://api.github.com/repos/airbytehq/airbyte/issues{/number}","pulls_url":"https://api.github.com/repos/airbytehq/airbyte/pulls{/number}","milestones_url":"https://api.github.com/repos/airbytehq/airbyte/milestones{/number}","notifications_url":"https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/airbytehq/airbyte/labels{/name}","releases_url":"https://api.github.com/repos/airbytehq/airbyte/releases{/id}","deployments_url":"https://api.github.com/repos/airbytehq/airbyte/deployments","created_at":"2020-07-27T23:55:54Z","updated_at":"2023-09-22T09:10:42Z","pushed_at":"2023-09-22T09:17:52Z","git_url":"git://github.com/airbytehq/airbyte.git","ssh_url":"git@github.com:airbytehq/airbyte.git","clone_url":"https://github.com/airbytehq/airbyte.git","svn_url":"https://github.com/airbytehq/airbyte","homepage":"https://airbyte.com","size":396556,"stargazers_count":11806,"watchers_count":11806,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":false,"has_pages":false,"has_discussions":true,"forks_count":3071,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":5029,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":["airbyte","bigquery","change-data-capture","data","data-analysis","data-collection","data-engineering","data-ingestion","data-integration","elt","etl","java","pipeline","python","redshift","snowflake"],"visibility":"public","forks":3071,"open_issues":5029,"watchers":11806,"default_branch":"master","permissions":{"admin":true,"maintain":true,"push":true,"triage":true,"pull":true},"security_and_analysis":{"secret_scanning":{"status":"disabled"},"secret_scanning_push_protection":{"status":"disabled"},"dependabot_security_updates":{"status":"enabled"}},"organization":"airbytehq"},"emitted_at":1695374353086} +{"stream": "repositories", "data": {"id": 283046497, "node_id": "MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=", "name": "airbyte", "full_name": "airbytehq/airbyte", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": 
"https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/airbyte", "description": "Data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes.", "fork": false, "url": "https://api.github.com/repos/airbytehq/airbyte", "forks_url": "https://api.github.com/repos/airbytehq/airbyte/forks", "keys_url": "https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/airbyte/teams", "hooks_url": "https://api.github.com/repos/airbytehq/airbyte/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/airbyte/events", "assignees_url": "https://api.github.com/repos/airbytehq/airbyte/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/airbyte/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/airbyte/tags", "blobs_url": "https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/airbyte/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/airbyte/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/airbyte/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/airbyte/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/airbyte/subscription", "commits_url": "https://api.github.com/repos/airbytehq/airbyte/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/airbyte/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/airbyte/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/airbyte/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/airbyte/merges", "archive_url": "https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/airbyte/downloads", "issues_url": "https://api.github.com/repos/airbytehq/airbyte/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/airbyte/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/airbyte/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/airbyte/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/airbyte/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/airbyte/deployments", "created_at": "2020-07-27T23:55:54Z", "updated_at": "2023-11-21T14:55:05Z", "pushed_at": "2023-11-21T16:55:37Z", "git_url": "git://github.com/airbytehq/airbyte.git", "ssh_url": "git@github.com:airbytehq/airbyte.git", "clone_url": "https://github.com/airbytehq/airbyte.git", 
"svn_url": "https://github.com/airbytehq/airbyte", "homepage": "https://airbyte.com", "size": 455477, "stargazers_count": 12328, "watchers_count": 12328, "language": "Python", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": false, "has_pages": false, "has_discussions": true, "forks_count": 3226, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 5053, "license": {"key": "other", "name": "Other", "spdx_id": "NOASSERTION", "url": null, "node_id": "MDc6TGljZW5zZTA="}, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": ["airbyte", "bigquery", "change-data-capture", "data", "data-analysis", "data-collection", "data-engineering", "data-ingestion", "data-integration", "elt", "etl", "java", "pipeline", "python", "redshift", "snowflake"], "visibility": "public", "forks": 3226, "open_issues": 5053, "watchers": 12328, "default_branch": "master", "permissions": {"admin": true, "maintain": true, "push": true, "triage": true, "pull": true}, "security_and_analysis": {"secret_scanning": {"status": "disabled"}, "secret_scanning_push_protection": {"status": "disabled"}, "dependabot_security_updates": {"status": "enabled"}, "secret_scanning_validity_checks": {"status": "disabled"}}, "organization": "airbytehq"}, "emitted_at": 1700585836592} {"stream":"review_comments","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726","pull_request_review_id":742633128,"id":699253726,"node_id":"MDI0OlB1bGxSZXF1ZXN0UmV2aWV3Q29tbWVudDY5OTI1MzcyNg==","diff_hunk":"@@ -0,0 +1 @@\n+text_for_file_","path":"github_sources/file_1.txt","commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","original_commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","user":{"login":"yevhenii-ldv","id":34103125,"node_id":"MDQ6VXNlcjM0MTAzMTI1","avatar_url":"https://avatars.githubusercontent.com/u/34103125?v=4","gravatar_id":"","url":"https://api.github.com/users/yevhenii-ldv","html_url":"https://github.com/yevhenii-ldv","followers_url":"https://api.github.com/users/yevhenii-ldv/followers","following_url":"https://api.github.com/users/yevhenii-ldv/following{/other_user}","gists_url":"https://api.github.com/users/yevhenii-ldv/gists{/gist_id}","starred_url":"https://api.github.com/users/yevhenii-ldv/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/yevhenii-ldv/subscriptions","organizations_url":"https://api.github.com/users/yevhenii-ldv/orgs","repos_url":"https://api.github.com/users/yevhenii-ldv/repos","events_url":"https://api.github.com/users/yevhenii-ldv/events{/privacy}","received_events_url":"https://api.github.com/users/yevhenii-ldv/received_events","type":"User","site_admin":false},"body":"Good 
point","created_at":"2021-08-31T12:01:15Z","updated_at":"2021-08-31T12:01:15Z","html_url":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726","pull_request_url":"https://api.github.com/repos/airbytehq/integration-test/pulls/4","author_association":"MEMBER","_links":{"self":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726"},"html":{"href":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726"},"pull_request":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/4"}},"reactions":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726/reactions","total_count":1,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":1,"rocket":0,"eyes":0},"start_line":null,"original_start_line":null,"start_side":null,"line":1,"original_line":1,"side":"RIGHT","original_position":1,"position":1,"subject_type":"line","repository":"airbytehq/integration-test"},"emitted_at":1695375624151} {"stream":"reviews","data":{"node_id":"MDE3OlB1bGxSZXF1ZXN0UmV2aWV3NzQwNjU5Nzk4","id":740659798,"body":"Review commit for branch feature/branch_4","state":"COMMENTED","html_url":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798","author_association":"CONTRIBUTOR","submitted_at":"2021-08-27T15:43:42Z","created_at":"2021-08-27T15:43:42Z","updated_at":"2021-08-27T15:43:42Z","user":{"node_id":"MDQ6VXNlcjc0MzkwMQ==","id":743901,"login":"gaart","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","html_url":"https://github.com/gaart","site_admin":false,"type":"User"},"repository":"airbytehq/integration-test","pull_request_url":"https://github.com/airbytehq/integration-test/pull/5","commit_id":"31a3e3f19fefce60fba6bfc69dd2b3fb5195a083","_links":{"html":{"href":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798"},"pull_request":{"href":"https://github.com/airbytehq/integration-test/pull/5"}}},"emitted_at":1677668764954} {"stream":"stargazers","data":{"starred_at":"2021-08-27T16:23:34Z","user":{"login":"VasylLazebnyk","id":68591643,"node_id":"MDQ6VXNlcjY4NTkxNjQz","avatar_url":"https://avatars.githubusercontent.com/u/68591643?v=4","gravatar_id":"","url":"https://api.github.com/users/VasylLazebnyk","html_url":"https://github.com/VasylLazebnyk","followers_url":"https://api.github.com/users/VasylLazebnyk/followers","following_url":"https://api.github.com/users/VasylLazebnyk/following{/other_user}","gists_url":"https://api.github.com/users/VasylLazebnyk/gists{/gist_id}","starred_url":"https://api.github.com/users/VasylLazebnyk/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/VasylLazebnyk/subscriptions","organizations_url":"https://api.github.com/users/VasylLazebnyk/orgs","repos_url":"https://api.github.com/users/VasylLazebnyk/repos","events_url":"https://api.github.com/users/VasylLazebnyk/events{/privacy}","received_events_url":"https://api.github.com/users/VasylLazebnyk/received_events","type":"User","site_admin":false},"repository":"airbytehq/integration-test","user_id":68591643},"emitted_at":1677668765231} @@ -32,8 +32,8 @@ {"stream":"teams", "data": {"name": "Zazmic", "id": 4432406, "node_id": "MDQ6VGVhbTQ0MzI0MDY=", "slug": "zazmic", "description": "", "privacy": "closed", "notification_setting": "notifications_enabled", "url": "https://api.github.com/organizations/59758427/team/4432406", "html_url": "https://github.com/orgs/airbytehq/teams/zazmic", "members_url": 
"https://api.github.com/organizations/59758427/team/4432406/members{/member}", "repositories_url": "https://api.github.com/organizations/59758427/team/4432406/repos", "permission": "pull", "parent": null, "organization": "airbytehq"}, "emitted_at": 1681307598422} {"stream":"users","data":{"login":"AirbyteEricksson","id":101604444,"node_id":"U_kgDOBg5cXA","avatar_url":"https://avatars.githubusercontent.com/u/101604444?v=4","gravatar_id":"","url":"https://api.github.com/users/AirbyteEricksson","html_url":"https://github.com/AirbyteEricksson","followers_url":"https://api.github.com/users/AirbyteEricksson/followers","following_url":"https://api.github.com/users/AirbyteEricksson/following{/other_user}","gists_url":"https://api.github.com/users/AirbyteEricksson/gists{/gist_id}","starred_url":"https://api.github.com/users/AirbyteEricksson/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/AirbyteEricksson/subscriptions","organizations_url":"https://api.github.com/users/AirbyteEricksson/orgs","repos_url":"https://api.github.com/users/AirbyteEricksson/repos","events_url":"https://api.github.com/users/AirbyteEricksson/events{/privacy}","received_events_url":"https://api.github.com/users/AirbyteEricksson/received_events","type":"User","site_admin":false,"organization":"airbytehq"},"emitted_at":1677668766142} {"stream":"workflows","data":{"id":22952989,"node_id":"W_kwDOF9hP9c4BXjwd","name":"Pull Request Labeler","path":".github/workflows/labeler.yml","state":"active","created_at":"2022-03-30T21:30:37.000+02:00","updated_at":"2022-03-30T21:30:37.000+02:00","url":"https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989","html_url":"https://github.com/airbytehq/integration-test/blob/master/.github/workflows/labeler.yml","badge_url":"https://github.com/airbytehq/integration-test/workflows/Pull%20Request%20Labeler/badge.svg","repository":"airbytehq/integration-test"},"emitted_at":1677668766580} -{"stream": "workflow_runs", "data": {"id": 3184250176, "name": "Pull Request Labeler", "node_id": "WFR_kwLOF9hP9c69y81A", "head_branch": "feature/branch_5", "head_sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "path": ".github/workflows/labeler.yml", "display_title": "New PR from feature/branch_5", "run_number": 3, "event": "pull_request_target", "status": "completed", "conclusion": "success", "workflow_id": 22952989, "check_suite_id": 8611635614, "check_suite_node_id": "CS_kwDOF9hP9c8AAAACAUshng", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/3184250176", "pull_requests": [{"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/14", "id": 984835098, "number": 14, "head": {"ref": "feature/branch_5", "sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "repo": {"id": 400052213, "url": "https://api.github.com/repos/airbytehq/integration-test", "name": "integration-test"}}, "base": {"ref": "master", "sha": "a12c9379604f7b32e54e5459122aa48473f806ee", "repo": {"id": 400052213, "url": "https://api.github.com/repos/airbytehq/integration-test", "name": "integration-test"}}}], "created_at": "2022-10-04T17:41:18Z", "updated_at": "2023-11-08T19:58:29Z", "actor": {"login": "grubberr", "id": 195743, "node_id": "MDQ6VXNlcjE5NTc0Mw==", "avatar_url": "https://avatars.githubusercontent.com/u/195743?v=4", "gravatar_id": "", "url": "https://api.github.com/users/grubberr", "html_url": "https://github.com/grubberr", "followers_url": 
"https://api.github.com/users/grubberr/followers", "following_url": "https://api.github.com/users/grubberr/following{/other_user}", "gists_url": "https://api.github.com/users/grubberr/gists{/gist_id}", "starred_url": "https://api.github.com/users/grubberr/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/grubberr/subscriptions", "organizations_url": "https://api.github.com/users/grubberr/orgs", "repos_url": "https://api.github.com/users/grubberr/repos", "events_url": "https://api.github.com/users/grubberr/events{/privacy}", "received_events_url": "https://api.github.com/users/grubberr/received_events", "type": "User", "site_admin": false}, "run_attempt": 1, "referenced_workflows": [], "run_started_at": "2022-10-04T17:41:18Z", "triggering_actor": {"login": "grubberr", "id": 195743, "node_id": "MDQ6VXNlcjE5NTc0Mw==", "avatar_url": "https://avatars.githubusercontent.com/u/195743?v=4", "gravatar_id": "", "url": "https://api.github.com/users/grubberr", "html_url": "https://github.com/grubberr", "followers_url": "https://api.github.com/users/grubberr/followers", "following_url": "https://api.github.com/users/grubberr/following{/other_user}", "gists_url": "https://api.github.com/users/grubberr/gists{/gist_id}", "starred_url": "https://api.github.com/users/grubberr/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/grubberr/subscriptions", "organizations_url": "https://api.github.com/users/grubberr/orgs", "repos_url": "https://api.github.com/users/grubberr/repos", "events_url": "https://api.github.com/users/grubberr/events{/privacy}", "received_events_url": "https://api.github.com/users/grubberr/received_events", "type": "User", "site_admin": false}, "jobs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/jobs", "logs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/logs", "check_suite_url": "https://api.github.com/repos/airbytehq/integration-test/check-suites/8611635614", "artifacts_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/artifacts", "cancel_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/cancel", "rerun_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176/rerun", "previous_attempt_url": null, "workflow_url": "https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989", "head_commit": {"id": "f71e5f6894578148d52b487dff07e55804fd9cfd", "tree_id": "bb78ec62be8c5c640010e7c897f40932ce59e725", "message": "file_5.txt updated\n\nSigned-off-by: Sergey Chvalyuk ", "timestamp": "2022-10-04T17:41:08Z", "author": {"name": "Sergey Chvalyuk", "email": "grubberr@gmail.com"}, "committer": {"name": "Sergey Chvalyuk", "email": "grubberr@gmail.com"}}, "repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": 
"https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": 
"https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}, "head_repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": 
"https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}}, "emitted_at": 1699644824401} -{"stream": "workflow_jobs", "data": {"id": 8705992587, "run_id": 3184250176, "workflow_name": "Pull Request Labeler", "head_branch": "feature/branch_5", "run_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/3184250176", "run_attempt": 1, "node_id": "CR_kwDOF9hP9c8AAAACBurniw", "head_sha": "f71e5f6894578148d52b487dff07e55804fd9cfd", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/jobs/8705992587", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/3184250176/job/8705992587", "status": "completed", "conclusion": "success", "created_at": "2022-10-04T17:41:20Z", "started_at": "2022-10-04T17:41:27Z", "completed_at": "2022-10-04T17:41:30Z", "name": "triage", "steps": [], "check_run_url": "https://api.github.com/repos/airbytehq/integration-test/check-runs/8705992587", "labels": ["ubuntu-latest"], "runner_id": 1, "runner_name": "Hosted Agent", "runner_group_id": 2, "runner_group_name": "GitHub Actions", "repository": "airbytehq/integration-test"}, "emitted_at": 1699646006344} +{"stream": "workflow_runs", "data": {"id": 4871166142, "name": "Pull Request Labeler", "node_id": "WFR_kwLOF9hP9c8AAAABIlgYvg", "head_branch": "arsenlosenko/test-pending-comments-in-pr", "head_sha": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "path": ".github/workflows/labeler.yml", "display_title": "Update .gitignore", "run_number": 4, "event": "pull_request_target", "status": "completed", "conclusion": "success", "workflow_id": 22952989, "check_suite_id": 12643387080, "check_suite_node_id": "CS_kwDOF9hP9c8AAAAC8ZrGyA", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/4871166142", "pull_requests": [], "created_at": "2023-05-03T11:05:23Z", "updated_at": "2023-05-03T11:05:36Z", "actor": {"login": "arsenlosenko", "id": 20901439, "node_id": "MDQ6VXNlcjIwOTAxNDM5", "avatar_url": 
"https://avatars.githubusercontent.com/u/20901439?v=4", "gravatar_id": "", "url": "https://api.github.com/users/arsenlosenko", "html_url": "https://github.com/arsenlosenko", "followers_url": "https://api.github.com/users/arsenlosenko/followers", "following_url": "https://api.github.com/users/arsenlosenko/following{/other_user}", "gists_url": "https://api.github.com/users/arsenlosenko/gists{/gist_id}", "starred_url": "https://api.github.com/users/arsenlosenko/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/arsenlosenko/subscriptions", "organizations_url": "https://api.github.com/users/arsenlosenko/orgs", "repos_url": "https://api.github.com/users/arsenlosenko/repos", "events_url": "https://api.github.com/users/arsenlosenko/events{/privacy}", "received_events_url": "https://api.github.com/users/arsenlosenko/received_events", "type": "User", "site_admin": false}, "run_attempt": 1, "referenced_workflows": [], "run_started_at": "2023-05-03T11:05:23Z", "triggering_actor": {"login": "arsenlosenko", "id": 20901439, "node_id": "MDQ6VXNlcjIwOTAxNDM5", "avatar_url": "https://avatars.githubusercontent.com/u/20901439?v=4", "gravatar_id": "", "url": "https://api.github.com/users/arsenlosenko", "html_url": "https://github.com/arsenlosenko", "followers_url": "https://api.github.com/users/arsenlosenko/followers", "following_url": "https://api.github.com/users/arsenlosenko/following{/other_user}", "gists_url": "https://api.github.com/users/arsenlosenko/gists{/gist_id}", "starred_url": "https://api.github.com/users/arsenlosenko/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/arsenlosenko/subscriptions", "organizations_url": "https://api.github.com/users/arsenlosenko/orgs", "repos_url": "https://api.github.com/users/arsenlosenko/repos", "events_url": "https://api.github.com/users/arsenlosenko/events{/privacy}", "received_events_url": "https://api.github.com/users/arsenlosenko/received_events", "type": "User", "site_admin": false}, "jobs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/jobs", "logs_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/logs", "check_suite_url": "https://api.github.com/repos/airbytehq/integration-test/check-suites/12643387080", "artifacts_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/artifacts", "cancel_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/cancel", "rerun_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142/rerun", "previous_attempt_url": null, "workflow_url": "https://api.github.com/repos/airbytehq/integration-test/actions/workflows/22952989", "head_commit": {"id": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "tree_id": "3cc1c41924b3cb67150684024877f6e02d283afb", "message": "Update .gitignore", "timestamp": "2023-05-03T11:04:11Z", "author": {"name": "Arsen Losenko", "email": "20901439+arsenlosenko@users.noreply.github.com"}, "committer": {"name": "Arsen Losenko", "email": "20901439+arsenlosenko@users.noreply.github.com"}}, "repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": 
"https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": 
"https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}, "head_repository": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": 
"https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments"}}, "emitted_at": 1700586521273} +{"stream": "workflow_jobs", "data": {"id": 13199605689, "run_id": 4871166142, "workflow_name": "Pull Request Labeler", "head_branch": "arsenlosenko/test-pending-comments-in-pr", "run_url": "https://api.github.com/repos/airbytehq/integration-test/actions/runs/4871166142", "run_attempt": 1, "node_id": "CR_kwDOF9hP9c8AAAADEsH_uQ", "head_sha": "47c7a128f28791f657265eb89cdf7ab28a0ff51b", "url": "https://api.github.com/repos/airbytehq/integration-test/actions/jobs/13199605689", "html_url": "https://github.com/airbytehq/integration-test/actions/runs/4871166142/job/13199605689", "status": "completed", "conclusion": "success", "created_at": "2023-05-03T11:05:25Z", "started_at": "2023-05-03T11:05:30Z", "completed_at": "2023-05-03T11:05:34Z", "name": "triage", "steps": [{"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1, "started_at": "2023-05-03T14:05:30.000+03:00", "completed_at": "2023-05-03T14:05:31.000+03:00"}, {"name": "Run actions/labeler@v3", "status": "completed", "conclusion": "success", "number": 2, "started_at": "2023-05-03T14:05:32.000+03:00", "completed_at": "2023-05-03T14:05:32.000+03:00"}, {"name": "Complete job", "status": "completed", "conclusion": "success", "number": 3, "started_at": "2023-05-03T14:05:32.000+03:00", "completed_at": "2023-05-03T14:05:32.000+03:00"}], "check_run_url": "https://api.github.com/repos/airbytehq/integration-test/check-runs/13199605689", "labels": ["ubuntu-latest"], "runner_id": 4, "runner_name": "GitHub Actions 4", "runner_group_id": 2, "runner_group_name": "GitHub Actions", "repository": "airbytehq/integration-test"}, "emitted_at": 1700587195423} {"stream": "team_members", "data": 
{"login": "johnlafleur", "id": 68561602, "node_id": "MDQ6VXNlcjY4NTYxNjAy", "avatar_url": "https://avatars.githubusercontent.com/u/68561602?v=4", "gravatar_id": "", "url": "https://api.github.com/users/johnlafleur", "html_url": "https://github.com/johnlafleur", "followers_url": "https://api.github.com/users/johnlafleur/followers", "following_url": "https://api.github.com/users/johnlafleur/following{/other_user}", "gists_url": "https://api.github.com/users/johnlafleur/gists{/gist_id}", "starred_url": "https://api.github.com/users/johnlafleur/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/johnlafleur/subscriptions", "organizations_url": "https://api.github.com/users/johnlafleur/orgs", "repos_url": "https://api.github.com/users/johnlafleur/repos", "events_url": "https://api.github.com/users/johnlafleur/events{/privacy}", "received_events_url": "https://api.github.com/users/johnlafleur/received_events", "type": "User", "site_admin": false, "organization": "airbytehq", "team_slug": "airbyte-eng"}, "emitted_at": 1698750584444} {"stream": "team_memberships", "data": {"state": "active", "role": "member", "url": "https://api.github.com/organizations/59758427/team/4559297/memberships/johnlafleur", "organization": "airbytehq", "team_slug": "airbyte-core", "username": "johnlafleur"}, "emitted_at": 1698757985640} {"stream": "issue_timeline_events", "data": {"repository": "airbytehq/integration-test", "issue_number": 6, "labeled": {"id": 5219398390, "node_id": "MDEyOkxhYmVsZWRFdmVudDUyMTkzOTgzOTA=", "url": "https://api.github.com/repos/airbytehq/integration-test/issues/events/5219398390", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "event": "labeled", "commit_id": null, "commit_url": null, "created_at": "2021-08-27T15:43:58Z", "label": {"name": "critical", "color": "ededed"}, "performed_via_github_app": null}, "milestoned": {"id": 5219398392, "node_id": "MDE1Ok1pbGVzdG9uZWRFdmVudDUyMTkzOTgzOTI=", "url": "https://api.github.com/repos/airbytehq/integration-test/issues/events/5219398392", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": 
"https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "event": "milestoned", "commit_id": null, "commit_url": null, "created_at": "2021-08-27T15:43:58Z", "milestone": {"title": "main"}, "performed_via_github_app": null}, "commented": {"url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments/907296167", "html_url": "https://github.com/airbytehq/integration-test/issues/6#issuecomment-907296167", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/6", "id": 907296167, "node_id": "IC_kwDOF9hP9c42FD2n", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "created_at": "2021-08-27T15:43:59Z", "updated_at": "2021-08-27T15:43:59Z", "author_association": "CONTRIBUTOR", "body": "comment for issues https://api.github.com/repos/airbytehq/integration-test/issues/6/comments", "reactions": {"url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments/907296167/reactions", "total_count": 0, "+1": 0, "-1": 0, "laugh": 0, "hooray": 0, "confused": 0, "heart": 0, "rocket": 0, "eyes": 0}, "performed_via_github_app": null, "event": "commented", "actor": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}}}, "emitted_at": 1695815681406} diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index aeb654a7ec8f..0ab538e21a6c 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: 
ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.5.3 + dockerImageTag: 1.5.4 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github githubIssueLabel: source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 200babf62f8e..fac84f7ed531 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -9,7 +9,8 @@ import pendulum import requests -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode +from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException @@ -1606,8 +1607,13 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter yield from super().read_records(stream_slice=stream_slice, **kwargs) except HTTPError as e: if e.response.status_code == requests.codes.ACCEPTED: - self.logger.info(f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.") - yield + yield AirbyteMessage( + type=MessageType.LOG, + log=AirbyteLogMessage( + level=Level.INFO, + message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.", + ), + ) else: raise e diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index ce9675c5d784..87d9c3478cd3 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -10,11 +10,11 @@ import pytest import requests import responses -from airbyte_cdk.models import SyncMode +from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException, UserDefinedBackoffException from requests import HTTPError from responses import matchers -from source_github import constants +from source_github import SourceGithub, constants from source_github.streams import ( Branches, Collaborators, @@ -1369,21 +1369,50 @@ def test_stream_contributor_activity_parse_empty_response(caplog): @responses.activate def test_stream_contributor_activity_accepted_response(caplog): - repository_args = { - "page_size_for_large_streams": 20, - "repositories": ["airbytehq/airbyte"], - } - stream = ContributorActivity(**repository_args) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte"}, + status=200, + ) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", + json={"full_name": "airbytehq/test_airbyte", "default_branch": "default_branch"}, + status=200, + ) + responses.add( + responses.GET, + "https://api.github.com/repos/airbytehq/test_airbyte/branches?per_page=100", + json={}, + status=200, + ) resp = responses.add( responses.GET, - "https://api.github.com/repos/airbytehq/airbyte/stats/contributors", + "https://api.github.com/repos/airbytehq/test_airbyte/stats/contributors?per_page=100", body="", status=202, ) + + source = 
SourceGithub() + configured_catalog = { + "streams": [ + { + "stream": {"name": "contributor_activity", "json_schema": {}, "supported_sync_modes": ["full_refresh"],"source_defined_primary_key": [["id"]]}, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] + } + catalog = ConfiguredAirbyteCatalog.parse_obj(configured_catalog) + config = {"access_token": "test_token", "repository": "airbytehq/test_airbyte"} + logger_mock = MagicMock() + with patch("time.sleep", return_value=0): - list(read_full_refresh(stream)) + records = list(source.read(config=config, logger=logger_mock, catalog=catalog, state={})) + + assert records[2].log.message == "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/test_airbyte`." assert resp.call_count == 6 - assert "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/airbyte`." in caplog.messages @responses.activate diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index e6175ad7e465..4160ad4722a0 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -193,7 +193,8 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exeeded for 202 status code | +| 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image | | 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | | 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | | 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | From c5db1e947b915f94ff24fac2017940607f09544f Mon Sep 17 00:00:00 2001 From: Tim Roes Date: Sun, 26 Nov 2023 20:57:38 +0100 Subject: [PATCH 50/57] =?UTF-8?q?=F0=9F=93=9A=20Documentation=20November?= =?UTF-8?q?=202023=20overhaul=20(#32811)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Natalie Kwong <38087517+nataliekwong@users.noreply.github.com> Co-authored-by: timroes Co-authored-by: nataliekwong --- docs/.gitbook/assets/explore_logs.png | Bin 105640 -> 0 bytes docs/archive/changelog/README.md | 645 --------------- docs/archive/changelog/connectors.md | 776 ------------------ docs/archive/changelog/platform.md | 509 ------------ docs/archive/examples/README.md | 2 - .../build-a-slack-activity-dashboard.md | 424 ---------- docs/archive/examples/postgres-replication.md | 116 --- docs/archive/examples/slack-history.md | 109 --- .../archive/examples/slack-history/index.html | 77 -- .../examples/zoom-activity-dashboard.md | 272 ------ docs/archive/faq/README.md | 5 - 
docs/archive/faq/data-loading.md | 124 --- docs/archive/faq/deploying-on-other-os.md | 40 - docs/archive/faq/differences-with/README.md | 2 - .../differences-with/fivetran-vs-airbyte.md | 27 - .../differences-with/meltano-vs-airbyte.md | 28 - .../pipelinewise-vs-airbyte.md | 25 - .../faq/differences-with/singer-vs-airbyte.md | 28 - .../differences-with/stitchdata-vs-airbyte.md | 29 - docs/archive/faq/getting-started.md | 50 -- docs/archive/faq/security-and-data-audits.md | 14 - .../archive/faq/transformation-and-schemas.md | 20 - docs/archive/mongodb.md | 102 --- docs/archive/securing-airbyte.md | 28 - docs/cloud/core-concepts.md | 106 --- .../getting-started-with-airbyte-cloud.md | 178 ---- .../configuring-connections.md | 30 +- .../dbt-cloud-integration.md | 10 +- .../manage-airbyte-cloud-notifications.md | 71 +- .../manage-connection-state.md | 2 +- .../managing-airbyte-cloud/manage-credits.md | 12 +- .../manage-data-residency.md | 12 +- .../manage-schema-changes.md | 17 +- .../review-connection-status.md | 23 +- .../review-sync-history.md | 2 +- .../understand-airbyte-cloud-limits.md | 6 +- docs/community/code-of-conduct.md | 91 ++ .../getting-support.md} | 21 +- .../connector-builder-ui/incremental-sync.md | 4 +- .../record-processing.mdx | 2 +- .../tutorials/adding-incremental-sync.md | 4 +- .../build-a-connector-the-hard-way.md | 2 +- .../cdk-tutorial-python-http/read-data.md | 2 +- docs/contributing-to-airbyte/README.md | 2 +- docs/contributing-to-airbyte/writing-docs.md | 13 +- docs/deploying-airbyte/README.md | 15 - docs/deploying-airbyte/local-deployment.md | 12 +- .../licenses/README.md | 0 .../licenses/elv2-license.md | 0 .../licenses/examples.md | 0 .../licenses/license-faq.md | 0 .../licenses/mit-license.md | 0 .../{self-managed => }/README.md | 10 +- .../implementation-guide.md | 16 +- .../{self-managed => }/sso.md | 10 +- docs/integrations/README.md | 2 +- docs/integrations/connector-support-levels.md | 39 + docs/integrations/destinations/chroma.md | 2 +- docs/integrations/destinations/clickhouse.md | 2 +- docs/integrations/destinations/csv.md | 2 +- docs/integrations/destinations/databend.md | 2 +- docs/integrations/destinations/duckdb.md | 2 +- docs/integrations/destinations/gcs.md | 2 +- docs/integrations/destinations/local-json.md | 2 +- docs/integrations/destinations/mongodb.md | 2 +- docs/integrations/destinations/mssql.md | 2 +- docs/integrations/destinations/mysql.md | 2 +- docs/integrations/destinations/oracle.md | 2 +- docs/integrations/destinations/rockset.md | 2 +- docs/integrations/destinations/s3-glue.md | 2 +- docs/integrations/destinations/s3.md | 2 +- docs/integrations/destinations/sqlite.md | 2 +- docs/integrations/destinations/timeplus.md | 2 +- .../getting-started/destination-redshift.md | 70 -- .../getting-started/source-github.md | 12 - .../getting-started/source-google-ads.md | 42 - .../locating-files-local-destination.md | 4 + docs/integrations/missing-an-integration.md | 14 - docs/integrations/sources/dv-360.md | 2 +- docs/integrations/sources/e2e-test-cloud.md | 2 +- .../sources/google-analytics-v4.md | 2 +- docs/integrations/sources/google-directory.md | 2 +- docs/integrations/sources/mssql.md | 2 +- docs/integrations/sources/my-hours.md | 2 +- docs/integrations/sources/mysql.md | 2 +- docs/integrations/sources/oracle.md | 2 +- docs/integrations/sources/pokeapi.md | 4 +- docs/integrations/sources/postgres.md | 2 +- .../sources/postgres/cloud-sql-postgres.md | 2 +- .../security.md | 4 +- docs/operator-guides/browsing-output-logs.md | 56 +- 
.../configuring-sync-notifications.md | 55 -- docs/operator-guides/reset.md | 25 +- .../transformations-with-airbyte.md | 2 +- .../transformations-with-sql.md | 2 +- docs/operator-guides/upgrading-airbyte.md | 7 + .../using-custom-connectors.md | 57 +- docs/project-overview/README.md | 2 - docs/project-overview/code-of-conduct.md | 48 -- .../product-support-levels.md | 39 - .../project-overview/slack-code-of-conduct.md | 47 -- docs/quickstart/deploy-airbyte.md | 28 - docs/quickstart/getting-started.md | 105 --- docs/readme.md | 14 +- docs/release_notes/july_2022.md | 2 +- .../upgrading_to_destinations_v2.md | 2 +- .../facebook-marketing.md | 2 +- docs/snowflake-native-apps/linkedin-ads.md | 2 +- docs/troubleshooting.md | 59 -- .../understanding-airbyte/airbyte-protocol.md | 2 +- .../beginners-guide-to-catalog.md | 6 +- .../connections/README.md | 78 -- docs/understanding-airbyte/namespaces.md | 122 --- docs/understanding-airbyte/operations.md | 2 +- docs/understanding-airbyte/tech-stack.md | 6 +- .../core-concepts}/basic-normalization.md | 49 +- .../using-airbyte/core-concepts/namespaces.md | 98 +++ docs/using-airbyte/core-concepts/readme.md | 108 +++ .../core-concepts/sync-modes/README.md | 20 + .../sync-modes}/full-refresh-append.md | 2 +- .../sync-modes}/full-refresh-overwrite.md | 2 +- .../sync-modes}/incremental-append-deduped.md | 16 +- .../sync-modes}/incremental-append.md | 18 +- .../core-concepts/sync-schedules.md | 39 + .../core-concepts}/typing-deduping.md | 14 +- .../getting-started}/add-a-destination.md | 10 +- .../getting-started}/add-a-source.md | 6 +- docs/using-airbyte/getting-started/readme.md | 32 + .../getting-started}/set-up-a-connection.md | 28 +- .../workspaces.md} | 14 +- docusaurus/redirects.yml | 87 +- docusaurus/sidebars.js | 319 ++++--- .../src/components/ConnectorRegistry.jsx | 15 +- .../components/ConnectorRegistry.module.css | 6 + docusaurus/src/css/custom.css | 10 +- docusaurus/src/scripts/cloudStatus.js | 4 +- 136 files changed, 1073 insertions(+), 4962 deletions(-) delete mode 100644 docs/.gitbook/assets/explore_logs.png delete mode 100644 docs/archive/changelog/README.md delete mode 100644 docs/archive/changelog/connectors.md delete mode 100644 docs/archive/changelog/platform.md delete mode 100644 docs/archive/examples/README.md delete mode 100644 docs/archive/examples/build-a-slack-activity-dashboard.md delete mode 100644 docs/archive/examples/postgres-replication.md delete mode 100644 docs/archive/examples/slack-history.md delete mode 100644 docs/archive/examples/slack-history/index.html delete mode 100644 docs/archive/examples/zoom-activity-dashboard.md delete mode 100644 docs/archive/faq/README.md delete mode 100644 docs/archive/faq/data-loading.md delete mode 100644 docs/archive/faq/deploying-on-other-os.md delete mode 100644 docs/archive/faq/differences-with/README.md delete mode 100644 docs/archive/faq/differences-with/fivetran-vs-airbyte.md delete mode 100644 docs/archive/faq/differences-with/meltano-vs-airbyte.md delete mode 100644 docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md delete mode 100644 docs/archive/faq/differences-with/singer-vs-airbyte.md delete mode 100644 docs/archive/faq/differences-with/stitchdata-vs-airbyte.md delete mode 100644 docs/archive/faq/getting-started.md delete mode 100644 docs/archive/faq/security-and-data-audits.md delete mode 100644 docs/archive/faq/transformation-and-schemas.md delete mode 100644 docs/archive/mongodb.md delete mode 100644 docs/archive/securing-airbyte.md delete mode 100644 
docs/cloud/core-concepts.md delete mode 100644 docs/cloud/getting-started-with-airbyte-cloud.md create mode 100644 docs/community/code-of-conduct.md rename docs/{operator-guides/contact-support.md => community/getting-support.md} (88%) delete mode 100644 docs/deploying-airbyte/README.md rename docs/{project-overview => developer-guides}/licenses/README.md (100%) rename docs/{project-overview => developer-guides}/licenses/elv2-license.md (100%) rename docs/{project-overview => developer-guides}/licenses/examples.md (100%) rename docs/{project-overview => developer-guides}/licenses/license-faq.md (100%) rename docs/{project-overview => developer-guides}/licenses/mit-license.md (100%) rename docs/enterprise-setup/{self-managed => }/README.md (59%) rename docs/enterprise-setup/{self-managed => }/implementation-guide.md (78%) rename docs/enterprise-setup/{self-managed => }/sso.md (86%) create mode 100644 docs/integrations/connector-support-levels.md delete mode 100644 docs/integrations/getting-started/destination-redshift.md delete mode 100644 docs/integrations/getting-started/source-github.md delete mode 100644 docs/integrations/getting-started/source-google-ads.md rename docs/{operator-guides => integrations}/locating-files-local-destination.md (98%) delete mode 100644 docs/integrations/missing-an-integration.md rename docs/{operator-guides => operating-airbyte}/security.md (97%) delete mode 100644 docs/operator-guides/configuring-sync-notifications.md delete mode 100644 docs/project-overview/README.md delete mode 100644 docs/project-overview/code-of-conduct.md delete mode 100644 docs/project-overview/product-support-levels.md delete mode 100644 docs/project-overview/slack-code-of-conduct.md delete mode 100644 docs/quickstart/deploy-airbyte.md delete mode 100644 docs/quickstart/getting-started.md delete mode 100644 docs/troubleshooting.md delete mode 100644 docs/understanding-airbyte/connections/README.md delete mode 100644 docs/understanding-airbyte/namespaces.md rename docs/{understanding-airbyte => using-airbyte/core-concepts}/basic-normalization.md (91%) create mode 100644 docs/using-airbyte/core-concepts/namespaces.md create mode 100644 docs/using-airbyte/core-concepts/readme.md create mode 100644 docs/using-airbyte/core-concepts/sync-modes/README.md rename docs/{understanding-airbyte/connections => using-airbyte/core-concepts/sync-modes}/full-refresh-append.md (92%) rename docs/{understanding-airbyte/connections => using-airbyte/core-concepts/sync-modes}/full-refresh-overwrite.md (91%) rename docs/{understanding-airbyte/connections => using-airbyte/core-concepts/sync-modes}/incremental-append-deduped.md (89%) rename docs/{understanding-airbyte/connections => using-airbyte/core-concepts/sync-modes}/incremental-append.md (88%) create mode 100644 docs/using-airbyte/core-concepts/sync-schedules.md rename docs/{understanding-airbyte => using-airbyte/core-concepts}/typing-deduping.md (87%) rename docs/{quickstart => using-airbyte/getting-started}/add-a-destination.md (81%) rename docs/{quickstart => using-airbyte/getting-started}/add-a-source.md (86%) create mode 100644 docs/using-airbyte/getting-started/readme.md rename docs/{quickstart => using-airbyte/getting-started}/set-up-a-connection.md (63%) rename docs/{cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md => using-airbyte/workspaces.md} (86%) create mode 100644 docusaurus/src/components/ConnectorRegistry.module.css diff --git a/docs/.gitbook/assets/explore_logs.png b/docs/.gitbook/assets/explore_logs.png deleted file mode 
100644 index 98d159e8af7a0a6abedf691fb93b7169df4ed89d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 105640
zVV8#}&2lQXCA#vnSx6^GnlcQ&-UV1b@r0x##a=S$StYTJvR9a59;_iuf5M`%sRBnM zGP;ME|EAER5<`gM7P}vg>j+|c!K!PKYq*a;hXsOhQj4WHzwq9A_pmT=5oooXvsk6o z^?m2!X~O1rOEmHM?ntJACCF@VhtD5d)ngSc&8{@-;lzvqiehK4J5PN|5#+Z-I>*@) z7)WY%QqDb>Qk6eYk4|Jk49YNG(}g=!C7l~;4OrF%MWgdv%(R;!G3RNYW#o-%K?2Ba zdW7Jey&e|coEv1Tk__P8UWEeDAd#9dj5-H$)7pzr>}(4FP&Hx6Y>6`E2I^FAfn^%9 zCzJU(!h>I--k9qGJ9!(u+Re!3Xu+2a;us&<5TbihGw6JvsWlfaC4Y}}$530sr4RsT zTi;+l`}$A4AwE}~)Y7*Z3}}B#7foC#_ZYw8{?}$ z(nY;%$1_&Up@2}CZH@THcKg-xzDgyYsq;7%7do!TT0|r-%=dahgj*IM1x)2Sd{Kod z5H-(sOYFLGGe5o>gKIzPp$Sdgh(RVbsvgRC99s|4OflPzNON?#Fgs|M(2TAG>=xf{j6 zEg{t*7wTT!t*zc$zaW+(6jxn*JA3cShx*DD9+x{{j-3CsO; z8A+t~a__X}jd^zYUgex`wX4GET&oTJ;)d~BJ{C=&{p0tFg>Kq@IC~rrSr|?s4K}*3 zPHmxjHX@Hc#b*MJZ=)D}k)rij@x_`IB8nO6v`nxVo2B+HKoj-dv|A^EJ7F+Z&ajeyzCn zv|ewX3xz8zHY;Vc@YlB=?LN?A`aNwrYVT3cTAhc&3R4lRK%Ae5pPz(&2#2&)Al;li z?$#9MMisXYtnM}_1rUmAE`2x)06Q3+pKju-c?oRx+qSD15`wGGkp-4|sY-(hclv~$ zCPX{2U1=A(34E~AK@v$1c;6IBTuoEa`dGuD(H zlK6&yX~N)jbc?mcI%pngUc0iG{$5|dZ9bo4@1{@~D!BIPxIFOgXu8OIW}_@Wfk_d4AIHbvZET`;2*~Cv%7g9K|Ef!GF%6A;~+^%rxLlinUbZ>})+svtk#JHCjn7kuYWM5(l~Z+gIe z;)(i4Y1d}e+dtww{LO={JxgT#Qy29%p@#{N69y&-bp41U_=v>l8A^jCjsXz@72OEt zsNis{=_!Qil(1CMQv0e)BBd)^E*yc#ux^o={c*m(uI)BnhQDrMQG4BGNlsK-ofXGd z=l?ebe9g-g@tY=vQGr02PdQ7;3DqmRsjxq7k~zET%&|8U$m@i$Y9@5JAz#~JFRu!pRC0v$0%}Z$^EWw7iUDFxm`n9lEI~TxT>@8f&Wi8Uyvb z7FPOqaxZnifF(Ls^A+S?o`}oeK5LBkUzRuG@da+ zaYW!h0WG}i!$wj0ZX_Jfe{@u_xT3I+&qEi0TH*J825F??S ziulJ8Pr+>v!|C0mOv=5m-4OVGbhvqf_{>?4;0yAWPdgJL#Cyiwk)k}>VCzIVE$eM& z-9Q(@=mbgpL0V_%>{0fP#ffdC6FC~%Z0c^}dtors?2pAccluI%mF;7h38rfy(Neb{ zyhVy(#K6XUh~wtXCT6Fl&%;8&*8OJY#JsoGfsU}rmQ=_akxg>{D^#%TNHSz4h2Gq8 z;Q=A}6iw{eA7Qr(&X3J?A*r1S!QF^GX(xpGm1Twd@g`#cZ#OXPou~@+HF{^W)Splhs7=Ks3W}00Zu`d}=Asa9G2-V_7*4f9gvm!YCxP&qYEMHFORULa<^?`DC4EWZ9ni z&1W{;`$hQ)3&w8l3Jg9(nUer#zZ}Gnth^cQdbAHG2)BKR*xiSGL9c#ys zFiO_m_jot38E_-`G8^yLSqvYvtt(gYuowlT+nBB=UNio@Z9?*qdsCz+(+TlMuyeaV zK5E_cVFNL~n0PJRq`Y()<&I;4Gxl$xQREBQM-ANad3%g~-)m zzD(@?gs5Fz=Hv5iFUX=Usi;zxIEO2TYXcvFGrfnfLNct0$inlJkz!GbS+B8RJxB|$ z4-qRFeWpTds?UO@{1-+=quyvh3C?SJ*|bZW(%~mFSJ%tA%iOD&b&8+^+s=nnc$PT3 z@C1kz!WE_@Jrq&(Ti~%(PoDCR1n!&&!}I>;oq$wFF1_F`Lz+EL4)V__0&&^JvR`SG zs^!Ic^Qyd;Q*IA79kDQTgW$|ZcAL^qz0^2|4ck1E9~y~$<+h~YQ~qo<5!5L8jddE^ zbx|HleeSe9ME8edC+GkLeNQju!bv09h<7;4tQN`z+uSu*gUE?M$tc~_eJ_kviy9ta zo)*B)xWA=5fGNvxt_}odzg)gsZWB?CpKoJ(qzs1pxCZYT%7N^y!E1@pzE9Fk|9br* zwy2gU@VxdU`-;1SxWKO51z*(UcD-ah*GqM&a{120NDN(yV?gjg^D~C!{~E!#$d|a< zB($2uWx$#KpX2oZ_r>)p1kzt-fBavEZYKe9k;zCdrPf3;g#SH^gMrf$f@|VZ^S>V> zL*PJ0u2<|glR1x`j|MWoWObUVAm-LF){Fx8Cc`{GHo_ya| zy%?wa{HP|$p3I#7$5*WqLhg+@^b_auy4)unc{V6Cs>l|#!#5jU`4Y!i$-sE@{B$q+ ze!6v6RxZ0W#xkk9dv9pzU1##^MTRpVPn1QvXv%HcPAF=ukWpLv$o}fBT!JhCyDq^b z0pZYmTHLE*3D%?&&O;IR4GR|4`urU)0XKBr7L%g>&H3Uw3o8X1-RQiU{A87>V2u*( zZ`cPDq9>^yKYePxaept7w^ExcymJyzExf$4x*U?idq#N9ySfiqQVVn^EHP=ZJuf>p zmrhQ$)i0Dyy)C#=qE^hgtr=CzHsE=M;%nCGoB5Qme=Ik3BaX0_Lhn;|f#cJ0APjU* z!(%t7mwKxF=s}0t+F&u+%g*aH=Xqf_{*!!vt)r{pNK!N_uC>)ImCdZ|#beLJPB<$P z<1n5^!L`jG&H#@6W-s5tOJ1h+MS`6yMFeXbw^{?wDm%#M;#~0i&j0<15Ow+e~|M@d(|MfQ~4}tGi z2YX&}bX;jV%QN9ZqeC%MmF1JMbEspWh4;`DY+#9>Qb?N$m-GFaT>De@rN{PwPkbnb zo#HH{odfE;jT~fo3QKZ6_{B1kEsC%Ic;yybi-LI@Sg3R&0qLZWegr?RL`CAzzP&nF zPO1E8sEAH{Q}E;yw3XBG=hxw6y=9NNkk21(%DV$}xvU3D-A?DW9TDFnHgP7ME{9jg z!h-t2C4vqnQ-j$vd2Mdbc`SrpDnJr5xfnT$y2OIY^VI#-n7srrf#`4oQrEAJUwxj- zYyaf)cy+3t`$1gD#TM;Ygi}8lo?cc=9uB`pG^Iw=@iyuKIB_vM7qeN0I_`lWvq!Jn z({DRtY3ziQB3#N#_E+4zD`JSK$(bl|6s|G$G?F6q&7<)e4>rj~6YpyP+t6N2*gP0nEXn z91hFFmGW&J&Tm}u$-b$Olw|*RcV0en86lP%8 zor?8+w85OMFpL1M72AQk3;1YUL`_sLvj09C@FE+SVYEN7xUDVSU@yn!BSHIp{cgjJ 
z^0vB`s$>5lp%s;Xqv6~;>{^i*@e=ri-#l-5xp2MJrb=T-UaD7xs8RY+Wy%JS@ia;_ zgI^Ghu&lcX{=PkPIgw>D%g6l`{$y!TQATt-wK8lVoqRsnXGvTNXIBb7C&dt6s zd~>QR=`W|PoZAsPPv$?3OEroF=}pDaU=3dSzgDdB+9mJd;E-(wsd-5N6NT9LAX*?# zmaOO*ro80`6N@69?|DgDk86oi{~o6sCOw0sC>3wM%Oo`tjCefT-7_X~e)o9-kghzx z-#me+jua?XPwW1zNwG?LWVX!|vZrIGNxeMTlq|BG=wzk?m3--Xev&~74 zIA@SvFB!kxN-hoT#|WYD{=*$4f!hpzsqH2u;pD_`Uw(8h5jCx9w+vYo$Rz)yYZOhB z!2a8CFX7Ay$s6oLQ$!_!E%AZP*oQvFiicY1{<$1xQeL|>7bNjR`btb={nq^yTPm7N z2Pv`|SL6@AF+$*v99r=`Cl=KK1?CxGbM%r}1Cc`c)ZLMvMThgYQ-*#bMzqpDxFs_f zzM;~gq7lcCpOVl?duLLCAGCaGlW1k@0zsEA#e18n9)|{(Sa;5dS+tl5*qC2Or|@md ze_m~jn`yGz_U)p$^=T-Q?Qvb%V+^D_ZSq`I!Q~9(>t*dm6LmXTE83sTPb-NplI>n9 zMBEcFpn;0N{Dib2YSNF2cpL!_lT0dS-o@X>ikAiP3Tlc`tE~OidyIDElV69Y_Lo$c z!>1FY*3T|{s$xdZ#OaNxS!C>`Grv|^c?X{sb&936{v&*fs3=1Jj@%EppxH=n8HXfh z@bV1*Yq2v>!nmEl?8bFjX--hbtuqf0@LgC{B1@3zW>~B#f{G#@-owEMCrnm&TSN z+|SnVE;>-KsUnQN$BCiVA)vZ)W=lQ-TD?)lfHRNI_{W2Ng8OR%)-3rgBOzob;mct| z*&+BK0qakSWC{l?wP=*~0@g!sV;c+Kr>FcCN)}X!EqV1?b*jX~qd%zeaw*aSvDEVCbX*F{TZ4Y`=i;22 zMm{nm!8yxKCuo!#f2M7gP9io1|^Nu zG(IXhL+D+EkXwZ$(DAHb`9UlHvt1f*1Ah8c;`i7^L^iweJ%UK21afa$@iwg4@6sPj ztDi>g4!k;VK?19PF${D9KbQ=44Dm(%U*`a>lm88WdVUYkf_f}(OX2x3S7)YuSmn>b ztp^#*uJQJD&pIkuF7`q;5?Oc&PisWHDD;7*Tp^q6(=`ev>O*?%MF=kEiG=6|Bd?RI z8wVTWbFe5Tuh?>TetwB(Hn+7>|*+Ix*h*T+IstU6Ic<=F-A3doIT z<*n6ZDLkdgh2Y0`xFJZ!sG%>V8@{n@{0xdD-gu4|mf^uA60oJRd0~Q1bdwlK1oKN7 zFjmm2pk#S6BN4t7b=>)~()dHY%@(Z{XcLESRAx0)t!ZM=@|8pS%Yb>r&55XyXzjSk z^*)4cvf6xXbQp>9xfUlOiBG{}eT-%s<)Y=|VtRzVda*sfT)!Zg(wQ-sa>O}h!2dr7 z04xEib{&( zP4j&Naox~gXZob;nfdNvEXWy#qLJNVzV~9 zdj#q9owT65W2_cxdd^Ebaoo!-h|_NT)C{fjp+`bS`U zYQL@31+LrYOqbql`glZf*HQacE4%qUMp`LFc>x?kXOdqSa+ekYdmMguIrb7JW3fgX ziP^|Q|IILSCf4%_7clKU`(hXr5?SW%n~^E55=X=Ra|rxBKBh2-<;sCt@((ubLVu4% zYPr>SKW8W{X-^T#l>A2#H>p#Js~7k! zJ+VR$J^7G(K?rFzm7;YOFk*w+k&sec0gfeYIfN z4b(IJUD{B4{i_L&V__W9e(|Gv}wP+*BYLXi}H7uw}UkK|yvtwT(zU~dUH zk@3~;pU;P)&u?G#ajFx-iDWJX2}!o5v^5Ntfc?mhljhmSvuh=VG3{|aFzGfUP>MJX zaCx2G2v4O>CyQ)l7lLC0DZod+oMp-KHd>rI^GXvvr^YEoLz$laQV)L3iWYabRRoQR z`&rg{&6m`>ZxY4s36HszyPWLVz7O+#yZa4XE?SKRz4JzxEnCe7AB)7>u6N$3YuG+~ z?ur69&GohIMzfHFEafN1Z&O8;rh0|?8^BUuqPuJ09PIZX{2gbiF|;0Rc(<~I&*bYp z8{&TCk)y+yA_5WNv*U&`3-xcNxOL9Swe_x^nkKJhb2f1gM(fB?`v^XjVAWQ4-EXl! 
zSpAGlVJP1pL?iCpUt~7a4iU9GMl16Tt^deQATqELE1K~=?gu!}L<;;#Iv1%zuWb;a z=;Egh9j9YKgrCCWz3*?_`G$*6EMKdl*@|5tSNUEyPyE}l)WY)Y>6k@3;bp1DPwhr( z`)e){GhwdAbq(+&0geT2Dr290%6q`;t4k5n2fo?kc?9oEr*BaF&UBLa0`$BJ6?COe zh<}OEN4S0mav{`#Bc0Kj&bEf1^M-y!WEx(hK-Nwdb$HVyp z<*rB9(FQicyV+15Zq2`0S9^=D%0XNv>e5&0&gT&B!U~zY^ZEqEql;9-523s+B}PyD zfngE5t*Q?TyYw8E=zj0PCrr2dzZJ%it-}pYIK75FFq!VqN|kIUI_gbdWAeSw26*j zQr(?EA!r{6KKPJ6RVd$k(W@VSFh|u-=piZ`oeujJ$KaLT{oN}FUnE<|q#c|y2DoI`-)}F#>iUC`y6M32trn&%r{Oy_}8>@)bv)*+4XA`=Tazi(uZ27%g(37tRK_Q+daTDRXCc&W zZ=Zqz>Bjpm)aC;;{ayIhhe51u>+MzenrnU=-gmcztg+LDvrHq|O^HyHqoVM&%AFhV zv1Yc~$CIq-5Jupl;qy_+!`G;qii9;D3|uPt53xtxeoa&vCZ zti7dBF*#w4_3MwT@8fNcw#NZFbHM+@aH8+J>NG2%&0|?RZM-+?W|&xMn&yj6Pelc%Fqw!WuFdjYYJy(Hz&#@JEH#Ly z-r>TM#eiGjiX4`}Z%y_Cs*17LUH0F(OnPs(hvrbujT?61$SSw~9vv{xI z2l|x3j&-VhK0~a0KxmoTvC19_MP_(|<6b3ZKDaYoSe{E%!IE;ee)RU8q1~StCGNg2 z4n6CMtOB2)_9j>l-{mwVgZVNpUt&0i2L&GKkWCZU3CMaUMPXVUr>&Z|%n?H}jW$#U za^RexuW+Ps`Tc$~pr!MfQs%y$S+2Y1b*k&M-%LYIp4B0=sGX3{igy9JeI3VOV`5B!PCTrX-WZkp=E8Fl;EyHs-F-gz zBYd|`B}edvp)*+ma zIrsm?V#d%(MbIdiM3IPZ+en{A;)}dgES(9re9BX1MreZTU2O7-(XET%rxQi@UqksI zix>@$HBoK1VT7=a=^2)DhrIoKnme7qqo;=br^WrOwVu+M$9%?QI1&Nh+jP98fzV~k z9|?sYa10WJR0VQ`s?>0TMYs5n4zcFYU@c4eIfUJC7(+SqWNr`zu)h7R9}3d*WMezSm4e+XOlkqfz0Dq2iD$ffiDzKMyG#5(c84zK2q zrR)d>r(b?MWdRUv-itkGY7!C0A;-N+(Rf3#%BJ;RZd580fgv)+XQjp3qQYS{(cG#| zDk+~o{DsQkEGE8<9Hg)yP?YF^?NCn3`LVPf%>5<1C3PS*mT>hGlYM?Rkz$xCfc{wl z{8sJ!hW*89E+d&UMe7zP{^mk5XKXd`A9w*z*Jb&YYyKHv{%rCYs1WJ=b6(mH$=yGb zP5wiJ9Z-YRjrdYB^wW9Fv+vgQUb_d6O8*z!R~YC6%7fV%VzH$2;=Mfy)SocrJ;Aw} zoSxQRBYhsnAm}S1?JJb4jzH6LV-|J6GH+=hT_(bDc|n$}%psb7i%YLWp-eB3y#VTY zTg-jdFHzQAQDL*Bk*nJfsoHEW&N#yoTUDgz)f$!itwPT-L) zz`==uP3@UQ($>Ie&@OOSe`mpr(mTTeHS$RRqHj^S$;~sQX%AsV)a698L&?c#4 zaM78&YPK-ww?2!VlRauTCXtD-#^bUnZ=$)TlQdbF7_hdbbW`LLw~g7k%ypDXZ@PvU`~HZ7%r32e)t`h z+DjnG1NZ+4R_GbdSmcvwbCuLeXw>M5rD`eocAa;~K1*H9xnK}U62SXRhp@61OBBE} z92;XYY5dVAvUDhLTLb)fe2QZN* zH*(nzre-w~MEw|AKjRva&rpiBTzvo89%(g`a_!B%nrts&6M}hog9a}5ISrdrMLwM= zb&-NN?C`H^^in`^0QEWU55}!d?BM&<3gb=;a_;P;*VJO76sd|g9$-vLihfx41Gc%@ zLcO?Y4hP@7iq(L^@WJv);pD-E{JUxu376$-4JNMGt5x9}sV~CrmPr(qx6>+&o|+!X zwIG#vq=?olxv9WA!!jcerS!#+n5+~T;&Hnbk@nF==5VTH5pN4Y$k#qQ79!vOMs=4EwtDn5+Qn9Zf&Gwu7 zUR3u0H$n7Ab$2sd1O&YVWY7a>NI~t-Eg14fnTTwi-@5XiZ=@a4wvHOM@xiZXKHjW` zZ%rw#&o^B((~f{!o@bG8{7%&Esh#6-p#6=*R(rd!Ra*~r&gz;`(+TaEh4i%wp6_01sE2h#yR z%W?6h8YrNrUXd>-V1aT(o(^{4mpYG8_%X4kmRcvV_vml2%kKqOj@IxcCQPgl%6;rP z8-2HBm)z`IQO_K<`V%Cb-*22|bNp)}F+6ryvhZ#g%q6e-czRg%G1kZV_OfJjCP;bK z{zWM$-92isXdJZXNf?WtOd*y zFyoKBpZTkNmcjujDCg=IiZ^td-*Jna7PnKiy zoOTrY$e*o-@3|n?EL!YrWZmr^)ACt-Wo~LsR9Ejn65Tnp?8UEngfrWa?>xlz2g0d% z8*l{MH~(Ded9ys?Hkb~O!FL5ht(8#mV*o3NfE|uR0%@O7p)jsUmPQ;lw1tTK7{U*) zz&L7!)bDayJtXzzL3$QVvQ|?emeAX0%dRY3L=go1gr1y zwq}9Qz!`m|VY)e5RoPSbGuN%G)=_y3rLGps9Lv8Yh_bU(vCl)-4b0JKQ*wCUm(Aec zaz?zePiiIxK?8Ubi(zqf=>=JzAYAfoUs{ddy2GOAn)}NLw=0wDBE)Qt`D5Ha!3_u? 
zUJM9wJv|>}G^eG9S!9h%>5WrCqOIK10KyYx`iNUgOD-3HYsc2tEO6~*M2ZlI4>tFS3a0lY}R+e<2#NyZ_8wsO2B?vMxCt8}Cz(oTs&%g#+L%`tqVq zkc~sWMNs_AoG7jEgSiRqy)98EfB!o$*!>j^XSFx^go?o6zxj?<>T@&$c*mjjf44 ziBQz~rjPou<9WLWDMd}Y!tDAjdefAxHp`7|PjE_&M!t*@kmUiyXG(hFNEi|)=;?iJ zfJ5a7+1L9U>gqmcWf}JD(wAQudAU_u7!^pgc>P#7h&)mzE3`X<;zSMHaZxtO$VpR( zk^TaoEN@|qDWy8zLO-R0T+JkKK;77Aw^zuPe1zQz`y2!L@NGgp{u1b*3Xml+lq}`* z>`yPZlRD-V{6Y>E$VE8{8WXufM5b7aDRt-XiBc6N+vj43t4587~m8m;r}#`D2|ET>01o*Ti#p%4yXS zh%YUzvWd#Wf;1V3C1?@b9iuEP`8Wm2CDdepbqqRTr}?tJ`y@`RkcUSGXhCna!vgp0 z)FHTt_@60A#1LMLB3d@^+mMdVVJUlaig3SuI!(^X+#jq|IH?{@_RH6<37Rv7WUO|E zHK0KFq55Z7#)N^@O)7|LyA0M_(G8>2Du?N!R>xD&v{ij~_7o-)%nwT}5Zx3s&!A;( z-R+}mrjy$8byA*!ugVZGl&g#|a_Xb*Maa&o4TtLP;y*58i)>Z1!y*ynKqtQI1A{** z#Q~eJn0VV1?r!zX2h%z<1%i|h#<_jr=t|85)5!hZ1J1)+Hs#TzGzyxQD^Wq1POq11 zBzDm>&XML6N~U9qNTaGP%k;duDY`$sNXG}6;~H64fR^1B)?ez=_^VkiWV!w34KGwU zpg=@~BQR#l~3JY)OVN@!BAN!p~#i*){aSS4Vr7%p;ZSjJC1Lpy>tc~xG*C(Ko>!e8K#z)J!SvugTYq zUbVSqy<#l64K3lU--ngv#ukjWAJXo>jHa~yi)a!}ym!%dLbG}G+VJ+C%itTBpU2OF zibn>A>A`I2>wI{Y@$0ZN!}|K{hn}~e;qE1o;Faosz7u6?vu^QrP6;{tW<9r8LFoJ$ z0=PP#sF^h1CT|cf&DXzGKE{f#HWY&e!Qep0TcM&-Za7eUh6qN7(@Fm~ES8>vkYQox zawX1Xjg1*9!jtRZ4O5i#&LzfsHcN!gkJ_d)6cU~(T75;hpm@ilj!uvhcHren7fMAU zXF9y?3@z5VFwm#caxiOE=Ij^TsYm}VUH9lKFk1-Ci)mLRrQ0*936?JW$M0qoO!(ZE z+s6;GZodWl!5+ivs(aH`>p?`MC?x*)N6X|KYw-;z#@Z(vqfveN7HEP5mcw&&oEPdK ze5un<+J=Q9*9aO8?yb=UdKnEEZEkJt8$XEHZ*{S-j9QT&kl~dsj~bJU&yV-}Dny=+ zc$AJ+U4$mFw^p|#Mg~hfv(tYX`jJXoKCQM~z>LDzjS}fExHzz?^8rs%4lOA2c!vY1 zOB!f#(i+1m*XbCQO)hQhiSb5W&P>|`Q1In1WHl+@7^K*T2glu|FBl}p+`*3t_TR-9 zEA(P`x4ks(^DoHDTW?U^AlUaC zBmj58w?I2>G-wp-{R_bb&Q0@lcFxsCdQ`hMHljxY*gon|n1?t}*p(`&b8mbSx*<=G zF?U3AWNBTE(9(mo{{dOJhpe11?uHI&8ST-Z-=ERW$cZOc*p)Nse9}(f%hzGf_UEIi zi>n{V=l%3q?zYwDC8RJ^8fr<8 zB#?sN{!NYoJqua*h};q-yO`|A&Zu1}Bnq(k%3Mw!;32$qof#7ZSb8reMaW8 ze?mOEn?8_2Ic0f)-Z~*`5LS;xtzOW-2vH9wzT{S&X3ZaD&=oWrqjIh)t}cRT5uk1` zVRXF7-1BNHa&9nI|8w=5FgaVW&zM`{*Rd!>jy2~XJ3EkwsnU-~4W$8cA5sDH@YE>9 zb;M*q%Ki2P_gwDt)tv|r)3rPqz(qZDBIzBDB8~h9?lh%enzni(-_xR1jHS1!ZY?gZ zbpHz%y`3@Gs#eNl3(f_%U+fa#6+K-PTFlA57z0`pkCR$cnHi0nWKs!x*5ocl)XpqL zHyXBGhxJGl_b@I?1JGqu#$uYR4*<_3l{Q*~jZURdk=JXP-OmfDXQ?jRR38I=O>ZBp zY7!0;+mnQ^msoEla^!zyDhIkKreOaqbMxmwV*5^hw7_vJ?gwL-!+6g-fCa(1=D)-~ zyv(ImE-gQ=hs2Wm^7}R@54tr@7TdiW9OZ(#-768&fA<+1&p2~e&xF3?N)$}NHjyV>U&D?)Ehc{q}BvM&7KzgU?XX2Hf8OhDpYGbBO72 zV_!5M_MW*csGfq!e~(7Nvno*#Btfxs7%dE;RJsMiVTf;=+YDr$xE=`w(ib+~G|F)7 zvV4b!x8nnE>A(e=VH&m7eC zSp2R1?{}iM4nyQj)M0NY$QQ_8i3)g@|16`m_YU&wsY5J_9jGf8s6Py34$3SED;;E4 zDyRV)UmQZ%j7&_6u$q##A1jY0I{G9pgkha|wrz)@k2d%U@4*}7vqt3D{^2;4$W2R% z%ZY@tnaz&GPozNUVp2L#byZ5}JzG*aAx(AI#)82AJOXDBA~L21)K4I5`r9F8WEk%a zlM^j2KA2M4U&O<$>WsOT?{2T&pJN1%W!9-GM4bM{w5`Xo@LJgH6L_U!guGu9+fUL$ z@e=l6d%&g1NsK|Ta*CXtT?EIsr>rJT<56?%Ud7m4Nul(c)2zr8Y7mj@Gp8oo8#54SFi8% z>&2Fh!`uwL!u2Vi8V7B&YmzMHsruU{WA!M`LxY>dJAa}X1Qjyzk^}oSy>xxiF%}d5 zj30ImW7A76_{#&w1boR7t)mTpezTQ|0tLTzJEbS^*mHdD4M93pgxfd=342|^DyKW7 zXd{0d%iprIgMu+{$=;%Gs4d~xF##~`l7OG(V46V}N(|hJ32MO7J&fiw&KV2vivuh| z;6D0ah^(&+rs3Ur5V_#5)|IZeW&NxWSv&Un*43bLNf(=C+cw}&jLd6#7=rV7Ab`w! zd>hS@sq!yZd4s;+ztiV8J%#!z{eL*n|MJA20lS>;*`Yw~e$mvh5%)|7|Pt~t| zVfvX^?0YKPRVF8axKQXkQjuptuZUXmiVyizK!^v+SJq9@M?D z{hj2zKd44^Ah~UNU48{{FV!%nk$j901i$@@9S-8ZK65=_jKp%EZW0yY2>_0s<^Fa| zc1ezkmkGP|q%(;R_+nIr6(6ZUUL)+3oIwc&z_mTYCGp9D^k|4pLr0<6lP`(Uet%X! 
z9_!L`=g`*Q9$+ho1yek$hjA(0hyRkPmh0mbn=RR|^;@4P0YJrE-EF`GCdyFCYMyFX zsZR58kh-KMHq ziUTc>&PZN9;JqPf`S3$D$uOWv+V%Kt>WdwSq!IXV?w8EX??)SHYEjoDU4C+Cf@9fCa2ih3;>F+UySXewzejyvGqUEMX6U_1K7G-BnVz17g5Ic&4M)97 zmy%y^;BRd22IK4~jirPJX1huv_0~^}f&4otPd!ahR)?ycZ`niOK==u?iux=mUC|r4 z03u8v>gq9!zSL_70LEi1qU23c09H%qgBa9wnBHfe(ZwMd+K{J^szxN?W*^Eep;{h z+*K-7Eh}D>Zn~qQmwTn#>6pV=)b-#!3HWJ&&N-@5buYs>>J_v8eRnXgFp8yzeJeg4%LKuN|TW2Q(_C+zn_pkXu3Jv565 zl<3v(nm3DdBF}kkC1hHi4+L6W4lC)kV26zQpVji-Ih2@&IrMCKBSEv1x z^Zl`CxwPGn2-~qj0CCR}#dx?%HV)of2YR?hk0#rZ0DE3qnDs>>KxfFv=S~ulFG7t! zjl;UFA6n|V$Wo`~1*qQM%Jkk|j%qhJ7LiV%x81D)Xqo9joXws!ZG~MM2lgD4B1OM7I3eI6Rg5_kAx{Dn$M|<+qBBRg~h znnPi37{Wm`qre^JBy1b+f*Z!tvT%Y?TtWIugcA4KQ>W)gy!$DDk~43h@rTAh8PL*3 zqH3{@m;V+B7~Q!nk9!srGuuauHfM&uHfGK6*o=H;b^@$yX#i7?LOf$3^R_hJ8Jqps zTZ5dF-7PXApHy6GY(qW(I+iEtGN}f3|2SUuH@H zwyoaW!!L#B27si1M30~P>lq?D^`EovsiDff(LgTu{|I}_pgN*rco)qX2zX*`UmYmybS_{#e3m-!qOT-=1DL>hte!RV({7W>kaz~sd#7WJW}-IT+Sj_QkYU)B8u$|$s)%4&tjXMXf-AGlh)F#if zPUH~GzZS+vfBaG>=_wj>h;=(j;Z`>$nhU9LZqSb@vV_j?KCwHqs>yvqdmJE~OwAc^ z`BaQ~F^QOeIlR!`HoyqL+XA=;oJ`)mv9ur{GK11A48=M(Gg?d_h@87TB(!JtgwjrL z`VzS=c*=ZPlYHC`kfJ$!$@t)Htr1Sp* z8NQhN{EV}-Yifl`d%v=@RA7wv8&LLzq28vZtYs23+=OCML@UG(OW7O76hJ>&;V5}b z=P7ThAKQz*w~$R7I}-LY0fbuYYiVxa&It_+$F*J(6%L%LSIxAgXmM?@q2a4S(Ll5o z9_oZjrAP|)(*moCBEDmt2v1o4rs}{%m6a2d#JB5{=VFZ=PtcnWZC2`<=7IGDEb%uOg3x5?rnpBw``o-AJ1zAbCRy(sDOIrZ91tZU68aXxFt+>$e~sjF6*H*A0Xz zc`)YQ7DnY|yo&D;{nKS$T@aYdQEx>lIY;`xd1j0m>ST@TYiPuLKu22QO=nTfoQvuX z!&G%YN}{zwvK`)@t99scx(ASMFCuPUEzbL&y@kq#1Ji$v<-WpW5%<$S*ezfdh9Yly z%|-~E@1{9U7CxoDIfOmyR|Qg^3Q=`+)WQ_tm6m#l)qY>K^rFT5AE>)7-@agiIdOnG zUF@o8a~gDTwbSgFKlW96*?C&j%SN%;%^h!NA~T}AT2>Q!)YfJ1{e(BcOcCdae#r`7 zvhCED>jyZBc~PJmT?Fg(X>LjL5)AO%maHoNJ4Gi~COnEkZlgx0D?spj)%Un2DL3D$ zY3+tB4BJ4$7gO+8N})V1E^iIpkV#|49|}ve9wlB zc@&moi3;rxH!1HuPge>lV#YT(Nu;F^CunSF@G7yW3ump_Sn?r`1Jw9j+PF=9DG{GP ztn|AKRwi<>XW@nS2P`ziA*JHLgTTFZzP!NATDp_#O%Wt}o2$aZ=>OPI1jLr)5HYB8 z@tH+!^}tE~*+;yI_o)jx3xN?|kW(F{u>qZWrIc*siAwcv2kY8U>Q*dS&ZFCt%!A;Q z_gr^xeyZ{r=pJO=M2I-Nw1ZPy666 z4=hrxPbx_M{=i7h15`U_J_13hmyiHn|E84HD1Rp1eYV!YyAfs^$l!b{LS_MCz>E2A zAL>5Sht`IoZA5R|!|&s72&S9Dt4*D!9mO^E7joocBu5e*3cXSN46{u!g8z!I>jwCB zcVtN{@>K>vCGCeSDdtt(i$w+P;lfsk6`tHrq{90ilYg$S2LjY;&_UF4oF8SmJIpVH zGW}E55A$6MBf4v?0NV$AlV}x(K}HVM_feBs-Q;FNzEhvMI|T0Eo<<{5v`g!?=W3WZ zJfABSbS5v3j6P9j6S5kW0p%8 z_AHElZ>M%esGi^9tY=f%y#n9neR;Odhhk5{TaCUm_9uO^J$Y2#dg9lTfyLML9`3mH zaw%=%#Q+SMh&NNYq#P%!%}Ur^7~M_5>&e)WWI|u=eqWxZ^m?A(hyR^O0&A%4)z zQ)1Zt3X8~hTZC{Ganp7D7SLsD6}w1aPyVfGVsHxY_|-=86$*wb&nWCxuSi%y`%k=b zkY|Q~OF=9;4v?rr5i+ces_8wR^xu&>5>PN`#M8ltT-=MVfjVC%{UDQ!OEr#>NThFU z?oI`{b>2q$OP?$uB0n-QadT&a-Y5fkI`XzNZ?nsU`b!uDaBWuWx!$VAigyEAI9X6N z+fx)a4Y3Shp!u##%Bu;;2)`s-|A(#W!~VclI&sGLfDYi+goVacvU@eOumFY@AfC-U zSQ8oiqF3`4t@XZthQa_b1fO&B2o8rt$SLjA)W2)xm8({c{3S*RKp|BSZ-fsN=xOwV z?I(olLWEJ-p#Eb+o+b%xy7|RJyr`dI7cLaBG=_gg&GljQfSKhhLJBXv=>|~B2y%bm z!$CUYdKR_HhS{CCTOVPu46*u_H2C%utJjpetP?0cocrGsD<05$;#qxCV5mzx2h@97 zG4TG54EjtBqtK+JrfclU9e?sh$d&{0rdb$%i%K{J>vNsKQb9wdGm{#QtW`ZB3W6R{ zQ)-e8u{eTEphiQS7e9u*vkg(QT?JOP&$eq*6qBud;v5JfdeGgRN;A$j3Bh}Gp;pIs zf4rh(JEWRDq`vmA)WXb@BJDbWzX&B@9^r)&kdmLQU~7ewJmpRN(CRPHPE*i;+jRwq zIPy_D43gp5Mi9YiZ4fm^5_wBsADI+@!?=sFd3`BNOsTRH?yvsn5uK&&AJL zVMT{PpQwW*O(`jg#0o@X;+UZ(A=i75quPLs;- z{&3m_sx}n`TyycyG#f0AN~#(Gk|2*ipeXpLeXGpV)i%|s9C%JY2Ks-}PNWcV z81(OCfvQpW+|5vlE$aAKPHGXJ9wkFOrz&r+#`J#fUc<)Ec6B0U$Pm-k+^D_cABaUrhz}9|KVUlk2*#h){YhS3LWH&TG@$M zn~S;_fCobd=SgY<{g=~Tg5+b^R~EORE*;5Op7DE(Y`jaQpgK~UFDjKkz7+B`PBnWj z{qV1ez~g?Du#2oiv-8#F4{ZRTdR2XC0^EiMEYcuWBovi#%N%KfBn;qc@?=2RLZ+hM 
ze~P#cBY9fcgw;C=qPpmQ8Akbh!LMqM9A7L~xc6uYi;0m=kV!9+-zdCwWbNzODGy>9PG7ZqN z=$PL*7&Be?d!&veA%eA#_~4CA$8h2D3&URdWLl@u<~B|mAJpz=Q-jhg@Oop@DKk55eXoisp)vu}|RrNVsL zHqNs=9UiixzP?%NRH@AsXtG`4kepBPO+x3}g-G9mv=!KKv?9u!qjrQ+k znZubM{na&~CAaqxeodnmO*MRR&CMPa^aURPfs-nqzEBC2Rd6I{r@cd@{`w%DKL=j% zKWQa09pb-fWpdeXRB=k_;Nou-V)%2WaqJnB?=~wTj(0q!*Q89R&gYO7D$MxymqU2Ozm<~2xC09giDx{Xq{2> zCx^)I32W(zxam+6@d(5!RkP&y*;Js*elH&bCO%a2uu7wrvTN`vy64vcVq3gNYy|Ibp(wSc|^67!$c`;UDc-ff|&vT1U(;Bw;Qol<{__0 zp^IB%f2ZQNpvhJXXv?-D0@hAOexLX^vVcr<=amqeJ4Rcf69S(;IRHZH>T z-9@(mK1Lr3SU%b*uWM!*+aSNjx4Nt~9&u0K%4d|e(TgfzO~>U zuMfe-Je_aktGOU){|4kW4<=kS7QH=TZ187QHh)BQ;I=w#`UXv_&Z657-B{qIor2W4 z^a~Bs!(kB%xvG(dX&XOuZ2M9qDhn72gWel|S9sX<0h|^C=jp^GW8wfq!_vsj(;={lbn^EM5n^oVIQ_;+!U>G#uJRUmtv5DFA(Y*lH9%yEpFLopNV)xP%9cuj_bha-xs?Nt&+S;c7!^%W*~6(`uG8M#b%Yj-$@-|De%0{C(zRf z*9?BXRH0?|EzCT>kwrJ(m43)J8OB2=^F559#wA%S@N5pzej^@;^S9C}*rC$H)Lz=R zOS5Rid}TkJXOZhm7IrDC-Zyb(mP&cA*Z7uFsyJ_)x034F@RP$cqayKRyN}i7RTS~1 zW{r(q1eN#({k*o_Ow|tN>ERf^nEz<`&OSFNtTBWHdF%=n@CfY%Tg$MX^y)}A!i>Qd!#6zK)z%WffOG~Q7jz6B8YS94llm& zx%7;cr{L}^eMwZiW@;h2oXLi{#h&Me7O#q&+14p4HA0iR7cGgJHMs;$4piZ;%-zy? zrpmZyKJzT{3{Su^vONXJHPA6xL|@qyG#3%4011IAJ^ej486W6hS|G;6Mynd8YU3_0 zrE|%g>?Ns*nh9gW-)4({1jW6%scNCiw~s-8uTlv4DkWCx-YxtY_5o0LEOYC{w%cS7 z85s-}*l~!V;X0{Hhu53$Q<_9$Re(vls$X1b_0*CP#YvztiCetCox`jh?~3fQZ604_ zPfZqeEsSBTY#vXSaI{ouIXkG@RBd=+9Kjq3OOTt`S;{XTCStxrP%du3%w;$5JjyjB ztu28F!01^ryolTR*vO}Y@+Rr<_k+vACqACk*Q>V)ZK(9Mh^+1-I%nwyESNO-z(n-i zr?F~EH6_#h011}$ca8)?+lvx(GJJHdUj7ctKn>2T^bVMUr*9olrEoeGRM7F^bj+{`jtoS^O$E39EHx!iyLjD zDk>5w`me060H6j(ebseeejj=vsYofAf|AK$S%pH+!P-4$$w8&TXI#u%MDG-gik}YU zwnvLag-NJtmw}WfWKC0d)hvLq0mWx9Um%}M4PzOj(Lc6m51SxCQxJ6JahZAe$FkGk zxrN&ZD3&k>Yxf4ud6_O z{M1rMy7D<}xeE%5*NJR5$JXF24I;w1C16z;s1!P0rvUP>^1pj-V1)k;PD4-`SR@l8*lUa*)#^)Zz@>i!)Gb$ z2?JF?-;jM!)4bGg2ut;Swrnm&BTq>lc%N;My?Y`j?l`Ibw8K~Ha))vM(cdw~=RD+B zW|U!nhun`ZJ~s~g*aXv<4eN@?aGuZer>J5;qEClC>X@5@*LC^817AezR%YLAPk7AW z!+_yaF@YRs`@j@izW(+`2g3k2t}S zH!RBCt-mIVz%gRadH3%3np53mMF6*P1HAP9c&I^E^WWW9Ci>r*J-T#U6iE-nWaKA~ zebKz=ozwbp*XsX7cm3dopD(YG(Jlu0Mnpc({xAO?Mq?^Kq9%7n_+Z8hx&J#YMOft3 zXG<84i211W>XdvFIw?;(#E>wD_Pw# z)hT8S%;wGafMe?nb$)*4^3@XtxIY=p9u>4GP3(#dEqf0<0pP{xe|&Kz5up+<=E%%Q z^6FTl?VfvIj=8gic@4ATw2*VaF`UrGXus<6quS!L{HwRKCtT&d=wpNF^Fpxd_xFHZ zbANHF#wwk}f8|gb+O$*M1St9!S>M`n$sz$Ft-Pub!0ixM6=H?56~jsN+FAhD{L*<~ zf|m6AOE-MlSR>$F#ZS+T$*MZk_L=(GXZZGc4~Kv-$?$*O7(3sAfT5_l8Z^t2CS+-oR4PgzVrssNd)W%{L7&ATi(SkMj^C5Pst#J z(oHY>h(4}(7Sa14O(f1{@&~?FgGYc}?UvB)Y@Honv44n$68jf(!IrOpYq4t5Bsxbn z3YW!)Xd3B!7TJ~Q?;VEHupWpDhMDIYNYe!sXBt}3Va zo$79Pt9H1}W9eV_NmO#4s8=Fc%hd|6LddIuT&HnNM7#d~%3%;D`^RVsz@`o@6!jyS zNjNI8-PmlO?Ks_|v+4;$m0PDseM@I!Jz>PzbN-X|J!q-cy%Y)SHTK91fO|{VJ&b1E z{68_iYYO8>j~nSR6%N#Wg5xX;39rxVP510Cbr_wLDF7O542=p*<>Ws}cM1TJHyn{k z`itX{K_-0we4M3FSeEQWN3ca-S9`~XqJk1OUY zbTB{Z0SGm|#dgaRZbn9YU6lLaZ^pyb)|5akXor%##Jkq|7CYkMQ_lypMfT_BpY?wU z+08L)#Gww6h;PH`KCpjiWpl=Qr0kR=@jhI66G<+D!fM!@vCDc)a}EGs3p4%}Bx(7U zqviLr$DFJ{RLs;GS#QvKlmJUL_XON-wTTQ(V?5HqQHnQ~XR1dLa~ zDC0iDLv^-;iQ4wlG1tvhj?8(&JH5yKsuM=lHMFj7?8*N^9QLi6StR}*e}BnWC9|!M zCB?2qUccOw!>s+ziNHjU`3ro-#{=Jq4p`XOxX3bsZ)s^|aN(elC7|EJ)55{S$>etl zu0QTog}a^#?X5ig^~Lnc6cY2I=bZ=uy2^N1-_T_(I)8j0FcB|MXTj&$g(c|hLuB0U zCL(>K)~Q=gB$rfCp6i1?oB~;G@-tZ?c!FFVD7rYVTDl(jQlU(-zk?x$jtZ0&r^&)b zLV`ZgydvafU(-X%sx#*nXMG(%QnD}nC z{^{F=%2U_4^iSGz+wo)N3lx7sLs3!i2FcagMrLTj1G1PtB`t5%kIWH3iwQh$;$%XC6I> zvFXDrwa%F!zIgo~OH+ixwFymjyIK?sQJ$xwKinW8LpK)*<}%RzB1#2ZuD(I{H+IxQ zrs%|p1Uohhzlh>F%#7~DOif+8%OwEnu|ra&C}7j+>w@`Gp3~=|K+dkkCaFfKb2%Q= zV6yawTP#w;xXi~nLdZs*S-l>&z{T=8D06J~!0&DJ{pI=Y)3k@$xH$5gUOeuJ3p|wO 
zdAa=4`8HAaeJ$qDQ%qb-P%d@6-^(*iBmw)?aP;t&zR~Jg1PLE+*)o3+!0*`?$A1YW zS6eCC>EJX1bF|ET`Q632vcC>4@W`b{(H=JsKny^UG$)CF8b2+f`0jD?2Ub zzyFOriDU!!kD(#Hh&8D&W&UXD4>Y%V_q#__if=J~uPa2Z+I5S)RSm0hWK(&~N(AtD z;kpXsgaWoD&J1TgwW*jt$mq372Yn&+7dwGO>kbPOE6OvrzjmH*RFya>o-xX^z;F^TCro^das6gkTV z{{e$^-94!QH`b$@ILF-mP1@-W`K~0;+(&6Ktn)9b< ztj5}&vUJ}N`2c*B1&YJJ{2NbwZ1dI{@jRX}PX5;*pXcB2kLh|jZU_9al|oYuCYq@N zzLW`UaWSTB`AEZ4O|D(;VOx*m6EQ`UnYs+QYjBq|O22)tZKXL>zY(y1B|mpaOh`Yg z>OXHE_&XxZ%N#ll-z|abW&O70Uau$PZxZ{%sFsq>d33IZ0dwX1KaWjaEk2L}_+!M} z;q(dM-Euv|b~!E17p2|gvR5D5gFJY#ROx&(`XoI6J4>sp$I-lqK`q#v(>Qb;%juR* zs9n142fqFH7q!rt0!6C#eshSiybg;}XJuW{g$Y1BmLzJEXcpe_rB1z(zg^`@4+T1+ zMQCDW&vfX+&t>E0`uZt1azs`E`15_xrEci@ajNLt~$A4!TeRGCyec$-$mh1!KGrmj3WO zd8ba7K2um;kR4(@bzbw7kfvSC)5~*z?%Wu# zfgjJd%2?i*3Capo*J9!PwmbHP+4(J3|K%s5(j7QJtJI4vM)=&QL&AGFtp>f%OsoJ* zmi(<+2>Rt4UP*ssrotnf#AabUtu4&b*_5=xur3w`Qt~_Jrqa(_#yd>6SoKqs`LG z+j{i9O;_Nyx=C`$ICEyMF%D04yV~~_2&F`5(B|d!)9~kSKQ#9*?Lk)}L%o2;SX5vb zJ%Ou{oSgN%ABILkCHRcpS1$0-r040BrL;L2C0&}_O<;Gu-}$T`t$w*k|_E6 zK+Ni|BOsLaU)+uy%)9WkE4qH=HNhAr^vOmB#S~&pdxrni~o4r3umAq})Zf^ucv z>DVSa*d1H`{1Jb-XB!nq02~~_fy>t7C?U#m65>rv=e{8U5uNR`r#=deF}&-fe7r@M|4SILGnL8`_KNpWRYz0_lck@zRmr6z3}gi+Q9w7eBCUl zHl#dEma-eG6SJTh+y5QD*!TN)2m0}M?>g8^Ems@QeZ}>`h(+hQHP3JS7nEjdq`)1K zF9FvrnPi1w<=f&rs{0C4RXi{g6u@y@e8N#;S{bwrz5`OurgP;21nAsRU6p#FRd$Ns z1#KUdltVITfKV#z>}J6^L`wugtme6aV&MR$lf-$}(ahE!Aq)>2$2 zU7X%?1ou;6m%75;u&fg_I7x^tT*f)!f;I;~Rg0ZNVz2uCI!a~S=+ld*B{0v?NK;&e z(}!bbNR@~#Zk!{lJdstGYn>FoTZ-x)_Y=#*Y*%iLuh)s|qM}ZB1Tt3u@X~ccW z-4DTA7P;sQF-(qJY3d>AmV&)+EBO$%j8|Q0xc452O&!=sr043!^vo37`;$}#f8}Lc zZR+y(A+xk%NA%qybr1>A>8t|x7zpxH22$Hl)#tB@?TTJTPD5=u`)!5Q*RgXZtnbT<-m+e}{S;G+2hxdU$_!f%wcLuTAl|N!4 zuh!dZu#PTLo34qzygt{ve46ly`{7|gZz)=1?T|&lbd~=p;bE%tnOjonMwqKwxN&Z8 zsV*%PnqV=@_;oS=bNXq%to(A%zt{bQ=C0nJC;AMw_<6E0lQh15IHm4w0?T~VCmjm; zMfAJfLH*acE`ja*rxFwg@^D-(PXhalrc;rLHDSJzf$ns0!clYa#=q9436p-vcSulV>w7b|yZO_yLm9cz$Ql@GqJ+qG+!Bn^Y^&@Y^WklnrCq zT)G4X-M`=6?pqW$GTe%Cu`g>RsX~$T7^~xX>5Wg-l6WE~yXJsL#R%bka?smO^h~zZ z!|5xcr~SfA|7?GDWxNdkCTmu|791 z&-$Hf5A&*QQZ&xn!!}>c@5d==B*GF&);jp#*F*{l4aG!kW{U+DGml649o+p+SL+Ze zeiK95D=>a>%|M!+OE6E}yg|Mv1c)T`szzsnE<1?J=sLaMj}`(hy-sy9v3qt2PuZV) zznf%#W)IQD7_(jh8+vEmdhO{f2zmY`Y5i^8FGr&4W1ez%JfS@C*D(#0w;4k&Fk!kg zV7jCBA&QuD0pc{Kg71K2tpEkX>{uo`6xCO1tn6+ZbF zB}OsHKk){L4EcR#w*f#D zf5&)8(rRNC&oZyQCWQbN3KF>(i7&4*Dii9_W$Zb6JjjX!8x4p*9@ra4M4HS zGZ3e(?fM3Ik;M{>uYW#M2~5G@b9geA2?a^| zNlK|)pk1>kSG1XOTTCj&uuzeF!#}hD8ehpT;k1ITmQTm{|-#r03zt@32@O4jKYGVY6rK%Dg1fYx4sa_eWyd0BZl~&QB3={Z&mhs+fAcj z{*n;Wy%->RB&g2QEdf za$peOd3V|%=^4Kl*@fiL2sOMA5w~?|9Oo31+(OwZ>m8NqU~~-MX_EC|y-7fuCI66v zcD?3{#zy1P5XBj%1@ZPgb1s2=fVeQ0#W|n}lHT1Lo0HuxgFB<(-NN(lX^@}`MAR)? 
zq3UYYd+UmR#??>wsQW~OonD#7XB`LP-nHw*a#<9_1c$vLx>Tq0LP}=52#e87kw2Zi z1j-+tgmxZ;)Yo_g}T&+R`cHUx!h&7<7)COVMZ96 zWRPKUZMml`^=)Xd2p$yr%=PDne3HXC3Ok>gd0Le&m`T;>H*+nBcyJ5rjpm+iAAglA^|U1R1AM+P$I zk6OI*{*8*bC}_f+7z%Lr=DXDtpGMRN6Yl4PohGjlb=HmvvjKm+ag;c_wWx{wblMsq?X7_Kycr*Ak zO}N40Xz((|hao9RYYSyzl<4}gxo_h+g?K_KzeRKJ;xj*kkC7^d6=Re%>7&Jsa{+nw z*G8c+bn9%AeF_uaeA)(|#iKNH1z@n>D`;^5Y0)iAWT(&aPnt_8b>_fJOP%FgtRn;0N$v+FySpxp{ zQYXpt2#Uhg^FQ?!@SY5e3hHjCnlk*#_p3{9)`x^75KccT>I2pB8#OcpOwdH~^_q`% zs5ktJ`DPK)Fr*i`YYWYSG@ctoaF2*f&w#4~!)_iN^}aNLlDl#;$Sv6`_mn9g>B;@J z+_7{jio1|~q=uxT1WpA~krjOa$Cr;r!7ITUxEV}eFGAgZSQBPaEmV1-3838}c6U=ii1Z{8%vmhI zks!mqm{rl+cBgXno2FMq`#CILA9Bjzzdu|3{Lz|?iQ0TnEFKd?)lns%Gl)*n>&!Hx zyY5<|n$?juSNR91h-r>(>hr4yhdPh#g@PtZOm=IUH@c#S zbs6NnFD-Q{j4DnHI(9d3$#_2D0#+oJeC4$7&yc5b-IVi1(xYggBb@Y;kY_qWfj*PV zaD0UASo8UzdUphkF^QIV!% ze5e-E+{0+sZ-px+@dLzO-(FVh)@QTnJaj*NP41rdu8BT@Wl8%mi9=nO$%wzjjyM1o z5b4WuL{@;h6YOCrAa5TetkY+$8?gTqzW z*jggRYN&hJN!DmJl&Uj|J)pXO7>oNDaSy0|J&u!Sv&_8D<(ejTb2>3zN%u$RHPRJXV^#GHaIjjHw}BZ|H3iD`O;j`tgd{3Yqn}8fbwTkOG}#>I;{JJmW+~ z$5fO-1F2j{IKuJ)P%BM{CKHi+z#K2wd>cGdRPEH}yoJfzbg0_)@Gb|RwIo^M?oav; z-JD{rgVuO%U4(nkt&kOAI}ROtN=;s@r{4n)6?yNDFAnWjMb3}XNjD1pRtoQb(qdh* zm{uQM(!kY-3__`u@Xqbe7E^5hqQ9@*`kR ziM5X7z&Q-;Yh{`84lO^#7zV_UixA_n#7D7xXgoBNJ;(^3;Rf+{w%nmwBrvGw%D%1r z#up?eo@;U-UaU!lu@9(P%VtBz}zjyt78T1TXGkgJ#($IlL+CJxrQd`;ta`oERVq z)-1e5kA-C>n=P=v#yTxoU{KkIZ9Qav^6$Gip)>ircFhVTbEnSoUGF)D>T_YteI;Oz z-i9B=>z^OhKgc#OTuT!D)~;O+f_4Rghgja;{!9~aK}ef!K5yoEsa)Ce?YvxbJ0HLd zqbNN!+r*f0`vA&fS>ZEV`|ezZHDUwu%&PX?47Hr*E3=f?o&DIlI!GW|q*-h8;~SN$ z1VvY)UYjiP;Yyv#S0n&&@$E2-swI$~^tqrF+&>|_!0L zx`KDx%!-{aM*FYGTAp~QX2lR24mgM~W zbH+-E@Ns!^tNFBrwz#&ik|4qOOY(}BT3_9VY@|SNI2@>rk%6K94n>q1lcoOL!!jqD zaL_B^M`sJ6S{!YJqyJ@F%PphB3iv$*7e*w>vN;v>DF?$cqcjq1MZ>Ayx5d0jpck*L z%KU3(9VjjS2%_+YPmInm`slBzrp7``Q09e|nzLIN{<* zDA*O0GtE&rj|#u|Oz3iwn6g^Q^NFn;qs)Yyx}P1)@uX&WT3tPfX0akGJZ? 
z97SQq7g(n&_eqhT=>x#ap%-{&8`L_b^nNEdClA!Vn&;GrNKbqO>|8<8zZ`uHamQ;w zV7N56$d7R3n9TYFzseN$im)%GQ@*?j@58;)E}6!aZ+gBG`Nud8n$;{srht}rVAF(#;{!)Pf=8LC(7i@iou z>SE8v6>RkxU8qYNReTHCPZ2C>d26_G@z_^nhJm)oLk z{xN0c`G$)iVkhc~^(>%|>Xs1aSuiCIC1Pa_+Q&=cODHTQD~@8t6fR^QyAS%$d%Gc& zvv~yPd*@UX8O^ix#wVUz?)=rT%l*pCR3w-eg&pRc`|}i@@{FzIMADtGbUJHVwR&u^ zuG+uxae#cB__#Z)NLo75V|tnvcd-6(-p*)>1l`~uVk;MrSUmJ|EWxI$b&OoNyTCOk zloCd#piKAxpN?qh@w|{Vb~J*W^OxzuWVE2$Z&nwI@xNeh889ouDWBt7qAnn5UWCk8 zsS(dgh%DOHBpO%v>wnaAiVLrDh5H5FblxnD6fTy$2>1;drB(PLL1VoD*#P67G0{ON z8C;#78#JVc9qa@eF)=~uUcJiEHzbqkbRBg+qtAoKzws#gXpOx0Xt`!j2+DYb-d+6D z8;y+w$~zn2-rg)Tyj4p1B9mErpg}2*(cUS+0^Z{L1C#66QE*katX}*0VokM9(PS6mH`l&&R^%Hg~Rt)S) z8c12E|4M{YG88Bj494(v1ff1!o{fg&@cZWafejJKcY@>|U7YrzA7#O=Fdc}@0tpd4 z$ejZ=B#;kX4ZR9yu+Rujf?tdUH-k)tFj^i%ol8)8_i-vS5UVyBMG=Y+L2%#}7(*`z zYLFU2<~v4z{>ndcEzH3)qUjFj#!esmgWjB>LuHqL#T27~oPh(8!8uFtAgFar{=1yd zI_28+oAPLD;zHDpSL}V}XGfClm1DPbCauv@?BK#p<@WoQ$e12}T(-lE8N5fdH67t^ zjzJ-Hg8YGa``}aa6-qg*a?rO<=JSubJdk%XRqNqmIyqiEqdnpfLPr|E!!N<8l|9hN zky=gQ_x#qmWeKoikJXS`xeXyO>I)H8rOZpX*qg_9Kk(hdPYVnQrzP>T{nP`l%XFTD zH5)YzyXi5b*O*A#y>x59i7@%-a?B*!|qw-#Cn>DkL0Q|DsYrc(XBVOa@_Bdh}NnoO8Z{rrOgYJqOgt+~d zc~xWks<>q52lAi;NN8znc@Mi9wH3)5=}+{HTMx!_7@@g3`L>`1&zZI>+ZrCh-M0(y zZTKC2zlxnnbcD43tHCTqA9hwQDO^KPjJ{{5Vw=1O@cDv?*EJCm^V_ zBtJqnnd6C4eLhz92F46UH*lSlDCKEdwCSGyptwM1*YNtt5h`i@7Ol3`m$B7+0J9Sg zB^(FZQ)oxFZy||2SHQC&p4;!;#*g8Z2Ov$&mMD}0%sE)E3 z=WA``j-=;_zX%pT+xa;z=$36@J5#7bvdTSNa8KvVs2$7;(vItoOdKu{Epu3px4F8- zJJ*Cr6~vxlY0_u+q<@5|_$@I;E&Fcod?Q#na~@eP!)(f*F#~ERfQUKLSeU=s;KWWU z_8qvWlC^$m4KaO+TLp=mkZ<{R@*Wo&$HYG9CNg$_X2NQ9A@=bfd z*6#aW=2UXAN#xA1=nT7uZRYp`M-f2BEOzNAcOIdC(UZI;3f(>&8q!LA)aDcZSlW(t z=$EA%Mz54z`pf`GnNJzILce;3+k4|DcRDXb1h%B<;{c)Ti4mYgwm%B&*F}zL`6Lxv z9KlxynqQyV?LQ_Z2@}Px8}(#!@e^)&QzhdHUK`dXX?Do1yDNk{x<1G@aPS!&FHN(y zqmb~mqduHl#P__J6>`6R%M4pqVkmf)e4EWy6vO^O*B#>oZ+D2_Mv5$XGlTr(tG|U@ zQQTq2{8=GYOWDqbXbu2Ve|YK$3`a&hMpODXINZ0ef4igI{1f}{(|wl^w{WCm? z4^})n#8L!(lMHs@eY7E2_s@A&6S4vTrwN-Nff}unAm>Fh{5>Te246x6ehOv|MgyyV zYG2gDd%OQh-^?V%(V~ZHj{j&7{-fr?q66R?E*-?}Idt01SyiU#iUjC8`%m9}MS8oL&*rGHd!;Oy*JJTAr;D$V zh^BlnxKTsZo!LO3Mz>r~t_tY63h~Q+`#chROV8tp#S8k$iV2?WyuaVe4DiF(J_zf6 zBW7Npb@lXz!ml4Q2Fby)IV?=>k09p%b*kHe^N3AzbwH2E=y{{_+Qx=SqcTou(j>q> z0?`9?&#?d~;%NdUa~q&&NG?RM)z6mWR!pom|9#9Nc)f5z>%H&pm8C)5h3kDrLX1blk7y}*(Mb^}I>J1=?q(E4}01zq=SnN?*PB*`^ngwy6 zTv#p_sXb$803i>B-b!CG4s+k+vK2AMV=$wWz5Dj5RqyA`apwDfl|QV}Jz8IOlEiE- zhqmTh;Z>u^1yp|_qI;IbS*~_7|-FKlu!|Jz%`>R?)t6GnbI|gq= zEbcj^vbpZdc_&MBv(%CziGtR;qmWS3d8*@VF zvf2GeZj#zZz?LsPPNmO}yzZHOZis$jv{z_+zsP+?e>UHHywFTgYoiyECwpQ3{ZSwk zXzz_bZT#5hSBlLn_wkC+$WF;nUHr^!vzkNl4+S`v(_-SgIxcbpIZHX8QfYr{>sR@! 
zjH$iF`&uYd%qRS4*@x!v`O;CRHwVyEZ}Qp^^YJpnJe%8J(jKw_;?$=`Vmhxien7T_ zW9xDP1z))hONq-ih_;3R?!14=%;Rlgkj~|i^t&h9Dq3!8eC;FLAaKRd`&$e-tv@Bd z5xO$fpH^i;hBj&O`<8&g!~r>R^@jM@-^c!!qnl6VJfPIK)BM|E{a*}>rI3}6J zNb=Vkki9aq2E%u&S1cO!M;FEV7wZ8Q#Z&+9x%dPkc5CdbDaqS zKaAT#)5yhhOIkGiMzgNtCS81Oo%sRMGmTBoqh*?1XBM?GLd@0EI7zX5enA1)LOQaR ziHSd4SLf~RFZ?v=oR4>R|u9X?T=yWJ(1?om+x2J z><%Q0Qe}#}hkRGkQ({zpp93`TV6VF!QCv6=7J)S!nU#boY$Ayi zVoeUM)ldCIKJ+_UX@HKDrSgCE09wU-*p9i>0MJ!YBC(oATffAjBf0j;0QDl}K?P^p zCvsMEYdxeSF>&9x$bsC0TCjfh?9oh1YmRFKF>c!{j zE2(Y#smKqUAR67W#4W}Xhkve^+>aDny5kKV=C;E*-M&dY%%X33X(X3^*p-8xAV_*h z_|PRt6|@z-zecKYALA5)z=$>oqTPcoH74=|=^=-q%{G_AxTTdOR#PoFfrUc9JB<%C z1s{ySVvyevE{fbuNq)mAF^)M05Dloe-lo>{50xg?L@n44I;Nm1Gg0O@t)Ro1=(*0b z`3X&wLR@SV8k6~8kXHwiWZ7b2h&xd^i*Jq&8<%2QOT4yEWe~9Yqt6usi-$E+`M=)% zuaH!%^y)ev%Tk6?e5_a)QvD@&5ls86q2H9ggD0Nz6?O~bE3WU&QORCFs`=dBGd>ZV zK0U&q2#!xUW+8isDp8U!(tzr?5S>ZKEf}_C?WJ635 z*`AkISnX**!g)<8s(3P(#Qrv6MW0!iB058}0LaPJLn1nxDGfYQaIL~F7J~5xr|xe- zBXuX}%@Qoy0ZPwy#&DrCZ~m~Gh_m#Fa|kv&(awA0g(|OhU^xbMZ74N5Tfc;tGSl1M zH?d21o_!@5!TCXiPzXf*=OM4po}YRE@*Qn9xPc)O3V0)l419l=Na-jEi$D19^G3`cui2+nB z|GAz%0Hg3;8d3-}&|mb=s&@Iz?A(mCSDk*&qV`nM=Un@(Hsaw@yJZ-bdOnfNAH-^Y z?X?c~&_L9v$ko*>Su$qW(y6#p=!yKCOD#&73a5nvh|^YqV3Mq1^XG5<^I7?JbG`|m zd){r5^gLSwBIEogR?*wxYt<=cLg$!!`^K3jKhv)iy$|+KfWq%jjsJp|MgO4JPxx`6 z_#UNe4IA#u^ecZsn=Qxc>*`CWVQ#*!Jex(aSPS1T)^s{?C)(;I(Xx{4JMIH zSR)H63Wo?*Ls`in5sYV0h2AzjRhC{Zl@84#pV8y4Yu4%N1=gB4k3xFz?-Qzw5=mx1 z03%Hob>!H9#yn$)P|>R{AUP9#K3ZvV;?&HNe(aP79==)yp%4!2PR*)@eyuIA=QkS5 zVE>i+07Oqo@h_EV#H?Fy(O8)|l!C9H<9+dWMv>v5x<$D-5=^ohK434x5K7wB25VAa zW52rQ5r^3?)(i|EtMV(~m3Ccd{nstWj*=$=!PjO-8KZl6xOL2WzOrtZScRdEIZ`=&c7xRPVV#1=;9vFd#5@Ys4+IX&ncg*tD3mihD)U@JL*eF=4 z)g<1!et%8nX1o{iHH-jB@AxFjbE(Ny=Pn#p+7CR6?#7J*Scm@}a7f%5+&A8hP`bAH z-1$y zR-`q4m#S@wfuk$$Pnyce&0R%yrjsjSDhR(`D4w@4da2?i#vm)3y3m!Vghx!ZjX4{ll1^swTDmJ@5Zf{JahD7xQ_o&XZ7H@L&I>e@^}QYEQ*j zUhK^E|6nNNkbonkOoY{B()528G59C%DNK`Fk{9on6KbJrN6*j#;JLm!>2CcgZdSRA<)4W_qIp&Yt#sk?`KkR zDdh{vv;Zva`Wu0y9hK$f$)=F&gsZ8j{5`C1Qg6f`?e`2GoSmVWx1Lq=bP1Q7xBE;RE$EX;xktPX878AIlWBH zF|HY;Ce%m^-C*C|Voz$;*s{s0qM6FMXM6%Jl60Up6 zob686)gJ*^lSGY#MVm$1yw{DV8+b0tc(n;A01GK|71PuwmQddSA=dme-@Q$lbTL<4 zpPCO~V3ck`abq9Nl}yn4wT~-IID+IA%f;WBSPH(MCJav{@A=U=h}rYpre-alfB34+ zVQJ5E1E^Q`TQ7JVM;QRR{u%MB}#w(sC-+T86f)_%re{~ZUYf9E)6L#$Co6Hd27(CuqNRo3EE?OqsLd*Z$_I3fky( z&%JT|G5~_F_sLHrlXHEy@b<)hJO`eueC3aUi*scb7;(%8`N;+=^+b%=< zH6mgW>B;4x*K|`)$vME}C$jq`#}dFtfb&4%9l$D@eKC(^ zd7nd=DbhOaO`63IESWC?CeAc2pYL{a8^p9w@D@uKUH7G{__>F(XtHe*T(*cqHi9sF zRs*jnj`4)e&i< zhrNSin9oq;{m6h|hF&SX<5ilSu5>Nwo+m((G5dT;Tv}OI?F#->3{aJtg|=#QHzJM! zcG|}Ub4G@2T)dWpzln&5GK}`rvTbKsw%oOV?TM1%4J&%gKxN7U^bZ#6ugHArF6{QE z3%~Ntx3FvQW%rkKBCq7f<^Rbze&dBKxLAAkvI(-r0?cH}$BTQOGt$3H4ikGXE>^P; z5@uYqjjw zRR+_)gG*9pPnBQTb)_7)&8=+sK*Xb{1!F30P{9_*!n8dW+xo86) zNYT*7AG>yec>$00K)GaA@p%V}f$`}E-+TU;D~Ag}x%u_vVO?byPCR(Y1qrkf+dx~< zzX(YYg*MJyjAq`VRkWm@O!Ou|b*;pjmk;jl!X^M3+lnZwfXv1V|5WV??xvj+{mGnE zp6o%_BZx7V;qmT#ie2`aOLPR%{@^*pWK<9-f?Q&smQD_|Gi<-q+X(fp2K0Kn0h;T| z>OGR4)fvD0;SRzeox$?M8;Y>ONH**XzvdKmuU5#X7V3>HOwVZHO%{z0I2)o zp5+PF%tHkO3;#aza249wM}h0-hvz(>UZ(@3$4{gsjbZEJn-Qj`p`YWM^K_PDU3rf2 z%O8Y;mLXG7w^AWuvIAy@xY{K2F^Tf~WWb2v=u{P0D#OB?VAlJNOq7<%)XH zIpk3H*Q;Mc(JXK=E+pFDCFHfK0mokR<1BNQw7!`L5E2b?sW-ZqFwi24;eo7b$$-+n zxI0uMtkIk^Ph(!3g+e&x+KhW-Zb@HF%ZpMt9QFv~$pQp9N_N)r3LhYWw*Bs6$DSx? 
z-_N!y2b^l-+_W&~0BmlNSM50seOANAX8VP7-I+w6C>^G(_=Cqc2| z7BBxEEu(iKuY!UBGwdYb84gW50(yZ8*@2}8vmWt@xkyEAt%IvFTplRd#V6RVa$t3D zkOvaA_WdUC4veeD4<$v_a-*IbbAQg)M`wp{BYxU7AMA_itG(GfRvLv}2UrUD(ayc* zB*Zu5i(iWR9Yv^>Ig2;;LHZ(%WdU?*ll&f}D^;G;2Icx4?&kn)q31$v;oQiqb#DK- zuk-U~p!f~$rh4ujmB`{?arxhRGFH_X<^-LHsDXt1T9J~L3A~8x;V$Zxh5;o0#;x9c z2YADPgXTb4tLE~u1>$C@?Hl5ikEJrtS3@t+OiOjl3tAv|kXj= z&L;LYZR#JXOB*a%|1J$$DWf1fPp#-!nhi%s*{7^v=k1=s4VH}y%*5f7 zQ?D2!At+6>b!k3T`PVsu8hceHmAjWCE?2gnviC zhotnV3A7l}_%J8+2oCb2akmzCZC`Qou40BD<#7B^oyids#jhN@z$PqU)o$u(wz~xV z-TZ`$Y+%kF@L^1m`9wMI_a$IpWymLx6vwHj{+Hf2rYlv8*$S`fN`J+1FvprkBxse8 zi7d6j$9)f7L+yw9t|c-yv5mW8KZHJm)Qx&z6Gfq(j(4s5Wu!ZTrN2gkB=3OiiiEH! zrvw2*P3m8NFie!JWyS)sytsUtnf>urKao~(uXGUgm}}6zGA$wiv%-}NCxj&Es5{gT ztGt6?5DVj;EQpJUjfpo^bp*t>(!@9vWh#G(s+h0B)16sZ0b z6-*XET$ej6j0iGn2P`nh{t~J-alU5i67Khk;&ihpB0^-XVJ!zfp_#6M+r48Q_^xF} zh3)55yeEJh>t()po)V28gtW8t8N=r%G}eboz@$p*;vN43qKbr3K>K+8ClI|d0f|e~ zsTsqm^fl^f9cfyiQ+-olU+F2@2I}S)y8h3wLaN69yxXS7%34b=+~ZMGz4Mh0`yEPq zq}Xqw=$QI}Hb@RNz~d*N7677+Pz7eBcHJGZ206de&{740MTEQ4ZcwLrRR)_=_vptZ z;K?^VSEdM~C(++_~u+?e~B;>5=#|pkox9#jE2%?iaYh=mi0%mo!YZr zsW@4jOr<|=io96UauuSgVw^-IryCb(&-;UOapm>1bMQ$TRRcQjj*6&d&*8ik#XYlTWaU(grr zP`QwEW0d;0s*8SNs`A5Ib3V|S%nyU|C333#&dnEA!~f8a+@sh@kzw+kdr87JcfsY` zUP~PZ2v!h4>{>*`{4BDVvF+la)#L%q(Yyq_9g@>_pNr{tkXB5T! z1x@P?b}>Xm0mZq{r*)*mMeZOa20ua#=~Ub&R&fwO9zD*%^WRK84Ma$jFz`L12erLg&X z5V3~R{b`B6xkwd|M_M;j=0g?Rnm2lC$#NtF-J#UlYD6mo0Ljq|xabn;INygsS(ic0 z^rj12&&p^OS^MYiW}2lLN&cyZPS(rHMpK^MAi|Y{a-&F0I@a47K>5I8L=u4WwWs z+(KEmX`MDYsR}RH^s0lS%#m(xN{$N-m*MDEL$Nv>qEtKu#FiF>maLujSk9;$LJuGl z!7i-meq!K$*D-xSP1rsVoSfK8%6`{e(lYj~&0>(@0#6618JzzAtJe51M2Oir9+lJ& zhi^;g#M@90u@FL{AvjzyeTQaB^+yF?16HErIOrWfifZxxB#GSu;XQdN3beFmBUt>m z0zcK3SbN6(Cd~$MMULP60Fv>1W3-pUVMvouQaA_9vc^UY9>J(0m6i%-UNB9=nuLnV z0fXRcT)k%RgIU82sj~zh>a%LpEj~EnxLwST-nA|uGiz*5TfLnv{a~{k+B|HH!8_lf zzk66OwiJ0`_3bX6{ab4WCn$2{ zxdiGTBThkwr2@L!DG@WqjkxuEQ_6|Hci#0gI_O>2gEeA_F~nBzhf>2|k)9_O&w*Mg z@_SV_K1M=4-XVi(>&+JA8BxN~vKQ0w33`H$qO*i*%R-IpQ4$gkJ!0S}xN8&XS}#e% zPRIzg=twk>5jK0}aFzOlDG-Ettou{0ab1{rO_-XaHv)!>z2m0{^~nR8_-y?#6Ovh~ z=J=fjIW`j}#`RwB{kEEmPQwQXU*Tw~MiBAZzE@lJd_MtNbT6e3#jM#^o%%+`_u&!= zV5^n9>+nuqCb>dG{lTQAUi*s#;05{E}-qRe8}@L;AjR=M#7i+4Z8=6Bv+- z$BTTu`lt>F^|>DsnbI=W6A;Cd&933U{LQv*YIg(1j zn%#}u$!o8Wg!8e3NA$;*%u*ELS{)8{u@BrhH?ZjHcnz;*D?d@DSH<%wAd~~=tw9r| zL0_pJdty52u1!9>y3afwwvK7`282=z)hyB&L3tqXOIFmAv*qTit(or7&cQIO`kU`v zesn_X3T7hM%fOwK${x_HmT#wp>Hz2SsYiVh!jn9SSYowd~04v6#8?fL-S2jO=c(qs&J54(L&Ls1ln0m4UF{kFj!UQH0 z|8lm@lL4O6N)~Uymf7c1=GYKkh792pL6na<%V&!XlE2{D9de>edkgEWg4~ohvcwN9 zb-rsXmmQa3pJ^AOV@J;hv5v>^c+lZDZPAX^JnE?Y^>($xU6cGlrB!0|ZQkgcdrEwRzzfLn#jVGbL`P4#Rf8|8&qQpJ1XeVJs9LI})RbUEQmE zPILdB!xWN4msjrK8Yt1h!pUHt!x}}4M)ltWt9g>^3Up)LM$8Mkia)W?tEa4jMXeZw)O^A=VTzG3l^nf#cO2 zP6c&kG=z@7d4Eu6VTsxb_zRe^f~UO1^f5=lo11FTRXybrS@- z6ONIG{di{+=L%jJ(g5MaA^A9Dn(u~bF)@03oS*C>%VxXIL3?c{@*jPXR z5_^4u^c>HlvlHm>%oOr_^ikYh@gC0yeg!v6T;VXj1x%_!ge@~lt#gu-`0 znaIeNoO8@1xCwfc5Hn*#rMkcTRzrF8X?_rJrwF^RE})+A5xQxYZffcYiEDT$Q7CEn5Wewi>1ZWZ@UkscZc5LmlakgKz*w^eNR7KXp(#2+<-V)xe^^N zxQHD20}i^5tb@tXp4QvfLtP75Du{I;u9bm9u}p+D$50bf20W)ZD9X@;0n-eH?~{lvJOmgFo6 z^I@CpbVRxwh%n*)dKDH_GV{r>=TlRcEctb=;(QrE&b~ew2-ug1yqvX6RB$H$1gwWB|+ws79Idn^#e7`2Ar`IjV67r;8wGk3*;@sJSe;Gj_BEfFkX z?;c90gJ2bcZxh6wYR^a8Au(@WtS01Rcy#koDX;c}MLG#z6CZe5YOsN_QS2<{|8%4G zgyVm03Isq(|85kM6_C+yL`3hUTDVK+96DYmOvj(_)wefsXrl|2AGa~X^iU={&{i^5 z%?a7vv<6b#-$l<)7;`|(%w>bdP+_P?PJeYwVIYaj_1>R!#{t57$VlH|R6%9D_M@L5 zlAnK?+=zDuP}iOPHHNYKce9)#cZ%ciZT5p2ko~r>F^t7(i0?YadkGoYi2pdiwF68x zqOaZ}d%3RW-XP(XN`NKXIxu|32K@lE**Gy=WK9^!z=gGwaoE!&0PBn%TT|qmdm5AP 
z@*$RAygOU&VfhXli_E{%hwF5zV2FV* ze%-9)mapp7a4#+?z(^&t-;vcf6}(xgrcOr$V+057j61Ai%XE-jNDAzs_%XnL`j3n4 z;iJ`Xwk5a+zr^7}cI^@fLVg!pak`7_$8?R5odcjyVPn+KkTl?wlGN*zC#_`Gs~MD3{Cw*f3R&UPrGAT#nIdSi2q=fo<-Ba`s(Jqkd{7lvU8~iyTqRgxpa3C(aniY6}cI%PC5i|;MifdBu%n(oolQ2X5 z3f$?KgtbG)DL|$jvkGmnlisOxk_7W_bYvKkhxBD2>h((v0sOcv>F+K163kd* zWmUjNwT!XM_MW=7$I}4B&e4IjwD!?=Kw^jiMlo^zUG!q&H@{aH2-i7;`)-~S`wjdu zxKP@DqB~98yjOGjn@?TK$1RT?tzqdk{96&iV~fe02kzf$P0Y7cxMb7o#hU!8-jF=0 z#_HO%e?Bz`5SZ`19mn3U?{r78Y6DV? zk3LkzN40DiJ}%c3S5n#L^BNuOgx?#SdIX%|rFXJK-oFlqm%EMhH-{xzW#WDH*>5Z(8B?%65F@{ZRjHt516 zIIPq_7lwIU7uA0}?O}&PM~DR5`E80NfT*4z+d+wFypiR8S9CJm+~n+!@7;WoKO>;T zA<}nKbb?D@_yr(bDNHl}2d4^q{~|{v+p9c7PUpWdmwzJGFq@}5b^>Gn?tkR5I}}gS zlDUz5*8jqW{-v&w<^cfF2%BKn|ADx6xI_bSXV(7qMAFp%$YY=WtPD?=kl(-aSjZ8< zD!8_w*&Qr$brIE%C5DXy? zJMT%^m!u4Zl}?4{PBxNZps6+~cxILaMmz5B_HpXBlSRqfkF|b>i`UWpH9*pNI$bF( z3GAfJ)dC-2s9DimgyY;1uD_w;z^DN&Y&l+)?_uBn1po^-$-3hQutjI$0BcAerx>)& zXY5kA`<8xFbQ=q>(NL<+uJN*Cd|XTYgHtmt$Yn;l=s0_lP5Zxu99V7N!({zYf1Zri z@IJ{Wx@&*1>iBY2TiDpbR;j;xR~X%EonmS*2z?5ssq|eY?VWIJkBJ5BJ5zvQ1}fom zUkGH0=1;j%2QNN_xIlq3O&h`Ckp6^}v+WT4^m?vARD zUrwA}aa?t^wF9*ciAX(mwuht*i)#KJLJ%p#@XI)2;WOm(5#S>MW%ewMHM6WxsVM%9 z{DQduPu5}`g&dHyI{YjFaNXpq1uowFxbB^2^(;?q+`-INV$&_8vxmBDh$(;V1SU)F!~cQZWl^Z{@Ly*z9Y%)lrS26h$Z14M3`YbWkr zEayk_pW2OJQ&dKUL?-IB678d$Qt}M>R5|~lZUE=~;hmP1$Y+V1F6>1p1B@o0;VQ$% z#DR?>8UV%o!zth^^*l{)>D4TuiWI8&>ikCGKs<-*UV#<0V@sQ68}{xD%N@Vm=p;7r zjcPj4MX_(D>F4{K<4)95V`{cy%@c=HJkyTde5KoAr=kp3JNI{jL(mn4cO*V^0$RoA z)dv(gAD?`Y@=s{)I#4X=W;MHJN%vs6Gu){Q{+rAd-O*HRXmS5pLg~o-fLVhS(f$RL zCIcX(8aWG)0!qLJWReENg({9{17bGO;7WkIPw#lj ztxD?gCztC}SHRa(dmaraZj^7Hv7Vk2KI*#z%E-->Ch=9VFTVblnk^e8<#ysrGQb{o z=ti(_4PbnUp<>idE>Ql9{Ws+=r9-fjH?l>K^`ExnA0Y4pv#ZqoSk|@L4YT&AiMh@a zkrVD;&-pCsycutH0N)T3KurIRqkE^m5pfTX)v;J+ULW|7xK_7q3l+`_WkWs`m&PWi5k3ew3D;Sj;(L zzgbri7qs~%=(DgIp52L+Isrrh@r)0Zl^pR5r5|7&bgt_I-#r_Z2M{21Cj zeoZbS$o*6sC`M)o{^BF!%+uVYYBm;uHj6ri@<7&YU8!(bt+_~;aQRn&ly2;UyWup9$XEIYhTBaYNzR*Jo^@V0+^+u_+A+& zarp~le-2_4*ye8A@3BS3DeYj_hgD4gVK3&{1v=}nuFS6o>tBTO+azBwlwB`+#2s8u zwHQFtTVa3#V?#VFy;n53M2&tp-e*(5G)L`h*Wfqy@D7fDmvYHzT##Oi~U>Q`11> zWejj&Xxic)u%rT}7<7eQ^grUl7MuN(QhOm1)1=k8o=Fd~?QK(q_WOI)8jpd`hf?d} z&h^fBF*jEDb?zq|VnL2d`m{c7fK#au^v)6!_@mXQLBis9}8ZsjH9lQNpUi z=%UdCPcI@930BD-@uSW9)f{CI>Y3Ox!LJWtzW$GY59iC)HrU1r|!*HLBh?L@_An$iw)SKEcjk1S35u7DpRI7y2&z}og?@0S7Ddh^Uo zczx;C{PHae;a!OHn>VG28arPJEmJfLfw^kVGCh7Hr-Q|^*RS;Mn3b|7BWF(>-nF7_ zF#+;|h&_nX*!>SuA?Q~zBz%cBFN$Ky z=!VDbt1Y)9@Pa_$bx;on+}N*JQ(5O)SenB>F3#i#0Osx+pieY#WR=YJVfdklYkQ!w zKSZ#l3wOO!3E~d;C0HU3YrR7Pd0cvhY{!2fxtgy0&>S|Nr!P~jiY?7MMagbzBWf`- z0+1wNakThxR3^~^o%eu!q}EpP)a~N7<6Jvzq{_T6&)4U>T2@P1u~Xc0x?iz~95Qg4 zqmrEMS3k#tPu^j&-Vf;FH4HHD>ya~wwdhKYG~g{X+)Mry7U_)hI{2CEXgL;_~-}~F~t|I=N6`rG|GB1on6Q(dvO zIaT%wdCT?U-c3hW0U#sl+en!l7iQkHp7FB7A!oj8hEe=9(*a4efh{FQ`2x7BRQmmi z1@eUB5nF$D&z6sWly2?usYtdz#=7suJU7^k>pi3e?$Lg)1KTQf_Kk&3i)00@ z_g>EH_pG7?#;=P`e8vhw8V?3>{~bmKF^n=cYOH1^oo)}=i|L=W`mOvb|M4f=56Zdm zGb5Grjl)O@$8m{!ma4W{Ps9gVuYN{u$z2VLp4HbI=DflK!(D~%>UmgLvY)%WQ%vC~ z4$@PcvF&*orK85Aw7996x0x_tzsq%P5opG+ogo{|st>+zt5B~^4VAJ;!Iu&@eqf`P z^f*HmH{iSqGdV+$SqKLN78FHILMJf_JD)F#6pLz246E9Wk~ z{%`ab!>J1^oaMYG(_tQY&-2HYSLG3vJ5KkoX8+P2apu-&gyB#8OqAU9=Dp$;ae zALb^h{BUrYIknYVA-=k>)X41tD^-UsO`64Dyvo=tvz{9LQc6~4d>fJ)A~DeDcN`Gv zEh#3sR5Vdh8vNO9bGn^Fa;3=!>(Khw8)zJZ5pK%q&M=s60R z?8T$nGcZ#o_R#MTGt36!=+hT?GzT1t0c;j0M^+e(j^PeaSLnoc9t=yO@jt)_L@(K+Dg9o#`!39elK&t#FGOgFMJY@ogA^Fc zNDL=TM8Rhu#lMND%oFurciKz*6nw#{8PPV~z^JmU6;r5o$t&Txb`{pX)HPFY~a~e~vMyoRQ#02zd zxtND%J^k4=VH;nT&5WYaOyeUnl}b)|oSfd-E;$h;u^OvtAJVUHhzmMfaom?Z31X-E 
ziU4-B<=5@60}McTx-*=nVUVy0HD6VUqvIUOxFt|Bm9bfA+lU9b`BsI&&~JxL`Vc+? z2rd$XeC?3MFWn-F#0-v#-2R+yw-ivyK#FUYI@YP+rer<7ie7X)( zhf+gXYq-)pN)Vv>@IYyt=_3{E?+^E@J8hT@ao61EXBzX?k$^u_FxF)rZwqzK1R8;O z$$dMc_rV6cDZ=97bXT+D%GCp9#{tJV!z}wqNh`nNQ2{Gb(ST!FfqCEP5TtZ3s6@j6 zZE%0KrS;h7pXVt-%a)yye`7xopbYzlheQI7jnwR!T#b+VgTm4F5wE=+(L~xlWfTo> z3!=KkOT7Wg2C)_2x4q@sEiRb*IDAPjm~;u`$=o|!=C$mAxHy`OFXa_FmaIjbkfR?y zVAv?#RO;(=uavugcI_V@-O*UpxgzRu;Ttv)uzF5i|2!S zxtDdKW5JrY*V)ZUdq-!?G@w5oFse4>(ke~MZn}ohz{Ve8BtH{-@W#<9sL6nh%Dn0? zi_JS$Q28z@J7uX(w{6X7*x}@r8|Aw%0&Z9g4E+vf2*AIU2hr{bS0fP(Ox`DrAs<|= z^#u!cJa&05QrY*f$)R|gy!$Eu>qhDnXJoP>4$5@I%{m`&FHl(?HwlsMTC)zecFRhx z*76vHiaC_B1QOUMC4mxNe!hMZ5q?Mj;Lpftm(V(uyFe4tD52cwYgNn>(tt;_JBx9L ze@qqQ*e4tQt$u*1;`iJ@VFL%M;LJ>PJ<%hIKI}>C+0em`%;EW*sz@FWab?As-CfsZdEcCuj{*k=Ix{Ynk&3hCxdY_HA^OgEJPg38VsWX(lIG)gk?Wk_fwvUs*{JO4EUdw(bvLsyx-w%NCZpuz)BDQPmwF=9Kq(lNAVi)q%PKdO*Ll zPkvzvM^3}=brWJ987RVFbPtp}=r0okb}g_oJ_LZtz5lH8BSDc=OmG5>C3Tr&+d{dP zIV1$UIcVo5_+YH3a(t!-9jRZcI?9hvB$3eA)oHvyp4!z2z#by6ABf`Wj?T72l`x?} zz$xgBDBG~oe%wHG$bh|clSS!hN{!(AMvyG#Q|;ipW5+tN<`HZfIS$7_Hp#5mDS@hS z1sl8b535_i

- - - - - - - diff --git a/docs/archive/examples/zoom-activity-dashboard.md b/docs/archive/examples/zoom-activity-dashboard.md deleted file mode 100644 index a141f2da418a..000000000000 --- a/docs/archive/examples/zoom-activity-dashboard.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -description: Using Airbyte and Tableau ---- - -# Visualizing the Time Spent by Your Team in Zoom Calls - -In this article, we will show you how you can understand how much your team leverages Zoom, or spends time in meetings, in a couple of minutes. We will be using [Airbyte](https://airbyte.com) \(an open-source data integration platform\) and [Tableau](https://www.tableau.com) \(a business intelligence and analytics software\) for this tutorial. - -Here is what we will cover: - -1. Replicating data from Zoom to a PostgreSQL database, using Airbyte -2. Connecting the PostgreSQL database to Tableau -3. Creating charts in Tableau with Zoom data - -We will produce the following charts in Tableau: - -* Meetings per week in a team -* Hours a team spends in meetings per week -* Listing of team members with the number of meetings per week and number of hours spent in meetings, ranked -* Webinars per week in a team -* Hours a team spends in webinars per week -* Participants for all webinars in a team per week -* Listing of team members with the number of webinars per week and number of hours spent in meetings, ranked - -Let’s get started by replicating Zoom data using Airbyte. - -## Step 1: Replicating Zoom data to PostgreSQL - -### Launching Airbyte - -In order to replicate Zoom data, we will need to use [Airbyte’s Zoom connector](https://docs.airbyte.com/integrations/sources/zoom). To do this, you need to start off Airbyte’s web app by opening up your terminal and navigating to Airbyte and running: - -`docker-compose up` - -You can find more details about this in the [Getting Started FAQ](https://discuss.airbyte.io/c/faq/15) on our [Airbyte Forum](https://github.com/airbytehq/airbyte/discussions). - -This will start up Airbyte on `localhost:8000`; open that address in your browser to access the Airbyte dashboard. - -![](../../.gitbook/assets/01_airbyte-dashboard.png) - -If you haven't gone through the onboarding yet, you will be prompted to connect a source and a destination. Then just follow the instructions. If you've gone through it, then you will see the screenshot above. In the top right corner of the Airbyte dashboard, click on the **+ new source** button to add a new Airbyte source. In the screen to set up the new source, enter the source name \(we will use airbyte-zoom\) and select **Zoom** as source type. - -Choosing Zoom as **source type** will cause Airbyte to display the configuration parameters needed to set up the Zoom source. - -![](../../.gitbook/assets/02_setting-zoom-connector-name.png) - -The Zoom connector for Airbyte requires you to provide it with a Zoom JWT token. Let’s take a detour and look at how to obtain one from Zoom. - -### Obtaining a Zoom JWT Token - -To obtain a Zoom JWT Token, login to your Zoom account and go to the [Zoom Marketplace](https://marketplace.zoom.us/). If this is your first time in the marketplace, you will need to agree to the Zoom’s marketplace terms of use. - -Once you are in, you need to click on the **Develop** dropdown and then click on **Build App.** - -![](../../.gitbook/assets/03_click.png) - -Clicking on **Build App** for the first time will display a modal for you to accept the Zoom’s API license and terms of use. 
Do accept if you agree and you will be presented with the below screen. - -![](../../.gitbook/assets/zoom-marketplace-build-screen%20(3)%20(3).png) - -Select **JWT** as the app you want to build and click on the **Create** button on the card. You will be presented with a modal to enter the app name; type in `airbyte-zoom`. - -![](../../.gitbook/assets/05_app-name-modal.png) - -Next, click on the **Create** button on the modal. - -You will then be taken to the **App Information** page of the app you just created. Fill in the required information. - -![](../../.gitbook/assets/06_app-information.png) - -After filling in the needed information, click on the **Continue** button. You will be taken to the **App Credentials** page. Here, click on the **View JWT Token** dropdown. - -![](../../.gitbook/assets/07_view-jwt-token.png) - -There you can set the expiration time of the token \(we will leave the default 90 minutes\), and then you click on the **Copy** button of the **JWT Token**. - -After copying it, click on the **Continue** button. - -![](../../.gitbook/assets/08_activate-webhook.png) - -You will be taken to a screen to activate **Event Subscriptions**. Just leave it as is, as we won’t be needing Webhooks. Click on **Continue**, and your app should be marked as activated. - -### Connecting Zoom on Airbyte - -So let’s go back to the Airbyte web UI and provide it with the JWT token we copied from our Zoom app. - -Now click on the **Set up source** button. You will see the below success message when the connection is made successfully. - -![](../../.gitbook/assets/setup-successful%20(3)%20(2).png) - -And you will be taken to the page to add your destination. - -### Connecting PostgreSQL on Airbyte - -![](../../.gitbook/assets/10_destination.png) - -For our destination, we will be using a PostgreSQL database, since Tableau supports PostgreSQL as a data source. Click on the **add destination** button, and then in the drop down click on **+ add a new destination**. In the page that presents itself, add the destination name and choose the Postgres destination. - -![](../../.gitbook/assets/11_choose-postgres-destination.png) - -To supply Airbyte with the PostgreSQL configuration parameters needed to make a PostgreSQL destination, we will spin off a PostgreSQL container with Docker using the following command in our terminal. - -`docker run --rm --name airbyte-zoom-db -e POSTGRES_PASSWORD=password -v airbyte_zoom_data:/var/lib/postgresql/data -p 2000:5432 -d postgres` - -This will spin a docker container and persist the data we will be replicating in the PostgreSQL database in a Docker volume `airbyte_zoom_data`. - -Now, let’s supply the above credentials to the Airbyte UI requiring those credentials. - -![](../../.gitbook/assets/postgres_credentials%20(3)%20(3).png) - -Then click on the **Set up destination** button. - -After the connection has been made to your PostgreSQL database successfully, Airbyte will generate the schema of the data to be replicated in your database from the Zoom source. - -Leave all the fields checked. - -![](../../.gitbook/assets/schema%20(3)%20(3).png) - -Select a **Sync frequency** of **manual** and then click on **Set up connection**. - -After successfully making the connection, you will see your PostgreSQL destination. Click on the Launch button to start the data replication. - -![](../../.gitbook/assets/launch%20(3)%20(3).png) - -Then click on the **airbyte-zoom-destination** to see the Sync page. 
- -![](../../.gitbook/assets/sync-screen%20(3)%20(3).png) - -Syncing should take a few minutes or longer depending on the size of the data being replicated. Once Airbyte is done replicating the data, you will get a **succeeded** status. - -Then, you can run the following SQL command on the PostgreSQL container to confirm that the sync was done successfully. - -`docker exec airbyte-zoom-db psql -U postgres -c "SELECT * FROM public.users;"` - -Now that we have our Zoom data replicated successfully via Airbyte, let’s move on and set up Tableau to make the various visualizations and analytics we want. - -## Step 2: Connect the PostgreSQL database to Tableau - -Tableau helps people and organizations to get answers from their data. It’s a visual analytic platform that makes it easy to explore and manage data. - -To get started with Tableau, you can opt in for a [free trial period](https://www.tableau.com/products/trial) by providing your email and clicking the **DOWNLOAD FREE TRIAL** button to download the Tableau desktop app. The download should automatically detect your machine type \(Windows/Mac\). - -Go ahead and install Tableau on your machine. After the installation is complete, you will need to fill in some more details to activate your free trial. - -Once your activation is successful, you will see your Tableau dashboard. - -![](../../.gitbook/assets/tableau-dashboard%20(3)%20(3).png) - -On the sidebar menu under the **To a Server** section, click on the **More…** menu. You will see a list of datasource connectors you can connect Tableau with. - -![](../../.gitbook/assets/datasources%20(4)%20(4).png) - -Select **PostgreSQL** and you will be presented with a connection credentials modal. - -Fill in the same details of the PostgreSQL database we used as the destination in Airbyte. - -![](../../.gitbook/assets/18_fill-in-connection-details.png) - -Next, click on the **Sign In** button. If the connection was made successfully, you will see the Tableau dashboard for the database you just connected. - -_Note: If you are having trouble connecting PostgreSQL with Tableau, it might be because the driver Tableau comes with for PostgreSQL might not work for newer versions of PostgreSQL. You can download the JDBC driver for PostgreSQL_ [_here_](https://www.tableau.com/support/drivers?_ga=2.62351404.1800241672.1616922684-1838321730.1615100968) _and follow the setup instructions._ - -Now that we have replicated our Zoom data into a PostgreSQL database using Airbyte’s Zoom connector, and connected Tableau with our PostgreSQL database containing our Zoom data, let’s proceed to creating the charts we need to visualize the time spent by a team in Zoom calls. - -## Step 3: Create the charts on Tableau with the Zoom data - -### Meetings per week in a team - -To create this chart, we will need to use the count of the meetings and the **createdAt** field of the **meetings** table. Currently, we haven’t selected a table to work on in Tableau. So you will see a prompt to **Drag tables here**. - -![](../../.gitbook/assets/19_tableau-view-with-all-tables.png) - -Drag the **meetings** table from the sidebar onto the space with the prompt. - -Now that we have the meetings table, we can start building out the chart by clicking on **Sheet 1** at the bottom left of Tableau. - -![](../../.gitbook/assets/20_empty-meeting-sheet.png) - -As stated earlier, we need **Created At**, but currently it’s a String data type. Let’s change that by converting it to a data time. 
So right click on **Created At**, then select `ChangeDataType` and choose Date & Time. And that’s it! That field is now of type **Date** & **Time**. - -![](../../.gitbook/assets/21_change-to-date-time.png) - -Next, drag **Created At** to **Columns**. - -![](../../.gitbook/assets/22_drag-created-at.png) - -Currently, we get the Created At in **YEAR**, but per our requirement we want them in Weeks, so right click on the **YEAR\(Created At\)** and choose **Week Number**. - -![](../../.gitbook/assets/change-to-per-week%20(3)%20(3).png) - -Tableau should now look like this: - -![](../../.gitbook/assets/24_meetings-per-week.png) - -Now, to finish up, we need to add the **meetings\(Count\) measure** Tableau already calculated for us in the **Rows** section. So drag **meetings\(Count\)** onto the Columns section to complete the chart. - -![](../../.gitbook/assets/evolution-of-meetings-per-week%20(3)%20(3).png) - -And now we are done with the very first chart. Let's save the sheet and create a new Dashboard that we will add this sheet to as well as the others we will be creating. - -Currently the sheet shows **Sheet 1**; right click on **Sheet 1** at the bottom left and rename it to **Weekly Meetings**. - -To create our Dashboard, we can right click on the sheet we just renamed and choose **new Dashboard**. Rename the Dashboard to Zoom Dashboard and drag the sheet into it to have something like this: - -![](../../.gitbook/assets/26_zoom-dashboard.png) - -Now that we have this first chart out of the way, we just need to replicate most of the process we used for this one to create the other charts. Because the steps are so similar, we will mostly be showing the finished screenshots of the charts except when we need to conform to the chart requirements. - -### Hours a team spends in meetings per week - -For this chart, we need the sum of the duration spent in weekly meetings. We already have a Duration field, which is currently displaying durations in minutes. We can derive a calculated field off this field since we want the duration in hours \(we just need to divide the duration field by 60\). - -To do this, right click on the Duration field and select **create**, then click on **calculatedField**. Change the name to **Duration in Hours**, and then the calculation should be **\[Duration\]/60**. Click ok to create the field. - -So now we can drag the Duration in Hours and Created At fields onto your sheet like so: - -![](../../.gitbook/assets/27_hours-spent-in-weekly-meetings.png) - -Note: We are adding a filter on the Duration to filter out null values. You can do this by right clicking on the **SUM\(Duration\)** pill and clicking filter, then make sure the **include null values** checkbox is unchecked. - -### Participants for all meetings per week - -For this chart, we will need to have a calculated field called **\# of meetings attended**, which will be an aggregate of the counts of rows matching a particular user's email in the `report_meeting_participants` table plotted against the **Created At** field of the **meetings** table. To get this done, right click on the **User Email** field. Select **create** and click on **calculatedField**, then enter the title of the field as **\# of meetings attended**. Next, enter the below formula: - -`COUNT(IF [User Email] == [User Email] THEN [Id (Report Meeting Participants)] END)` - -Then click on apply. 
Finally, drag the **Created At** fields \(make sure it’s on the **Weekly** number\) and the calculated field you just created to match the below screenshot: - -![](../../.gitbook/assets/number_of_participants_per_weekly_meetings.png) - -### Listing of team members with the number of meetings per week and number of hours spent in meetings, ranked. - -To get this chart, we need to create a relationship between the **meetings table** and the `report_meeting_participants` table. You can do this by dragging the `report_meeting_participants` table in as a source alongside the **meetings** table and relate both via the **meeting id**. Then you will be able to create a new worksheet that looks like this: - -![](../../.gitbook/assets/meetings-participant-ranked%20(3)%20(3).png) - -Note: To achieve the ranking, we simply use the sort menu icon on the top menu bar. - -### Webinars per week in a team - -The rest of the charts will be needing the **webinars** and `report_webinar_participants` tables. Similar to the number of meetings per week in a team, we will be plotting the Count of webinars against the **Created At** property. - -![](../../.gitbook/assets/30_weekly-webinars.png) - -### Hours a week spends in webinars per week - -For this chart, as for the meeting’s counterpart, we will get a calculated field off the Duration field to get the **Webinar Duration in Hours**, and then plot **Created At** against the **Sum of Webinar Duration in Hours**, as shown in the screenshot below. Note: Make sure you create a new sheet for each of these graphs. - -### Participants for all webinars per week - -This calculation is the same as the number of participants for all meetings per week, but instead of using the **meetings** and `report_meeting_participants` tables, we will use the webinars and `report_webinar_participants` tables. - -Also, the formula will now be: - -`COUNT(IF [User Email] == [User Email] THEN [Id (Report Webinar Participants)] END)` - -Below is the chart: - -![](../../.gitbook/assets/32_number_of_webinar_attended_per_week.png) - -#### Listing of team members with the number of webinars per week and number of hours spent in meetings, ranked - -Below is the chart with these specs - -![](../../.gitbook/assets/33_number-of-webinars-participants.png) - -## Conclusion - -In this article, we see how we can use Airbyte to get data off the Zoom API onto a PostgreSQL database, and then use that data to create some chart visualizations in Tableau. - -You can leverage Airbyte and Tableau to produce graphs on any collaboration tool. We just used Zoom to illustrate how it can be done. Hope this is helpful! - diff --git a/docs/archive/faq/README.md b/docs/archive/faq/README.md deleted file mode 100644 index 1f6a217b74c7..000000000000 --- a/docs/archive/faq/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# FAQ - -Our FAQ is now a section on our Airbyte Forum. Check it out [here](https://github.com/airbytehq/airbyte/discussions)! - -If you don't see your question answered, feel free to open up a new topic for it. \ No newline at end of file diff --git a/docs/archive/faq/data-loading.md b/docs/archive/faq/data-loading.md deleted file mode 100644 index 4ae20d834edc..000000000000 --- a/docs/archive/faq/data-loading.md +++ /dev/null @@ -1,124 +0,0 @@ -# Data Loading - -## **Why don’t I see any data in my destination yet?** - -It can take a while for Airbyte to load data into your destination. Some sources have restrictive API limits which constrain how much -data we can sync in a given time. 
Large amounts of data in your source can also make the initial sync take longer. You can check your sync status on the connection detail page, which you can reach from either the source or destination detail page.

## **Why are my final tables recreated every time?**

Airbyte ingests data into raw tables and, if you selected it on the connection page, applies normalization. Normalization runs a full refresh on each sync, and for some destinations such as Snowflake, Redshift, or BigQuery this can increase resource consumption and cost. Pay attention to how frequently you retrieve your data to avoid issues. For example, if you create a connection that syncs every 5 minutes with incremental sync on, it will only retrieve new records into the raw tables, but it will apply normalization to *all* the data on every sync! If you have tons of data, this may not be the right sync frequency for you.

There is a [GitHub issue](https://github.com/airbytehq/airbyte/issues/4286) to implement incremental normalization, which will reduce cost and resource usage in your destination.

## **What happens if a sync fails?**

You won't lose data when a sync fails; however, no data will be added or updated in your destination.

Airbyte automatically attempts the replication 3 times. You can see and export the logs for those attempts on the connection detail page, which you can access through the source or destination detail page.

You can configure a Slack webhook to warn you when a sync fails.

In the future you will be able to configure other notification methods (email, Sentry) and an option to create a GitHub issue with the logs. We're still working on it; the purpose is to help the community and the Airbyte team fix the issue as soon as possible, especially if it is a connector issue.

Until Airbyte has this system in place, here is what you can do:

* File a GitHub issue: go [here](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=type%2Fbug&template=bug-report.md&title=) and file an issue with the detailed logs copied into the issue's description. The team will be notified about your issue and will update it with any progress or comments.
* Fix the issue yourself: Airbyte is open source, so you don't need to wait for anybody else if the issue is important to you. Fork the [GitHub project](https://github.com/airbytehq/airbyte) and fix the piece of code that needs fixing. If you're okay with contributing your fix to the community, submit a pull request. We will review it ASAP.
* Ask on Slack: don't hesitate to ping the team on [Slack](https://slack.airbyte.io).

Once all this is done, Airbyte resumes your sync from where it left off.

We truly appreciate any contribution you make to help the community. Airbyte will become the open-source standard only if everybody participates.

## **Can Airbyte support 2-way sync, i.e. changes from A go to B and changes from B go to A?**

Airbyte does not support this right now. There are some details around how we handle schemas and table names that won't work for this use case in the current iteration. If you attempt a circular dependency between source and destination, you'll end up with the following: A.public.table_foo writes to B.public.public_table_foo, which in turn writes to A.public.public_public_table_foo. You won't be writing back into your original table, which is presumably your intention.
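To see the naming chain described above for yourself, you could list the matching tables in each destination after both syncs have run. This is only a hypothetical check; the host, credentials, and table name below are placeholders:

```bash
# List every table whose name contains "table_foo" to see the prefix chain
# (table_foo -> public_table_foo -> public_public_table_foo).
psql -h your-postgres-host -U postgres -d your_database -c \
  "SELECT table_schema, table_name FROM information_schema.tables WHERE table_name LIKE '%table_foo%';"
```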
- - -## **What happens to data in the pipeline if the destination gets disconnected? Could I lose data, or wind up with duplicate data when the pipeline is reconnected?** - -Airbyte is architected to prevent data loss or duplication. Airbyte will display a failure for the sync, and re-attempt it at the next syncing, -according to the frequency you set. - -## **How frequently can Airbyte sync data?** - -You can adjust the load time to run as frequent as every hour or as infrequent as once a year using [Cron expressions](https://docs.airbyte.com/cloud/managing-airbyte-cloud/edit-stream-configuration). - -## **Why wouldn’t I choose to load all of my data more frequently?** - -While frequent data loads will give you more up-to-date data, there are a few reasons you wouldn’t want to load your too frequently, including: - -* Higher API usage may cause you to hit a limit that could impact other systems that rely on that API. -* Higher cost of loading data into your warehouse. -* More frequent delays, resulting in increased delay notification emails. For instance, if the data source generally takes several hours to - update but you wanted five-minute increments, you may receive a delay notification every sync. - -Generally is recommended setting the incremental loads to every hour to help limit API calls. - -## **Is there a way to know the estimated time to completion for the first historic sync?** - -Unfortunately not yet. - -## **Do you support change data capture \(CDC\) or logical replication for databases?** - -Airbyte currently supports [CDC for Postgres and Mysql](../../understanding-airbyte/cdc.md). Airbyte is adding support for a few other -databases you can check in the roadmap. - -## Using incremental sync, is it possible to add more fields when some new columns are added to a source table, or when a new table is added? - -For the moment, incremental sync doesn't support schema changes, so you would need to perform a full refresh whenever that happens. -Here’s a related [Github issue](https://github.com/airbytehq/airbyte/issues/1601). - -## There is a limit of how many tables one connection can handle? - -Yes, for more than 6000 thousand tables could be a problem to load the information on UI. - -There are two Github issues about this limitation: [Issue #3942](https://github.com/airbytehq/airbyte/issues/3942) -and [Issue #3943](https://github.com/airbytehq/airbyte/issues/3943). - -## Help, Airbyte is hanging/taking a long time to discover my source's schema! - -This usually happens for database sources that contain a lot of tables. This should resolve itself in half an hour or so. - -If the source contains more than 6k tables, see the [above question](#there-is-a-limit-of-how-many-tables-one-connection-can-handle). - -There is a known issue with [Oracle databases](https://github.com/airbytehq/airbyte/issues/4944). - -## **I see you support a lot of connectors – what about connectors Airbyte doesn’t support yet?** - -You can either: - -* Submit a [connector request](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=area%2Fintegration%2C+new-integration&template=new-integration-request.md&title=) on our Github project, and be notified once we or the community build a connector for it. -* Build a connector yourself by forking our [GitHub project](https://github.com/airbytehq/airbyte) and submitting a pull request. Here - are the [instructions how to build a connector](../../contributing-to-airbyte/README.md). 
-* Ask on Slack: don’t hesitate to ping the team on [Slack](https://slack.airbyte.io). - -## **What kind of notifications do I get?** - -For the moment, the UI will only display one kind of notification: when a sync fails, Airbyte will display the failure at the source/destination -level in the list of sources/destinations, and in the connection detail page along with the logs. - -However, there are other types of notifications: - -* When a connector that you use is no longer up to date -* When your connections fails -* When core isn't up to date - diff --git a/docs/archive/faq/deploying-on-other-os.md b/docs/archive/faq/deploying-on-other-os.md deleted file mode 100644 index 0b493c3db200..000000000000 --- a/docs/archive/faq/deploying-on-other-os.md +++ /dev/null @@ -1,40 +0,0 @@ -# Deploying Airbyte on a Non-Standard Operating System - -## CentOS 8 - -From clean install: - -``` -firewall-cmd --zone=public --add-port=8000/tcp --permanent -firewall-cmd --zone=public --add-port=8001/tcp --permanent -firewall-cmd --zone=public --add-port=7233/tcp --permanent -systemctl restart firewalld -``` -OR... if you prefer iptables: -``` -iptables -A INPUT -p tcp -m tcp --dport 8000 -j ACCEPT -iptables -A INPUT -p tcp -m tcp --dport 8001 -j ACCEPT -iptables -A INPUT -p tcp -m tcp --dport 7233 -j ACCEPT -systemctl restart iptables -``` -Setup the docker repo: -``` -dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo` -dnf install docker-ce --nobest -systemctl enable --now docker -usermod -aG docker $USER -``` -You'll need to get docker-compose separately. -``` -dnf install wget git curl -curl -L https://github.com/docker/compose/releases/download/1.25.0/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose -chmod +x /usr/local/bin/docker-compose -``` -Now we can install Airbyte. In this example, we will install it under `/opt/` -``` -cd /opt -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -docker-compose up -docker-compose ps -``` \ No newline at end of file diff --git a/docs/archive/faq/differences-with/README.md b/docs/archive/faq/differences-with/README.md deleted file mode 100644 index d020cfd1db38..000000000000 --- a/docs/archive/faq/differences-with/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Differences with - diff --git a/docs/archive/faq/differences-with/fivetran-vs-airbyte.md b/docs/archive/faq/differences-with/fivetran-vs-airbyte.md deleted file mode 100644 index 9a9fe1045660..000000000000 --- a/docs/archive/faq/differences-with/fivetran-vs-airbyte.md +++ /dev/null @@ -1,27 +0,0 @@ -# Fivetran vs Airbyte - -We wrote an article, “[Open-source vs. Commercial Software: How to Solve the Data Integration Problem](https://airbyte.com/articles/data-engineering-thoughts/open-source-vs-commercial-software-how-to-better-solve-data-integration/),” in which we describe the pros and cons of Fivetran’s commercial approach and Airbyte’s open-source approach. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2021/01/Airbyte-vs-Fivetran.png) - -## **Fivetran:** - -* **Limited high-quality connectors:** after 8 years in business, Fivetran supports 150 connectors. The more connectors, the more difficult it is for Fivetran to keep the same level of maintenance across all connectors. They will always have a ROI consideration to maintaining long-tailed connectors. 
-* **Pricing indexed on usage:** Fivetran’s pricing is indexed on the number of active rows \(rows added or edited\) per month. Teams always need to keep that in mind and are not free to move data without thinking about cost, as the costs can grow fast. -* **Security and privacy compliance:** all companies are subject to privacy compliance laws, such as GDPR, CCPA, HIPAA, etc. As a matter of fact, above a certain stage \(about 100 employees\) in a company, all external products need to go through a security compliance process that can take several months. -* **No moving data between internal databases:** Fivetran sits in the cloud, so if you have to replicate data from an internal database to another, it makes no sense to have the data move through them \(Fivetran\) for privacy and cost reasons. - -## **Airbyte:** - -* **Free, as open source, so no more pricing based on usage**: learn more about our [future business model](https://handbook.airbyte.io/strategy/business-model) \(connectors will always remain open source\). -* **Supporting 60 connectors within 8 months from inception**. Our goal is to reach 200+ connectors by the end of 2021. -* **Building new connectors made trivial, in the language of your choice:** Airbyte makes it a lot easier to create your own connector, vs. building them yourself in-house \(with Airflow or other tools\). Scheduling, orchestration, and monitoring comes out of the box with Airbyte. -* **Addressing the long tail of connectors:** with the help of the community, Airbyte ambitions to support thousands of connectors. -* **Adapt existing connectors to your needs:** you can adapt any existing connector to address your own unique edge case. -* **Using data integration in a workflow:** Airbyte’s API lets engineering teams add data integration jobs into their workflow seamlessly. -* **Integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **Debugging autonomy:** if you experience any connector issue, you won’t need to wait for Fivetran’s customer support team to get back to you, if you can fix the issue fast yourself. -* **No more security and privacy compliance, as self-hosted, source-available and open-sourced \(MIT\)**. Any team can directly address their integration needs. - -Your data stays in your cloud. Have full control over your data, and the costs of your data transfers. - diff --git a/docs/archive/faq/differences-with/meltano-vs-airbyte.md b/docs/archive/faq/differences-with/meltano-vs-airbyte.md deleted file mode 100644 index f8e2ff5fba64..000000000000 --- a/docs/archive/faq/differences-with/meltano-vs-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Meltano vs Airbyte - -We wrote an article, “[The State of Open-Source Data Integration and ETL](https://airbyte.com/articles/data-engineering-thoughts/the-state-of-open-source-data-integration-and-etl/),” in which we list and compare all ETL-related open-source projects, including Meltano and Airbyte. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -## **Meltano:** - -* **Meltano is built on top of the Singer protocol, whereas Airbyte is built on top of the Airbyte protocol**. 
Having initially created Airbyte on top of Singer, we wrote about why we didn't move forward with it [here](https://airbyte.com/blog/why-you-should-not-build-your-data-pipeline-on-top-of-singer) and [here](https://airbyte.com/blog/airbyte-vs-singer-why-airbyte-is-not-built-on-top-of-singer). Summarized, the reasons were: Singer connectors didn't always adhere to the Singer protocol, had poor standardization and visibility in terms of quality, and community governance and support was abandoned by Stitch. By contrast, we aim to make Airbyte a product that ["just works"](https://airbyte.com/blog/our-truth-for-2021-airbyte-just-works) and always plan to maximize engagement within the Airbyte community. -* **CLI-first approach:** Meltano was primarily built with a command line interface in mind. In that sense, they seem to target engineers with a preference for that interface. -* **Integration with Airflow for orchestration:** You can either use Meltano alone for orchestration or with Airflow; Meltano works both ways. -* All connectors must use Python. -* Meltano works with any of Singer's 200+ available connectors. However, in our experience, quality has been hit or miss. - -## **Airbyte:** - -In contrast, Airbyte is a company fully committed to the open-source project and has a [business model](https://handbook.airbyte.io/strategy/business-model) in mind around this project. Our [team](https://airbyte.com/about-us) are data integration experts that have built more than 1,000 integrations collectively at large scale. The team now counts 20 engineers working full-time on Airbyte. - -* **Airbyte supports more than 100 connectors after only 1 year since its inception**, 20% of which were built by the community. Our ambition is to support **200+ connectors by the end of 2021.** -* Airbyte’s connectors are **usable out of the box through a UI and API,** with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Not limited by Singer’s data protocol:** In contrast to Meltano, Airbyte was not built on top of Singer, but its data protocol is compatible with Singer’s. This means Airbyte can go beyond Singer, but Meltano will remain limited. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. - -## **Other noteworthy differences:** - -* In terms of community, Meltano's Slack community got 430 new members in the last 6 months, while Airbyte got 800. -* The difference in velocity in terms of feature progress is easily measurable as both are open-source projects. Meltano closes about 30 issues per month, while Airbyte closes about 120. 
- diff --git a/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md b/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md deleted file mode 100644 index adcc9c2bf376..000000000000 --- a/docs/archive/faq/differences-with/pipelinewise-vs-airbyte.md +++ /dev/null @@ -1,25 +0,0 @@ -# Pipelinewise vs Airbyte - -## **PipelineWise:** - -PipelineWise is an open-source project by Transferwise that was built with the primary goal of serving their own needs. There is no business model attached to the project, and no apparent interest in growing the community. - -* **Supports 21 connectors,** and only adds new ones based on the needs of the mother company, Transferwise. -* **No business model attached to the project,** and no apparent interest from the company in growing the community. -* **As close to the original format as possible:** PipelineWise aims to reproduce the data from the source to an Analytics-Data-Store in as close to the original format as possible. Some minor load time transformations are supported, but complex mapping and joins have to be done in the Analytics-Data-Store to extract meaning. -* **Managed Schema Changes:** When source data changes, PipelineWise detects the change and alters the schema in your Analytics-Data-Store automatically. -* **YAML based configuration:** Data pipelines are defined as YAML files, ensuring that the entire configuration is kept under version control. -* **Lightweight:** No daemons or database setup are required. - -## **Airbyte:** - -In contrast, Airbyte is a company fully committed to the open-source project and has a [business model in mind](https://handbook.airbyte.io/) around this project. - -* Our ambition is to support **300+ connectors by the end of 2021.** We already supported about 50 connectors at the end of 2020, just 5 months after its inception. -* Airbyte’s connectors are **usable out of the box through a UI and API,** with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. - -The data protocols for both projects are compatible with Singer’s. So it is easy to migrate a Singer tap or target onto Airbyte or PipelineWise. 
- diff --git a/docs/archive/faq/differences-with/singer-vs-airbyte.md b/docs/archive/faq/differences-with/singer-vs-airbyte.md deleted file mode 100644 index 58edd43eedb0..000000000000 --- a/docs/archive/faq/differences-with/singer-vs-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Singer vs Airbyte - -If you want to understand the difference between Airbyte and Singer, you might be interested in 2 articles we wrote: - -* “[Airbyte vs. Singer: Why Airbyte is not built on top of Singer](https://airbyte.com/articles/data-engineering-thoughts/airbyte-vs-singer-why-airbyte-is-not-built-on-top-of-singer/).” -* “[The State of Open-Source Data Integration and ETL](https://airbyte.com/articles/data-engineering-thoughts/the-state-of-open-source-data-integration-and-etl/),” in which we list and compare all ETL-related open-source projects, including Singer and Airbyte. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2020/10/Landscape-of-open-source-data-integration-platforms-4.png) - -## **Singer:** - -* **Supports 96 connectors after 4 years.** -* **Increasingly outdated connectors:** Talend \(acquirer of StitchData\) seems to have stopped investing in maintaining Singer’s community and connectors. As most connectors see schema changes several times a year, more and more Singer’s taps and targets are not actively maintained and are becoming outdated. -* **Absence of standardization:** each connector is its own open-source project. So you never know the quality of a tap or target until you have actually used it. There is no guarantee whatsoever about what you’ll get. -* **Singer’s connectors are standalone binaries:** you still need to build everything around to make them work \(e.g. UI, configuration validation, state management, normalization, schema migration, monitoring, etc\). -* **No full commitment to open sourcing all connectors,** as some connectors are only offered by StitchData under a paid plan. _\*\*_ - -## **Airbyte:** - -* Our ambition is to support **300+ connectors by the end of 2021.** We already supported about 50 connectors at the end of 2020, just 5 months after its inception. -* Airbyte’s connectors are **usable out of the box through a UI and API**, with monitoring, scheduling and orchestration. Airbyte was built on the premise that a user, whatever their background, should be able to move data in 2 minutes. Data engineers might want to use raw data and their own transformation processes, or to use Airbyte’s API to include data integration in their workflows. On the other hand, analysts and data scientists might want to use normalized consolidated data in their database or data warehouses. Airbyte supports all these use cases. -* **One platform, one project with standards:** This will help consolidate the developments behind one single project, some standardization and specific data protocol that can benefit all teams and specific cases. -* **Connectors can be built in the language of your choice,** as Airbyte runs them as Docker containers. -* **Airbyte integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **A full commitment to the open-source MIT project** with the promise not to hide some connectors behind paid walls. - -Note that Airbyte’s data protocol is compatible with Singer’s. So it is easy to migrate a Singer tap onto Airbyte. 
- diff --git a/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md b/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md deleted file mode 100644 index ec612ea9b2b1..000000000000 --- a/docs/archive/faq/differences-with/stitchdata-vs-airbyte.md +++ /dev/null @@ -1,29 +0,0 @@ -# StitchData vs Airbyte - -We wrote an article, “[Open-source vs. Commercial Software: How to Solve the Data Integration Problem](https://airbyte.com/articles/data-engineering-thoughts/open-source-vs-commercial-software-how-to-better-solve-data-integration/),” in which we describe the pros and cons of StitchData’s commercial approach and Airbyte’s open-source approach. Don’t hesitate to check it out for more detailed arguments. As a summary, here are the differences: - -![](https://airbyte.com/wp-content/uploads/2020/10/Open-source-vs-commercial-approach-2048x1843.png) - -## StitchData: - -* **Limited deprecating connectors:** Stitch only supports 150 connectors. Talend has stopped investing in StitchData and its connectors. And on Singer, each connector is its own open-source project. So you never know the quality of a tap or target until you have actually used it. There is no guarantee whatsoever about what you’ll get. -* **Pricing indexed on usage:** StitchData’s pricing is indexed on the connectors used and the volume of data transferred. Teams always need to keep that in mind and are not free to move data without thinking about cost. -* **Security and privacy compliance:** all companies are subject to privacy compliance laws, such as GDPR, CCPA, HIPAA, etc. As a matter of fact, above a certain stage \(about 100 employees\) in a company, all external products need to go through a security compliance process that can take several months. -* **No moving data between internal databases:** StitchData sits in the cloud, so if you have to replicate data from an internal database to another, it makes no sense to have the data move through their cloud for privacy and cost reasons. -* **StitchData’s Singer connectors are standalone binaries:** you still need to build everything around to make them work. And it’s hard to update some pre-built connectors, as they are of poor quality. - -## Airbyte: - -* **Free, as open source, so no more pricing based on usage:** learn more about our [future business model](https://handbook.airbyte.io/strategy/business-model) \(connectors will always remain open-source\). -* **Supporting 50+ connectors by the end of 2020** \(so in only 5 months of existence\). Our goal is to reach 300+ connectors by the end of 2021. -* **Building new connectors made trivial, in the language of your choice:** Airbyte makes it a lot easier to create your own connector, vs. building them yourself in-house \(with Airflow or other tools\). Scheduling, orchestration, and monitoring comes out of the box with Airbyte. -* **Maintenance-free connectors you can use in minutes.** Just authenticate your sources and warehouse, and get connectors that adapt to schema and API changes for you. -* **Addressing the long tail of connectors:** with the help of the community, Airbyte ambitions to support thousands of connectors. -* **Adapt existing connectors to your needs:** you can adapt any existing connector to address your own unique edge case. -* **Using data integration in a workflow:** Airbyte’s API lets engineering teams add data integration jobs into their workflow seamlessly. -* **Integrates with your data stack and your needs:** Airflow, Kubernetes, dbt, etc. 
Its normalization is optional, it gives you a basic version that works out of the box, but also allows you to use dbt to do more complicated things. -* **Debugging autonomy:** if you experience any connector issue, you won’t need to wait for Fivetran’s customer support team to get back to you, if you can fix the issue fast yourself. -* **Your data stays in your cloud.** Have full control over your data, and the costs of your data transfers. -* **No more security and privacy compliance, as self-hosted and open-sourced \(MIT\).** Any team can directly address their integration needs. -* **Premium support directly on our Slack for free**. Our time to resolution is about 3-4 hours in average. - diff --git a/docs/archive/faq/getting-started.md b/docs/archive/faq/getting-started.md deleted file mode 100644 index fd4ce42d47f6..000000000000 --- a/docs/archive/faq/getting-started.md +++ /dev/null @@ -1,50 +0,0 @@ -# Getting Started - -## **What do I need to get started using Airbyte?** - -You can deploy Airbyte in several ways, as [documented here](../../deploying-airbyte/README.md). Airbyte will then help you replicate data between a source and a destination. If you don’t see the connector you need, you can [build your connector yourself](../../connector-development) and benefit from Airbyte’s optional scheduling, orchestration and monitoring modules. - -## **How long does it take to set up Airbyte?** - -It depends on your source and destination. Check our setup guides to see the tasks for your source and destination. Each source and destination also has a list of prerequisites for setup. To make setup faster, get your prerequisites ready before you start to set up your connector. During the setup process, you may need to contact others \(like a database administrator or AWS account owner\) for help, which might slow you down. But if you have access to the connection information, it can take 2 minutes: see [demo video. ](https://www.youtube.com/watch?v=jWVYpUV9vEg) - -## **What data sources does Airbyte offer connectors for?** - -We already offer 100+ connectors, and will focus all our effort in ramping up the number of connectors and strengthening them. If you don’t see a source you need, you can file a [connector request here](https://github.com/airbytehq/airbyte/issues/new?assignees=&labels=area%2Fintegration%2C+new-integration&template=new-integration-request.md&title=). - -## **Where can I see my data in Airbyte?** - -You can’t see your data in Airbyte, because we don’t store it. The sync loads your data into your destination \(data warehouse, data lake, etc.\). While you can’t see your data directly in Airbyte, you can check your schema and sync status on the source detail page in Airbyte. - -## **Can I add multiple destinations?** - -Sure, you can. Just go to the "Destinations" section and click on the top right "+ new destination" button. You can have multiple destinations for the same source, and multiple sources for the same destination. - -## Am I limited to GUI interaction or is there a way to set up / run / interact with Airbyte programmatically? - -You can use the API to do anything you do today from the UI. Though, word of notice, the API is in alpha and may change. You won’t lose any functionality, but you may need to update your code to catch up to any backwards incompatible changes in the API. - -## How does Airbyte handle connecting to databases that are behind a firewall / NAT? - -We don’t. Airbyte is to be self-hosted in your own private cloud. 
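Because Airbyte is self-hosted, its API is reachable inside your own network alongside the UI. As mentioned in the question about programmatic access above, anything you can do from the UI can also be scripted. A minimal sketch, assuming a local deployment on the default port (the exact endpoint path and payload may differ between versions):

```bash
# Hypothetical example: trigger a manual sync for an existing connection.
# Replace the connection ID with the one shown in your connection's URL.
curl -X POST http://localhost:8000/api/v1/connections/sync \
  -H "Content-Type: application/json" \
  -d '{"connectionId": "00000000-0000-0000-0000-000000000000"}'
```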
- -## Can I set a start time for my integration? - -[Here](../../understanding-airbyte/connections#sync-schedules) is the link to the docs on scheduling syncs. - -## **Can I disable analytics in Airbyte?** - -Yes, you can control what's sent outside of Airbyte for analytics purposes. - -We added the following telemetry to Airbyte to ensure the best experience for users: - -* Measure usage of features & connectors -* Measure failure rate of connectors to address bugs quickly -* Reach out to our users about Airbyte community updates if they opt-in -* ... - -To disable telemetry, modify the `.env` file and define the two following environment variables: - -```text -TRACKING_STRATEGY=logging -``` diff --git a/docs/archive/faq/security-and-data-audits.md b/docs/archive/faq/security-and-data-audits.md deleted file mode 100644 index e56db4de7ac3..000000000000 --- a/docs/archive/faq/security-and-data-audits.md +++ /dev/null @@ -1,14 +0,0 @@ -# Security & Data Audits - -## **How secure is Airbyte?** - -Airbyte is an open-source self-hosted solution, so let’s say it is as safe as your data infrastructure. _\*\*_ - -## **Is Airbyte GDPR compliant?** - -Airbyte is a self-hosted solution, so it doesn’t bring any security or privacy risk to your infrastructure. We do intend to add data quality and privacy compliance features in the future, in order to give you more visibility on that topic. - -## **How does Airbyte charge?** - -We don’t. All connectors are all under the MIT license. If you are curious about the business model we have in mind, please check our [company handbook](https://handbook.airbyte.io/strategy/business-model). - diff --git a/docs/archive/faq/transformation-and-schemas.md b/docs/archive/faq/transformation-and-schemas.md deleted file mode 100644 index 554b11b558fd..000000000000 --- a/docs/archive/faq/transformation-and-schemas.md +++ /dev/null @@ -1,20 +0,0 @@ -# Transformation and Schemas - -## **Where's the T in Airbyte’s ETL tool?** - -Airbyte is actually an ELT tool, and you have the freedom to use it as an EL-only tool. The transformation part is done by default, but it is optional. You can choose to receive the data in raw \(JSON file for instance\) in your destination. - -We do provide normalization \(if option is still on\) so that data analysts / scientists / any users of the data can use it without much effort. - -We also intend to integrate deeply with dbt to make it easier for your team to continue relying you on them, if this was what you were doing. - -## **How does Airbyte handle replication when a data source changes its schema?** - -Airbyte continues to sync data using the configured schema until that schema is updated. Because Airbyte treats all fields as optional, if a field is renamed or deleted in the source, that field simply will no longer be replicated, but all remaining fields will. The same is true for streams as well. - -For now, the schema can only be updated manually in the UI \(by clicking "Update Schema" in the settings page for the connection\). When a schema is updated Airbyte will re-sync all data for that source using the new schema. - -## **How does Airbyte handle namespaces \(or schemas for the DB-inclined\)?** - -Airbyte respects source-defined namespaces when syncing data with a namespace-supported destination. See [this](../../understanding-airbyte/namespaces.md) for more details. 
- diff --git a/docs/archive/mongodb.md b/docs/archive/mongodb.md deleted file mode 100644 index d239da867673..000000000000 --- a/docs/archive/mongodb.md +++ /dev/null @@ -1,102 +0,0 @@ -# Mongo DB - -The MongoDB source supports Full Refresh and Incremental sync strategies. - -## Resulting schema - -MongoDB does not have anything like table definition, thus we have to define column types from actual attributes and their values. Discover phase have two steps: - -### Step 1. Find all unique properties - -Connector runs the map-reduce command which returns all unique document props in the collection. Map-reduce approach should be sufficient even for large clusters. - -#### Note - -To work with Atlas MongoDB, a **non-free** tier is required, as the free tier does not support the ability to perform the mapReduce operation. - -### Step 2. Determine property types - -For each property found, connector selects 10k documents from the collection where this property is not empty. If all the selected values have the same type - connector will set appropriate type to the property. In all other cases connector will fallback to `string` type. - -## Features - -| Feature | Supported | -| :--- | :--- | -| Full Refresh Sync | Yes | -| Incremental - Append Sync | Yes | -| Replicate Incremental Deletes | No | -| Namespaces | No | - -### Full Refresh sync - -Works as usual full refresh sync. - -### Incremental sync - -Cursor field can not be nested. Currently only top level document properties are supported. - -Cursor should **never** be blank. In case cursor is blank - the incremental sync results might be unpredictable and will totally rely on MongoDB comparison algorithm. - -Only `datetime` and `integer` cursor types are supported. Cursor type is determined based on the cursor field name: - -* `datetime` - if cursor field name contains a string from: `time`, `date`, `_at`, `timestamp`, `ts` -* `integer` - otherwise - -## Getting started - -This guide describes in details how you can configure MongoDB for integration with Airbyte. - -### Create users - -Run `mongo` shell, switch to `admin` database and create a `READ_ONLY_USER`. `READ_ONLY_USER` will be used for Airbyte integration. Please make sure that user has read-only privileges. - -```javascript -mongo -use admin; -db.createUser({user: "READ_ONLY_USER", pwd: "READ_ONLY_PASSWORD", roles: [{role: "read", db: "TARGET_DATABASE"}]} -``` - -Make sure the user have appropriate access levels. - -### Configure application - -In case your application uses MongoDB without authentication you will have to adjust code base and MongoDB config to enable MongoDB authentication. **Otherwise your application might go down once MongoDB authentication will be enabled.** - -### Enable MongoDB authentication - -Open `/etc/mongod.conf` and add/replace specific keys: - -```yaml -net: - bindIp: 0.0.0.0 - -security: - authorization: enabled -``` - -Binding to `0.0.0.0` will allow to connect to database from any IP address. - -The last line will enable MongoDB security. Now only authenticated users will be able to access the database. - -### Configure firewall - -Make sure that MongoDB is accessible from external servers. Specific commands will depend on the firewall you are using \(UFW/iptables/AWS/etc\). Please refer to appropriate documentation. - -Your `READ_ONLY_USER` should now be ready for use with Airbyte. 
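Before configuring the connector, it can be worth confirming that the new user can actually authenticate and read from the target database. Below is a hypothetical check from the `mongo` shell; the host, port, and names are placeholders, and `authSource=admin` is used because the user was created in the `admin` database:

```bash
# Connect as the read-only user and list the collections it is allowed to see.
mongo "mongodb://READ_ONLY_USER:READ_ONLY_PASSWORD@your-mongo-host:27017/TARGET_DATABASE?authSource=admin" \
  --eval "db.getCollectionNames()"
```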
- - -#### Possible configuration Parameters - -* [Authentication Source](https://docs.mongodb.com/manual/reference/connection-string/#mongodb-urioption-urioption.authSource) -* Host: URL of the database -* Port: Port to use for connecting to the database -* User: username to use when connecting -* Password: used to authenticate the user -* [Replica Set](https://docs.mongodb.com/manual/reference/connection-string/#mongodb-urioption-urioption.replicaSet) -* Whether to enable SSL - - -## Changelog -| Version | Date | Pull Request | Subject | -| :------ | :-------- | :----- | :------ | -| 0.2.3 | 2021-07-20 | [4669](https://github.com/airbytehq/airbyte/pull/4669) | Subscriptions Stream now returns all kinds of subscriptions (including expired and canceled)| diff --git a/docs/archive/securing-airbyte.md b/docs/archive/securing-airbyte.md deleted file mode 100644 index 727ff5043eeb..000000000000 --- a/docs/archive/securing-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Securing Airbyte access - -## Reporting Vulnerabilities -⚠️ Please do not file GitHub issues or post on our public forum for security vulnerabilities as they are public! ⚠️ - -Airbyte takes security issues very seriously. If you have any concern around Airbyte or believe you have uncovered a vulnerability, please get in touch via the e-mail address security@airbyte.io. In the message, try to provide a description of the issue and ideally a way of reproducing it. The security team will get back to you as soon as possible. - -Note that this security address should be used only for undisclosed vulnerabilities. Dealing with fixed issues or general questions on how to use the security features should be handled regularly via the user and the dev lists. Please report any security problems to us before disclosing it publicly. - -## Access control - -Airbyte, in its open-source version, does not support RBAC to manage access to the UI. - -However, multiple options exist for the operators to implement access control themselves. - -To secure access to Airbyte you have three options: -* Networking restrictions: deploy Airbyte in a private network or use a firewall to filter which IP is allowed to access your host. -* Put Airbyte behind a reverse proxy and handle the access control on the reverse proxy side. 
-* If you deployed Airbyte on a cloud provider: - * GCP: use the [Identity-Aware proxy](https://cloud.google.com/iap) service - * AWS: use the [AWS Systems Manager Session Manager](https://docs.aws.amazon.com/systems-manager/latest/userguide/session-manager.html) service - -**Non exhaustive** online resources list to set up auth on your reverse proxy: -* [Configure HTTP Basic Auth on NGINX for Airbyte](https://shadabshaukat.medium.com/deploy-and-secure-airbyte-with-nginx-reverse-proxy-basic-authentication-lets-encrypt-ssl-72bee223a4d9) -* [Kubernetes: Basic auth on a Nginx ingress controller](https://kubernetes.github.io/ingress-nginx/examples/auth/basic/) -* [How to set up Okta SSO on an NGINX reverse proxy](https://developer.okta.com/blog/2018/08/28/nginx-auth-request) -* [How to enable HTTP Basic Auth on Caddy](https://caddyserver.com/docs/caddyfile/directives/basicauth) -* [SSO for Traefik](https://github.com/thomseddon/traefik-forward-auth) diff --git a/docs/cloud/core-concepts.md b/docs/cloud/core-concepts.md deleted file mode 100644 index c3c949599ee8..000000000000 --- a/docs/cloud/core-concepts.md +++ /dev/null @@ -1,106 +0,0 @@ -# Core Concepts - -Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, and how the data is written to in the destination. - -This page describes the concepts you need to know to use Airbyte. - -## Source - -A source is an API, file, database, or data warehouse that you want to ingest data from. - -## Destination - -A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your ingested data. - -## Connector - -An Airbyte component which pulls data from a source or pushes data to a destination. - -## Connection - -A connection is an automated data pipeline that replicates data from a source to a destination. Setting up a connection enables configuration of the following parameters: - -| Concept | Description | -|---------------------|---------------------------------------------------------------------------------------------------------------------| -| Replication Frequency | When should a data sync be triggered? | -| Destination Namespace and Stream Prefix | Where should the replicated data be written? | -| Catalog Selection | What data (streams and columns) should be replicated from the source to the destination? | -| Sync Mode | How should the streams be replicated (read and written)? | -| Schema Propagation | How should Airbyte handle schema drift in sources? | -## Stream - -A stream is a group of related records. - -Examples of streams: - -- A table in a relational database -- A resource or API endpoint for a REST API -- The records from a directory containing many files in a filesystem - -## Field - -A field is an attribute of a record in a stream. - -Examples of fields: - -- A column in the table in a relational database -- A field in an API response - -## Namespace - -Namespace is a method of grouping streams in a source or destination. Namespaces are used to generally organize data, segregate tests and production data, and enforce permissions. In a relational database system, this is known as a schema. - -In a source, the namespace is the location from where the data is replicated to the destination. In a destination, the namespace is the location where the replicated data is stored in the destination. 
- -Airbyte supports the following configuration options for a connection: - - | Destination Namepsace | Description | -| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| Destination default | All streams will be replicated to the single default namespace defined by the Destination. For more details, see ​​Destination Connector Settings | -| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. | -| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details | - -## Connection sync modes - -A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. - -- **Full Refresh | Overwrite:** Sync all records from the source and replace data in destination by overwriting it each time. -- **Full Refresh | Append:** Sync all records from the source and add them to the destination without deleting any data. This creates a historical copy of all records each sync. -- **Incremental Sync | Append:** Sync new records from the source and add them to the destination without deleting any data. This enables efficient historical tracking over time of data. -- **Incremental Sync | Append + Deduped:** Sync new records from the source and add them to the destination. Also provides a de-duplicated view mirroring the state of the stream in the source. This is the most common replication use case. - -## Normalization - -Normalization is the process of structuring data from the source into a format appropriate for consumption in the destination. For example, when writing data from a nested, dynamically typed source like a JSON API to a relational destination like Postgres, normalization is the process which un-nests JSON from the source into a relational table format which uses the appropriate column types in the destination. - -Note that normalization is only relevant for the following relational database & warehouse destinations: - -- Redshift -- Postgres -- Oracle -- MySQL -- MSSQL - -Other destinations do not support normalization as described in this section, though they may normalize data in a format that makes sense for them. For example, the S3 destination connector offers the option of writing JSON files in S3, but also offers the option of writing statically typed files such as Parquet or Avro. - -After a sync is complete, Airbyte normalizes the data. When setting up a connection, you can choose one of the following normalization options: - -- Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` -- Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination. _Note: Not all destinations support normalization._ -- [dbt Cloud integration](https://docs.airbyte.com/cloud/managing-airbyte-cloud/dbt-cloud-integration): Airbyte's dbt Cloud integration allows you to use dbt Cloud for transforming and cleaning your data during the normalization process. 
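To make the "raw data" option above concrete, here is a hypothetical look at an un-normalized stream in a Postgres destination. The table and column names (`_airbyte_raw_<stream>`, `_airbyte_data`, and so on) can vary between Airbyte versions, so treat this as a sketch rather than a reference:

```bash
# Each raw row stores the source record as a JSON blob in _airbyte_data.
psql -h your-postgres-host -U your_user -d your_database -c \
  "SELECT _airbyte_ab_id, _airbyte_emitted_at, _airbyte_data FROM _airbyte_raw_users LIMIT 10;"
```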
- -:::note - -Normalizing data may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is normalized and is not related to Airbyte credit usage. - -::: - -## Workspace - -A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. - -When you [sign up](http://cloud.airbyte.com/signup) for Airbyte Cloud, we automatically create your first workspace where you are the only user with access. You can set up your sources and destinations to start syncing data and invite other users to join your workspace. - -## Glossary of Terms - -You find and extended list of [Airbyte specific terms](https://glossary.airbyte.com/term/airbyte-glossary-of-terms/), [data engineering concepts](https://glossary.airbyte.com/term/data-engineering-concepts) or many [other data related terms](https://glossary.airbyte.com/). diff --git a/docs/cloud/getting-started-with-airbyte-cloud.md b/docs/cloud/getting-started-with-airbyte-cloud.md deleted file mode 100644 index 2fecf212572f..000000000000 --- a/docs/cloud/getting-started-with-airbyte-cloud.md +++ /dev/null @@ -1,178 +0,0 @@ -# Getting Started with Airbyte Cloud - -This page guides you through setting up your Airbyte Cloud account, setting up a source, destination, and connection, verifying the sync, and allowlisting an IP address. - -## Set up your Airbyte Cloud account - -To use Airbyte Cloud: - -1. If you haven't already, [sign up for Airbyte Cloud](https://cloud.airbyte.com/signup?utm_campaign=22Q1_AirbyteCloudSignUpCampaign_Trial&utm_source=Docs&utm_content=SetupGuide) using your email address, Google login, or GitHub login. - - Airbyte Cloud offers a 14-day free trial that begins after your first successful sync. For more information, see [Pricing](https://airbyte.com/pricing). - - :::note - If you are invited to a workspace, you currently cannot use your Google login to create a new Airbyte account. - ::: - -2. If you signed up using your email address, Airbyte will send you an email with a verification link. On clicking the link, you'll be taken to your new workspace. - - :::info - A workspace lets you collaborate with team members and share resources across your team under a shared billing account. - ::: - -## Set up a source - -:::info -A source is an API, file, database, or data warehouse that you want to ingest data from. -::: - -To set up a source: - -1. On the Airbyte Cloud dashboard, click **Sources**. -2. On the Set up the source page, select the source you want to set up from the **Source catalog**. Airbyte currently offers more than 200 source connectors in Cloud to choose from. Once you've selected the source, a Setup Guide will lead you through the authentication and setup of the source. - -3. Click **Set up source**. - -## Set up a destination - -:::info -A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your extracted data. -::: - -To set up a destination: - -1. On the Airbyte Cloud dashboard, click **Destinations**. -2. On the Set up the Destination page, select the destination you want to set up from the **Destination catalog**. Airbyte currently offers more than 38 destination connectors in Cloud to choose from. Once you've selected the destination, a Setup Guide will lead you through the authentication and setup of the source. -3. Click **Set up destination**. 
- -## Set up a connection - -:::info -A connection is an automated data pipeline that replicates data from a source to a destination. -::: - -Setting up a connection involves configuring the following parameters: - -| Replication Setting | Description | -| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| [Destination Namespace](../understanding-airbyte/namespaces.md) and stream prefix | Where should the replicated data be written to? | -| Replication Frequency | How often should the data sync? | -| [Data Residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-the-data-residency-for-a-connection) | Where should the data be processed? | -| [Schema Propagation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes) | Should schema drift be automated? | - -After configuring the connection settings, you will then define specifically what data will be synced. - -:::info -A connection's schema consists of one or many streams. Each stream is most commonly associated with a database table or an API endpoint. Within a stream, there can be one or many fields or columns. -::: - -| Catalog Selection | Description | -| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| Stream Selection | Which streams should be replicated from the source to the destination? | -| Column Selection | Which fields should be included in the sync? | -| [Sync Mode](../understanding-airbyte/connections/README.md) | How should the streams be replicated (read and written)? | - -To set up a connection: - -:::tip - -Set your [default data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) before creating a new connection to ensure your data is processed in the correct region. - -::: - -1. On the Airbyte Cloud dashboard, click **Connections** and then click **+ New connection**. -2. Select a source: - - - To use a data source you've already set up with Airbyte, select from the list of existing sources. Click the source to use it. - - To set up a new source, select **Set up a new source** and fill out the fields relevant to your source using the Setup Guide. - -3. Select a destination: - - - To use a data source you've already set up with Airbyte, select from the list of existing destinations. Click the destination to use it. - - To set up a new destination, select **Set up a new destination** and fill out the fields relevant to your destination using the Setup Guide. - - Airbyte will scan the schema of the source, and then display the **Connection Configuration** page. - -4. From the **Replication frequency** dropdown, select how often you want the data to sync from the source to the destination. The default replication frequency is **Every 24 hours**. You can also set up [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). - - Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour. - -5. 
From the **Destination Namespace** dropdown, select the format in which you want to store the data in the destination. Note: The default configuration is **Destination default**. - -| Destination Namepsace | Description | -| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -| Destination default | All streams will be replicated to the single default namespace defined by the Destination. For more details, see ​​Destination Connector Settings | -| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. | -| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details | - -:::tip -To ensure your data is synced correctly, see our examples of how to use the [Destination Namespace](../understanding-airbyte/namespaces.md#examples) -::: - -6. (Optional) In the **Destination Stream Prefix (Optional)** field, add a prefix to stream names. For example, adding a prefix `airbyte_` renames the stream `projects` to `airbyte_projects`. This is helpful if you are sending multiple connections to the same Destination Namespace to ensure connections do not conflict when writing to the destination. - -7. Select in the **Detect and propagate schema changes** dropdown whether Airbyte should propagate schema changes. See more details about how we handle [schema changes](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes). - - -8. Activate the streams you want to sync by toggling the **Sync** button on. Use the **Search stream name** search box to find streams quickly. If you want to sync all streams, bulk toggle to enable all streams. - -9. Configure the stream settings: - 1. **Data Destination**: Where the data will land in the destination - 2. **Stream**: The table name in the source - 3. **Sync mode**: How the data will be replicated from the source to the destination. - - For the source: - - - Select **Full Refresh** to copy the entire dataset each time you sync - - Select **Incremental** to replicate only the new or modified data - - For the destination: - - - Select **Overwrite** to erase the old data and replace it completely - - Select **Append** to capture changes to your table - **Note:** This creates duplicate records - - Select **Append + Deduped** to mirror your source while keeping records unique (most common) - - **Note:** Some sync modes may not yet be available for the source or destination. - - 4. **Cursor field**: Used in **Incremental** sync mode to determine which records to sync. Airbyte pre-selects the cursor field for you (example: updated date). If you have multiple cursor fields, select the one you want. - 5. **Primary key**: Used in **Append + Deduped** sync mode to determine the unique identifier. - 6. Choose which fields or columns to sync. By default, all fields are synced. - -10. Click **Set up connection**. -11. Airbyte tests the connectio setup. If the test is successful, Airbyte will save the configuration. If the Replication Frequency uses a preset schedule or CRON, your first sync will immediately begin! 
- -## Verify the sync - -Once the first sync has completed, you can verify the sync has completed by checking in Airbyte Cloud and in your destination. - -1. On the Airbyte Cloud dashboard, click **Connections**. The list of connections is displayed. Click on the connection you just set up. -2. The **Job History** tab shows each sync run, along with the sync summary of data and rows moved. You can also manually trigger syncs or view detailed logs for each sync here. -3. Check the data at your destination. If you added a Destination Stream Prefix while setting up the connection, make sure to search for the stream name with the prefix. - -## Allowlist IP addresses - -Depending on your [data residency](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-data-residency#choose-your-default-data-residency) location, you may need to allowlist the following IP addresses to enable access to Airbyte: - -### United States and Airbyte Default - -#### GCP region: us-west3 - -[comment]: # "IMPORTANT: if changing the list of IP addresses below, you must also update the connector.airbyteCloudIpAddresses LaunchDarkly flag to show the new list so that the correct list is shown in the Airbyte Cloud UI, then reach out to the frontend team and ask them to update the default value in the useAirbyteCloudIps hook!" - -- 34.106.109.131 -- 34.106.196.165 -- 34.106.60.246 -- 34.106.229.69 -- 34.106.127.139 -- 34.106.218.58 -- 34.106.115.240 -- 34.106.225.141 - -### European Union - -#### AWS region: eu-west-3 - -- 13.37.4.46 -- 13.37.142.60 -- 35.181.124.238 diff --git a/docs/cloud/managing-airbyte-cloud/configuring-connections.md b/docs/cloud/managing-airbyte-cloud/configuring-connections.md index 4e95bac58714..bc896004eb30 100644 --- a/docs/cloud/managing-airbyte-cloud/configuring-connections.md +++ b/docs/cloud/managing-airbyte-cloud/configuring-connections.md @@ -1,6 +1,6 @@ # Configuring connections -A connection links a source to a destination and defines how your data will sync. After you have created a connection, you can modify any of the [configuration settings](#configure-connection-settings) or [stream settings](#modify-streams-in-your-connection). +A connection links a source to a destination and defines how your data will sync. After you have created a connection, you can modify any of the configuration settings or stream settings. ## Configure Connection Settings @@ -8,7 +8,7 @@ Configuring the connection settings allows you to manage various aspects of the To configure these settings: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection you want to change. 2. Click the **Replication** tab. 
@@ -24,25 +24,11 @@ You can configure the following settings: | Setting | Description | |--------------------------------------|-------------------------------------------------------------------------------------| -| Replication frequency | How often the data syncs | -| Destination namespace | Where the replicated data is written | +| [Replication frequency](/using-airbyte/core-concepts/sync-schedules.md) | How often the data syncs | +| [Destination namespace](/using-airbyte/core-concepts/namespaces.md) | Where the replicated data is written | | Destination stream prefix | How you identify streams from different connectors | -| [Detect and propagate schema changes](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-schema-changes/#review-non-breaking-schema-changes) | How Airbyte handles syncs when it detects schema changes in the source | -| Connection Data Residency | Where data will be processed | - -To use [cron scheduling](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html): - -1. In the **Replication Frequency** dropdown, click **Cron**. - -2. Enter a cron expression and choose a time zone to create a sync schedule. - -:::note - -* Only one sync per connection can run at a time. -* If a sync is scheduled to run before the previous sync finishes, the scheduled sync will start after the completion of the previous sync. -* Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour. - -::: +| [Detect and propagate schema changes](/cloud/managing-airbyte-cloud/manage-schema-changes.md) | How Airbyte handles syncs when it detects schema changes in the source | +| [Connection Data Residency](/cloud/managing-airbyte-cloud/manage-data-residency.md) | Where data will be processed | ## Modify streams in your connection @@ -54,7 +40,7 @@ A connection's schema consists of one or many streams. Each stream is most commo To modify streams: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection you want to change. 2. Click the **Replication** tab. @@ -74,7 +60,7 @@ Source-defined cursors and primary keys are selected automatically and cannot be 3. Click on a stream to display the stream details panel. You'll see each column we detect from the source. -4. Toggle individual fields or columns to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. +4. Column selection is available to protect PII or sensitive data from being synced to the destination. Toggle individual fields to include or exclude them in the sync, or use the toggle in the table header to select all fields at once. :::info diff --git a/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md b/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md index 96510918cfd9..777433ec72e3 100644 --- a/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md +++ b/docs/cloud/managing-airbyte-cloud/dbt-cloud-integration.md @@ -1,7 +1,15 @@ # Use the dbt Cloud integration + + By using the dbt Cloud integration, you can create and run dbt transformations during syncs in Airbyte Cloud. This allows you to transform raw data into a format that is suitable for analysis and reporting, including cleaning and enriching the data. +:::note + +Normalizing data may cause an increase in your destination's compute cost. 
This cost will vary depending on the amount of data that is normalized and is not related to Airbyte credit usage. + +::: + ## Step 1: Generate a service token Generate a [service token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-tokens#generating-service-account-tokens) for your dbt Cloud transformation. @@ -17,7 +25,7 @@ Generate a [service token](https://docs.getdbt.com/docs/dbt-cloud-apis/service-t To set up the dbt Cloud integration in Airbyte Cloud: -1. On the Airbyte Cloud dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings**. 2. Click **dbt Cloud integration**. diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md index 160b28d5f47e..2b39a0bb1893 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md +++ b/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications.md @@ -1,6 +1,6 @@ # Manage notifications -This page provides guidance on how to manage notifications for Airbyte Cloud, allowing you to stay up-to-date on the activities in your workspace. +This page provides guidance on how to manage notifications for Airbyte, allowing you to stay up-to-date on the activities in your workspace. ## Notification Event Types @@ -12,41 +12,74 @@ This page provides guidance on how to manage notifications for Airbyte Cloud, al | Connection Updates Requiring Action | A connection update requires you to take action (ex. a breaking schema change is detected) | | Warning - Repeated Failures | A connection will be disabled soon due to repeated failures. It has failed 50 times consecutively or there were only failed jobs in the past 7 days | | Sync Disabled - Repeated Failures | A connection was automatically disabled due to repeated failures. It will be disabled when it has failed 100 times consecutively or has been failing for 14 days in a row | -| Warning - Upgrade Required (email only) | A new connector version is available and requires manual upgrade | -| Sync Disabled - Upgrade Required (email only) | One or more connections were automatically disabled due to a connector upgrade deadline passing -| +| Warning - Upgrade Required (Cloud only) | A new connector version is available and requires manual upgrade | +| Sync Disabled - Upgrade Required (Cloud only) | One or more connections were automatically disabled due to a connector upgrade deadline passing -## Configure Notification Settings +## Configure Email Notification Settings + + To set up email notifications: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings** and navigate to **Notifications**. -2. Click **Notifications**. +2. Toggle which messages you'd like to receive from Airbyte. All email notifications will be sent by default to the creator of the workspace. To change the recipient, edit and save the **notification email recipient**. If you would like to send email notifications to more than one recipient, you can enter an email distribution list (ie Google Group) as the recipient. -3. Toggle which messages you'd like to receive from Airbyte. All email notifications will be sent by default to the creator of the workspace. To change the recipient, edit and save the **notification email recipient**. If you would like to send email notifications to more than one recipient, you can enter an email distribution list (ie Google Group) as the recipient. +3. Click **Save changes**. -4. 
Click **Save changes**. +:::note +All email notifications except for Successful Syncs are enabled by default. +::: + +## Configure Slack Notification settings + +To set up Slack notifications: + +If you're more of a visual learner, just head over to [this video](https://www.youtube.com/watch?v=NjYm8F-KiFc&ab_channel=Airbyte) to learn how to do this. You can also refer to the Slack documentation on how to [create an incoming webhook for Slack](https://api.slack.com/messaging/webhooks). + +### Create a Slack app + +1. **Create a Slack App**: Navigate to https://api.slack.com/apps/. Select `Create an App`. + +![](../../.gitbook/assets/notifications_create_slack_app.png) + +2. Select `From Scratch`. Enter your App Name (e.g. Airbyte Sync Notifications) and pick your desired Slack workspace. + +3. **Set up the webhook URL**: In the left sidebar, click on `Incoming Webhooks`. Click the slider button in the top right to turn the feature on. Then click `Add New Webhook to Workspace`. -To set up webhook notifications: +![](../../.gitbook/assets/notifications_add_new_webhook.png) -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +4. Pick the channel that you want to receive Airbyte notifications in (ideally a dedicated one), and click `Allow` to give it permissions to access the channel. You should see the bot show up in the selected channel now. You will see an active webhook right above the `Add New Webhook to Workspace` button. -2. Click **Notifications**. +![](../../.gitbook/assets/notifications_webhook_url.png) -3. Have a webhook URL ready if you plan to use webhook notifications. Using a Slack webook is recommended. [Create an Incoming Webhook for Slack](https://api.slack.com/messaging/webhooks). +5. Click `Copy` to copy the link to your clipboard, which you will need to enter into Airbyte. -4. Toggle the type of events you are interested to receive notifications for. - 1. To enable webhook notifications, the webhook URL is required. For your convenience, we provide a 'test' function to send a test message to your webhook URL so you can make sure it's working as expected. +Your Webhook URL should look something like this: -5. Click **Save changes**. +![](../../.gitbook/assets/notifications_airbyte_notification_settings.png) + + +### Enable the Slack notification in Airbyte + +1. In the Airbyte UI, click **Settings** and navigate to **Notifications**. + +2. Paste the copied webhook URL to `Webhook URL`. Using a Slack webhook is recommended. On this page, you can toggle each slider to decide whether you want notifications for each notification type. + +3. **Test it out**: You can click `Test` to send a test message to the channel. Or, just run a sync now and try it out! If all goes well, you should receive a notification in your selected channel that looks like this: + +![](../../.gitbook/assets/notifications_slack_message.png) + +4. Click **Save changes**. + +You're done! ## Enable schema update notifications -To get notified when your source schema changes: -1. Make sure you have `Automatic Connection Updates` and `Connection Updates Requiring Action` turned on for your desired notification channels; If these are off, even if you turned on schema update notifications in a connection's settings, Airbyte will *NOT* send out any notifications related to these types of events. +To be notified of any source schema changes: +1. Make sure you have enabled `Automatic Connection Updates` and `Connection Updates Requiring Action` notifications.
If these are off, even if you turned on schema update notifications in a connection's settings, Airbyte will *NOT* send out any notifications related to these types of events. -2. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection you want to receive notifications for. +2. In the Airbyte UI, click **Connections** and select the connection you want to receive notifications for. 3. Click the **Settings** tab on the Connection page. diff --git a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md index 321c3753e7b8..23d25db6be99 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-connection-state.md +++ b/docs/cloud/managing-airbyte-cloud/manage-connection-state.md @@ -3,7 +3,7 @@ The connection state provides additional information about incremental syncs. It includes the most recent values for the global or stream-level cursors, which can aid in debugging or determining which data will be included in the next sync. To review the connection state: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to display. +1. In the Airbyte UI, click **Connections** and then click the connection you want to display. 2. Click the **Settings** tab on the Connection page. diff --git a/docs/cloud/managing-airbyte-cloud/manage-credits.md b/docs/cloud/managing-airbyte-cloud/manage-credits.md index 7ed15c0ed76f..ed54d783d6ae 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-credits.md +++ b/docs/cloud/managing-airbyte-cloud/manage-credits.md @@ -1,14 +1,16 @@ # Manage credits + + ## Buy credits Airbyte [credits](https://airbyte.com/pricing) are used to pay for Airbyte resources when you run a sync. You can purchase credits on Airbyte Cloud to keep your data flowing without interruption. To buy credits: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Billing** in the navigation bar. +1. In the Airbyte UI, click **Billing** in the navigation bar. -2. If you are unsure of how many credits you need, use our [Cost Estimator](https://cost.airbyte.com/) or click **Talk to Sales** to find the right amount for your team. +2. If you are unsure of how many credits you need, use our [Cost Estimator](https://www.airbyte.com/pricing) or click **Talk to Sales** to find the right amount for your team. 3. Click **Buy credits**. @@ -44,7 +46,7 @@ To buy credits: You can enroll in automatic top-ups of your credit balance. This is a beta feature for those who do not want to manually add credits each time. -To enroll, [email us](mailto:natalie@airbyte.io) with: +To enroll, [email us](mailto:billing@airbyte.io) with: 1. A link to your workspace that you'd like to enable this feature for. 2. **Recharge threshold** The number under what credit balance you would like the automatic top up to occur. @@ -59,11 +61,11 @@ To take a real example, if: Note that the difference between the recharge credit amount and recharge threshold must be at least 20 as our minimum purchase is 20 credits. -If you are enrolled and want to change your limits or cancel your enrollment, [email us](mailto:natalie@airbyte.io). +If you are enrolled and want to change your limits or cancel your enrollment, [email us](mailto:billing@airbyte.io). ## View invoice history -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Billing** in the navigation bar. +1. In the Airbyte UI, click **Billing** in the navigation bar. 
2. Click **Invoice History**. You will be redirected to a Stripe portal. diff --git a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md index da02874006ce..384d18337bb5 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md +++ b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md @@ -1,5 +1,7 @@ # Manage data residency + + In Airbyte Cloud, you can set the default data residency and choose the data residency for individual connections, which can help you comply with data localization requirements. ## Choose your default data residency @@ -12,11 +14,11 @@ While the data is processed in a data plane of the chosen residency, the cursor ::: -When you set the default data residency, it applies to new connections only. If you do not set the default data residency, the [Airbyte Default](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud/#united-states-and-airbyte-default) region is used. If you want to change the data residency for a connection, you can do so in its [connection settings](#choose-the-data-residency-for-a-connection). +When you set the default data residency, it applies to new connections only. If you do not set the default data residency, the [Airbyte Default](configuring-connections.md) region is used. If you want to change the data residency for a connection, you can do so in its [connection settings](configuring-connections.md). To choose your default data residency: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. In the Airbyte UI, click **Settings**. 2. Click **Data Residency**. @@ -26,16 +28,16 @@ To choose your default data residency: :::info -Depending on your network configuration, you may need to add [IP addresses](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud/#allowlist-ip-addresses) to your allowlist. +Depending on your network configuration, you may need to add [IP addresses](/operating-airbyte/security.md#network-security-1) to your allowlist. ::: ## Choose the data residency for a connection -You can choose the data residency for your connection in the connection settings. You can also choose data residency when creating a [new connection](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud#set-up-a-connection), or you can set the [default data residency](#choose-your-default-data-residency) for your workspace. +You can choose the data residency for your connection in the connection settings. You can also choose data residency when creating a new connection, or you can set the default data residency for your workspace. To choose the data residency for your connection: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection that you want to change. +1. In the Airbyte UI, click **Connections** and then click the connection that you want to change. 2. Click the **Settings** tab. diff --git a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md index 1e76e5f6ff58..f938008b52ff 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md +++ b/docs/cloud/managing-airbyte-cloud/manage-schema-changes.md @@ -4,6 +4,7 @@ You can specify for each connection how Airbyte should handle any change of sche Airbyte checks for any changes in your source schema immediately before syncing, at most once every 24 hours. 
+## Detect and Propagate Schema Changes Based on your configured settings for **Detect and propagate schema changes**, Airbyte will automatically sync those changes or ignore them: | Setting | Description | @@ -13,6 +14,7 @@ Based on your configured settings for **Detect and propagate schema changes**, A | Ignore | Schema changes will be detected, but not propagated. Syncs will continue running with the schema you've set up. To propagate the detected schema changes, you will need to approve the changes manually | | Pause Connection | Connections will be automatically disabled as soon as any schema changes are detected | +## Types of Schema Changes When propagation is enabled, your data in the destination will automatically shift to bring in the new changes. | Type of Schema Change | Propagation Behavior | @@ -23,6 +25,10 @@ When propagation is enabled, your data in the destination will automatically shi | Removal of stream | The stream will stop updating, and any existing data in the destination will remain. | | Column data type changes | The data in the destination will remain the same. Any new or updated rows with incompatible data types will result in a row error in the raw Airbyte tables. You will need to refresh the schema and do a full resync to ensure the data types are consistent. +:::tip +Ensure you receive webhook notifications for your connection by enabling `Schema update notifications` in the connection's settings. +::: + In all cases, if a breaking schema change is detected, the connection will be paused immediately for manual review to prevent future syncs from failing. Breaking schema changes occur when: * An existing primary key is removed from the source * An existing cursor is removed from the source @@ -33,7 +39,7 @@ To re-enable the streams, ensure the correct **Primary Key** and **Cursor** are If the connection is set to **Ignore** any schema changes, Airbyte continues syncing according to your last saved schema. You need to manually approve any detected schema changes for the schema in the destination to change. -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**. Select a connection and navigate to the **Replication** tab. If schema changes are detected, you'll see a blue "i" icon next to the Replication ab. +1. In the Airbyte UI, click **Connections**. Select a connection and navigate to the **Replication** tab. If schema changes are detected, you'll see a blue "i" icon next to the Replication tab. 2. Click **Review changes**. @@ -62,7 +68,7 @@ A major version upgrade will include a breaking change if any of these apply: | State Changes | The format of the source’s state has changed, and the full dataset will need to be re-synced | To review and fix breaking schema changes: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections** and select the connection with breaking changes. +1. In the Airbyte UI, click **Connections** and select the connection with breaking changes. 2. Review the description of what has changed in the new version. The breaking change will require you to upgrade your source or destination to a new version by a specific cutoff date. @@ -74,13 +80,10 @@ In addition to Airbyte Cloud’s automatic schema change detection, you can manu To manually refresh the source schema: - 1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Connections** and then click the connection you want to refresh. + 1.
In the Airbyte UI, click **Connections** and then click the connection you want to refresh. 2. Click the **Replication** tab. 3. In the **Activate the streams you want to sync** table, click **Refresh source schema** to fetch the schema of your data source. - 4. If there are changes to the schema, you can review them in the **Refreshed source schema** dialog. - -## Manage Schema Change Notifications -[Refer to our notification documentation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications#enable-schema-update-notifications) to understand how to stay updated on any schema updates to your connections. \ No newline at end of file + 4. If there are changes to the schema, you can review them in the **Refreshed source schema** dialog. \ No newline at end of file diff --git a/docs/cloud/managing-airbyte-cloud/review-connection-status.md b/docs/cloud/managing-airbyte-cloud/review-connection-status.md index d9ee57020af7..5970e3756f4b 100644 --- a/docs/cloud/managing-airbyte-cloud/review-connection-status.md +++ b/docs/cloud/managing-airbyte-cloud/review-connection-status.md @@ -2,9 +2,9 @@ The connection status displays information about the connection and of each stream being synced. Reviewing this summary allows you to assess the connection's current status and understand when the next sync will be run. To review the connection status: -1. On the [Airbyte Cloud](http://cloud.airbyte.com/) dashboard, click **Connections**. +1. In the Airbyte UI, click **Connections**. -2. Click a connection in the list to view its status. +2. Click a connection in the list to view its status. | Status | Description | |------------------|---------------------------------------------------------------------------------------------------------------------| @@ -13,10 +13,20 @@ To review the connection status: | Delayed | The connection has not loaded data within the scheduled replication frequency. For example, if the replication frequency is 1 hour, the connection has not loaded data for more than 1 hour | | Error | The connection has not loaded data in more than two times the scheduled replication frequency. For example, if the replication frequency is 1 hour, the connection has not loaded data for more than 2 hours | | Action Required | A breaking change related to the source or destination requires attention to resolve | -| Pending | The connection has not been run yet, so no status exists | -| Disabled | The connection has been disabled and is not scheduled to run | | In Progress | The connection is currently extracting or loading data | +| Disabled | The connection has been disabled and is not scheduled to run | +| Pending | The connection has not been run yet, so no status exists | +If the most recent sync failed, you'll see the error message that will help diagnose if the failure is due to a source or destination configuration error. [Reach out](/community/getting-support.md) to us if you need any help to ensure you data continues syncing. + +:::info +If a sync starts to fail, it will automatically be disabled after 100 consecutive failures or 14 consecutive days of failure. +::: + +If a new major version of the connector has been released, you will also see a banner on this page indicating the cutoff date for the version. Airbyte recommends upgrading before the cutoff date to ensure your data continues syncing. If you do not upgrade before the cutoff date, Airbyte will automatically disable your connection. 
+ +Learn more about version upgrades in our [resolving breaking change documentation](/cloud/managing-airbyte-cloud/manage-schema-changes#resolving-breaking-changes). + ## Review the stream status The stream status allows you to monitor each stream's latest status. The stream will be highlighted with a grey pending bar to indicate the sync is actively extracting or loading data. @@ -28,6 +38,7 @@ The stream status allows you to monitor each stream's latest status. The stream Each stream shows the last record loaded to the destination. Toggle the header to display the exact datetime the last record was loaded. -You can reset an individual stream without resetting all streams in a connection by clicking the three grey dots next to any stream. It is recommended to start a new sync after a reset. +You can [reset](/operator-guides/reset.md) an individual stream without resetting all streams in a connection by clicking the three grey dots next to any stream. + +You can also navigate directly to the stream's configuration by clicking the three grey dots next to any stream and selecting "Open details". -You can also navigate directly to the stream's configuration by click the three grey dots next to any stream and selecting "Open details" to be redirected to the stream configuration. \ No newline at end of file diff --git a/docs/cloud/managing-airbyte-cloud/review-sync-history.md b/docs/cloud/managing-airbyte-cloud/review-sync-history.md index 0bb5cf2290f5..485d981fc92f 100644 --- a/docs/cloud/managing-airbyte-cloud/review-sync-history.md +++ b/docs/cloud/managing-airbyte-cloud/review-sync-history.md @@ -2,7 +2,7 @@ The job history displays information about synced data, such as the amount of data moved, the number of records read and committed, and the total sync time. Reviewing this summary can help you monitor the sync performance and identify any potential issues. -To review the sync history, click a connection in the list to view its sync history. Sync History displays the sync status or [reset](https://docs.airbyte.com/operator-guides/reset/) status. The sync status is defined as: +To review the sync history, click a connection in the list to view its sync history. Sync History displays the sync status or [reset](/operator-guides/reset.md) status. The sync status is defined as: | Status | Description | |---------------------|---------------------------------------------------------------------------------------------------------------------| diff --git a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md index 9d8a429eab9e..47bc59ea6b19 100644 --- a/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md +++ b/docs/cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits.md @@ -1,16 +1,12 @@ -# Understand Airbyte Cloud limits +# Airbyte Cloud limits Understanding the following limitations will help you more effectively manage Airbyte Cloud.
* Max number of workspaces per user: 3* * Max number of instances of the same source connector: 10* * Max number of destinations in a workspace: 20* -* Max number of consecutive sync failures before a connection is paused: 100 -* Max number of days with consecutive sync failures before a connection is paused: 14 days * Max number of streams that can be returned by a source in a discover call: 1K * Max number of streams that can be configured to sync in a single connection: 1K * Size of a single record: 20MB -* Shortest sync schedule: Every 60 min (Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour) -* Schedule accuracy: +/- 30 min *Limits on workspaces, sources, and destinations do not apply to customers of [Powered by Airbyte](https://airbyte.com/solutions/powered-by-airbyte). To learn more [contact us](https://airbyte.com/talk-to-sales)! diff --git a/docs/community/code-of-conduct.md b/docs/community/code-of-conduct.md new file mode 100644 index 000000000000..4cb81d4468fc --- /dev/null +++ b/docs/community/code-of-conduct.md @@ -0,0 +1,91 @@ +--- +description: Our Community Code of Conduct +--- + +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others’ private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [conduct@airbyte.io](mailto:conduct@airbyte.io). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html) + +## Slack Code of Conduct + +Airbyte's Slack community is growing incredibly fast. We're home to over 1500 data professionals and are growing at an awesome pace. We are proud of our community, and have provided these guidelines to support new members in maintaining the wholesome spirit we have developed here. We appreciate your continued commitment to making this a community we are all excited to be a part of. + +### Rule 1: Be respectful. + +Our desire is for everyone to have a positive, fulfilling experience in Airbyte Slack, and we sincerely appreciate your help in making this happen. +All of the guidelines we provide below are important, but there’s a reason respect is the first rule. We take it seriously, and while the occasional breach of etiquette around Slack is forgivable, we cannot condone disrespectful behavior. + +### Rule 2: Use the most relevant channels. + +We deliberately use topic-specific Slack channels so members of the community can opt-in on various types of conversations. Our members take care to post their messages in the most relevant channel, and you’ll often see reminders about the best place to post a message (respectfully written, of course!). If you're looking for help directly from the Community Assistance Team or other Airbyte employees, please stick to posting in the airbyte-help channel, so we know you're asking us specifically! + +### Rule 3: Don’t double-post. + +Please be considerate of our community members’ time. We know your question is important, but please keep in mind that Airbyte Slack is not a customer service platform but a community of volunteers who will help you as they are able around their own work schedule. You have access to all the history, so it’s easy to check if your question has already been asked. + +### Rule 4: Check question for clarity and thoughtfulness. + +Airbyte Slack is a community of volunteers. Our members enjoy helping others; they are knowledgeable, gracious, and willing to give their time and expertise for free. Putting some effort into a well-researched and thoughtful post shows consideration for their time and will gain more responses. + +### Rule 5: Keep it public. + +This is a public forum; please do not contact individual members of this community without their express permission, regardless of whether you are trying to recruit someone, sell a product, or solicit help. + +### Rule 6: No soliciting! 
+ +The purpose of the Airbyte Slack community is to provide a forum for data practitioners to discuss their work and share their ideas and learnings. It is not intended as a place to generate leads for vendors or recruiters, and may not be used as such. + +If you’re a vendor, you may advertise your product in #shameless-plugs. Advertising your product anywhere else is strictly against the rules. + +### Rule 7: Don't spam tags, or use @here or @channel. + +Using the @here and @channel keywords in a post will not help, as they are disabled in Slack for everyone excluding admins. Nonetheless, if you use them we will remind you with a link to this rule, to help you better understand the way Airbyte Slack operates. + +Do not tag specific individuals for help on your questions. If someone chooses to respond to your question, they will do so. You will find that our community of volunteers is generally very responsive and amazingly helpful! + +### Rule 8: Use threads for discussion. + +The simplest way to keep conversations on track in Slack is to use threads. The Airbyte Slack community relies heavily on threads, and if you break from this convention, rest assured one of our community members will respectfully inform you quickly! + +_If you see a message or receive a direct message that violates any of these rules, please contact an Airbyte team member and we will take the appropriate moderation action immediately. We have zero tolerance for intentional rule-breaking and hate speech._ + diff --git a/docs/operator-guides/contact-support.md b/docs/community/getting-support.md similarity index 88% rename from docs/operator-guides/contact-support.md rename to docs/community/getting-support.md index db42a9aef36f..03b1ff795560 100644 --- a/docs/operator-guides/contact-support.md +++ b/docs/community/getting-support.md @@ -1,4 +1,4 @@ -# Airbyte Support +# Getting Support Hold up! Have you looked at [our docs](https://docs.airbyte.com/) yet? We recommend searching the wealth of knowledge in our documentation as many times the answer you are looking for is there! @@ -6,14 +6,26 @@ Hold up! Have you looked at [our docs](https://docs.airbyte.com/) yet? We recomm Running Airbyte Open Source and have questions that our docs could not clear up? Post your questions on our [Github Discussions](https://github.com/airbytehq/airbyte/discussions?_gl=1*70s0c6*_ga*MTc1OTkyOTYzNi4xNjQxMjQyMjA0*_ga_HDBMVFQGBH*MTY4OTY5MDQyOC4zNDEuMC4xNjg5NjkwNDI4LjAuMC4w) and also join our community Slack to connect with other Airbyte users. +### Community Slack **Join our Slack community** [HERE](https://slack.airbyte.com/?_gl=1*1h8mjfe*_gcl_au*MTc4MjAxMDQzOS4xNjgyOTczMDYy*_ga*MTc1OTkyOTYzNi4xNjQxMjQyMjA0*_ga_HDBMVFQGBH*MTY4Nzg4OTQ4MC4zMjUuMS4xNjg3ODkwMjE1LjAuMC4w&_ga=2.58571491.813788522.1687789276-1759929636.1641242204)! -Ask your questions first in the #ask-ai channel and if our bot can not assist you, reach out to our community in the #ask-community-for-troubleshooting channel. - +Ask your questions first in the #ask-ai channel and if our bot can not assist you, reach out to our community in the #ask-community-for-troubleshooting channel. If you require personalized support, reach out to our sales team to inquire about [Airbyte Enterprise](https://airbyte.com/airbyte-enterprise). +### Airbyte Forum + +We are driving our community support from our [forum](https://github.com/airbytehq/airbyte/discussions) on GitHub. + +### Office Hour + +Airbyte provides a [Daily Office Hour](https://airbyte.com/daily-office-hour) to discuss issues. 
+It is a 45-minute meeting: the first 20 minutes are reserved for a weekly topic presentation about Airbyte concepts, and the other 25 minutes are for general questions. The schedule is: +* Monday, Wednesday, and Friday: 1 PM PST/PDT +* Tuesday and Thursday: 4 PM CEST + + ## Airbyte Cloud Support If you have questions about connector setup, error resolution, or want to report a bug, Airbyte Support is available to assist you. We recommend checking [our documentation](https://docs.airbyte.com/) and searching our [Help Center](https://support.airbyte.com/hc/en-us) before opening a support ticket. @@ -59,5 +71,4 @@ Although we strive to offer our utmost assistance, there are certain requests th * Curating unique documentation and training materials * Configuring Airbyte to meet security requirements -If you think you will need asssitance when upgrading, we recommend upgrading during our support hours, Monday-Friday 7AM - 7PM ET so we can assist if support is needed. If you upgrade outside of support hours, please submit a ticket and we will assist when we are back online. - +If you think you will need assistance when upgrading, we recommend upgrading during our support hours, Monday-Friday 7AM - 7PM ET so we can assist if support is needed. If you upgrade outside of support hours, please submit a ticket and we will assist when we are back online. diff --git a/docs/connector-development/connector-builder-ui/incremental-sync.md b/docs/connector-development/connector-builder-ui/incremental-sync.md index 5801267fea9d..0a4db2bc7a54 100644 --- a/docs/connector-development/connector-builder-ui/incremental-sync.md +++ b/docs/connector-development/connector-builder-ui/incremental-sync.md @@ -12,7 +12,7 @@ To use incremental syncs, the API endpoint needs to fullfil the following requir - If the record's cursor field is nested, you can use an "Add Field" transformation to copy it to the top-level, and a Remove Field to remove it from the object. This will effectively move the field to the top-level of the record - It's possible to filter/request records by the cursor field -The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about how different modes of incremental syncs, check out the [Incremental Sync - Append](/understanding-airbyte/connections/incremental-append/) and [Incremental Sync - Append + Deduped](/understanding-airbyte/connections/incremental-append-deduped) pages. +The knowledge of a cursor value also allows the Airbyte system to automatically keep a history of changes to records in the destination. To learn more about the different modes of incremental sync, check out the [Incremental Sync - Append](/using-airbyte/core-concepts/sync-modes/incremental-append/) and [Incremental Sync - Append + Deduped](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped) pages.
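To make the cursor requirements above more concrete, here is a rough sketch of what an incremental sync definition can look like in the Connector Builder's YAML view. The component names (`DatetimeBasedCursor`, `MinMaxDatetime`, `RequestOption`) follow the declarative low-code CDK as currently understood, and `updated_at`, `from-date`, and `to-date` are hypothetical field and parameter names — treat the whole block as an illustration to verify against the Builder UI rather than a drop-in configuration:

```yaml
# Sketch only — component and field names assumed from the declarative low-code CDK
incremental_sync:
  type: DatetimeBasedCursor
  cursor_field: updated_at                  # hypothetical top-level cursor field on each record
  datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  cursor_granularity: PT1S
  step: P1D                                 # request one day of records per API call
  start_datetime:
    type: MinMaxDatetime
    datetime: "{{ config['start_date'] }}"
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  end_datetime:
    type: MinMaxDatetime
    datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  start_time_option:
    type: RequestOption
    inject_into: request_parameter
    field_name: from-date                   # hypothetical query parameter
  end_time_option:
    type: RequestOption
    inject_into: request_parameter
    field_name: to-date                     # hypothetical query parameter
```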
## Configuration @@ -132,7 +132,7 @@ Some APIs update records over time but do not allow to filter or search by modif In these cases, there are two options: -- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/understanding-airbyte/connections/incremental-append-deduped) +- **Do not use incremental sync** and always sync the full set of records to always have a consistent state, losing the advantages of reduced load and [automatic history keeping in the destination](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped) - **Configure the "Lookback window"** to not only sync exclusively new records, but resync some portion of records before the cutoff date to catch changes that were made to existing records, trading off data consistency and the amount of synced records. In the case of the API of The Guardian, news articles tend to only be updated for a few days after the initial release date, so this strategy should be able to catch most updates without having to resync all articles. Reiterating the example from above with a "Lookback window" of 2 days configured, let's assume the last encountered article looked like this: diff --git a/docs/connector-development/connector-builder-ui/record-processing.mdx b/docs/connector-development/connector-builder-ui/record-processing.mdx index d5ac0dbb88de..41a57d2351a9 100644 --- a/docs/connector-development/connector-builder-ui/record-processing.mdx +++ b/docs/connector-development/connector-builder-ui/record-processing.mdx @@ -321,7 +321,7 @@ Besides bringing the records in the right shape, it's important to communicate s ### Primary key -The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/understanding-airbyte/connections/incremental-append-deduped)). +The "Primary key" field specifies how to uniquely identify a record. This is important for downstream de-duplication of records (e.g. by the [incremental sync - Append + Deduped sync mode](/using-airbyte/core-concepts/sync-modes/incremental-append-deduped)). In a lot of cases, like for the EmailOctopus example from above, there is a dedicated id field that can be used for this purpose. It's important that the value of the id field is guaranteed to only occur once for a single record. diff --git a/docs/connector-development/tutorials/adding-incremental-sync.md b/docs/connector-development/tutorials/adding-incremental-sync.md index 992c9d9ed4b5..b463503a795b 100644 --- a/docs/connector-development/tutorials/adding-incremental-sync.md +++ b/docs/connector-development/tutorials/adding-incremental-sync.md @@ -2,7 +2,7 @@ ## Overview -This tutorial will assume that you already have a working source. If you do not, feel free to refer to the [Building a Toy Connector](building-a-python-source.md) tutorial. This tutorial will build directly off the example from that article. We will also assume that you have a basic understanding of how Airbyte's Incremental-Append replication strategy works. We have a brief explanation of it [here](../../understanding-airbyte/connections/incremental-append.md). +This tutorial will assume that you already have a working source. If you do not, feel free to refer to the [Building a Toy Connector](building-a-python-source.md) tutorial. 
This tutorial will build directly off the example from that article. We will also assume that you have a basic understanding of how Airbyte's Incremental-Append replication strategy works. We have a brief explanation of it [here](/using-airbyte/core-concepts/sync-modes/incremental-append.md). ## Update Catalog in `discover` @@ -293,6 +293,6 @@ Bonus points: go to Airbyte UI and reconfigure the connection to use incremental Incremental definitely requires more configurability than full refresh, so your implementation may deviate slightly depending on whether your cursor field is source defined or user-defined. If you think you are running into one of those cases, check out -our [incremental](../../understanding-airbyte/connections/incremental-append.md) documentation for more information on different types of +our [incremental](/using-airbyte/core-concepts/sync-modes/incremental-append.md) documentation for more information on different types of configuration. diff --git a/docs/connector-development/tutorials/build-a-connector-the-hard-way.md b/docs/connector-development/tutorials/build-a-connector-the-hard-way.md index fe2ea339bd51..9fb9a71aac70 100644 --- a/docs/connector-development/tutorials/build-a-connector-the-hard-way.md +++ b/docs/connector-development/tutorials/build-a-connector-the-hard-way.md @@ -57,7 +57,7 @@ Here's the outline of what we'll do to build our connector: Once we've completed the above steps, we will have built a functioning connector. Then, we'll add some optional functionality: -- Support [incremental sync](../../understanding-airbyte/connections/incremental-append.md) +- Support [incremental sync](/using-airbyte/core-concepts/sync-modes/incremental-append.md) - Add custom integration tests ### 1. Bootstrap the connector package diff --git a/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md b/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md index 711880cb0460..8cdee893e5ab 100644 --- a/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md +++ b/docs/connector-development/tutorials/cdk-tutorial-python-http/read-data.md @@ -132,7 +132,7 @@ To add incremental sync, we'll do a few things: 6. Update the `path` method to specify the date to pull exchange rates for. 7. Update the configured catalog to use `incremental` sync when we're testing the stream. -We'll describe what each of these methods do below. Before we begin, it may help to familiarize yourself with how incremental sync works in Airbyte by reading the [docs on incremental](../../../understanding-airbyte/connections/incremental-append.md). +We'll describe what each of these methods do below. Before we begin, it may help to familiarize yourself with how incremental sync works in Airbyte by reading the [docs on incremental](/using-airbyte/core-concepts/sync-modes/incremental-append.md). To keep things concise, we'll only show functions as we edit them one by one. diff --git a/docs/contributing-to-airbyte/README.md b/docs/contributing-to-airbyte/README.md index e2b9669e46ea..6683cd77fbb5 100644 --- a/docs/contributing-to-airbyte/README.md +++ b/docs/contributing-to-airbyte/README.md @@ -8,7 +8,7 @@ Thank you for your interest in contributing! We love community contributions. Read on to learn how to contribute to Airbyte. We appreciate first time contributors and we are happy to assist you in getting started. In case of questions, just reach out to us via [email](mailto:hey@airbyte.io) or [Slack](https://slack.airbyte.io)! 
-Before getting started, please review Airbyte's Code of Conduct. Everyone interacting in Slack, codebases, mailing lists, events, or other Airbyte activities is expected to follow [Code of Conduct](../project-overview/code-of-conduct.md). +Before getting started, please review Airbyte's Code of Conduct. Everyone interacting in Slack, codebases, mailing lists, events, or other Airbyte activities is expected to follow [Code of Conduct](../community/code-of-conduct.md). ## Code Contributions diff --git a/docs/contributing-to-airbyte/writing-docs.md b/docs/contributing-to-airbyte/writing-docs.md index 6e8e0b21081d..a0621d10e9e1 100644 --- a/docs/contributing-to-airbyte/writing-docs.md +++ b/docs/contributing-to-airbyte/writing-docs.md @@ -13,7 +13,7 @@ The Docs team maintains a list of [#good-first-issues](https://github.com/airbyt ## Contributing to Airbyte docs -Before contributing to Airbyte docs, read the Airbyte Community [Code of Conduct](../project-overview/code-of-conduct.md). +Before contributing to Airbyte docs, read the Airbyte Community [Code of Conduct](../community/code-of-conduct.md). :::tip If you're new to GitHub and Markdown, complete [the First Contributions tutorial](https://github.com/firstcontributions/first-contributions) and learn [Markdown basics](https://guides.github.com/features/mastering-markdown/) before contributing to Airbyte documentation. Even if you're familiar with the basics, you may be interested in Airbyte's [custom markdown extensions for connector docs](#custom-markdown-extensions-for-connector-docs). @@ -276,16 +276,7 @@ Eagle-eyed readers may note that _all_ markdown should support this feature sinc ### Adding a redirect -To add a redirect, open the [`docusaurus.config.js`](https://github.com/airbytehq/airbyte/blob/master/docusaurus/docusaurus.config.js#L22) file and locate the following commented section: - -```js -// { -// from: '/some-lame-path', -// to: '/a-much-cooler-uri', -// }, -``` - -Copy this section, replace the values, and [test the changes locally](#editing-on-your-local-machine) by going to the path you created a redirect for and verify that the address changes to the new one. +To add a redirect, open the [`docusaurus/redirects.yml`](https://github.com/airbytehq/airbyte/blob/master/docusaurus/redirects.yml) file and add an entry from which old path to which new path a redirect should happen. 
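For example, a single entry in `docusaurus/redirects.yml` might look like the sketch below. The paths are placeholders and the exact schema should be checked against the entries already in that file; the `from`/`to` keys mirror the ones used in the old `docusaurus.config.js` snippet:

```yaml
# docusaurus/redirects.yml — each entry maps an old path to its new location
# (placeholder paths; copy the structure of the existing entries in the file)
- from: /some-old-path
  to: /some-new-path
```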
:::note Your path **needs** a leading slash `/` to work diff --git a/docs/deploying-airbyte/README.md b/docs/deploying-airbyte/README.md deleted file mode 100644 index 2f8a6e290a36..000000000000 --- a/docs/deploying-airbyte/README.md +++ /dev/null @@ -1,15 +0,0 @@ -# Deploy Airbyte where you want to - -![not all who wander are lost](https://user-images.githubusercontent.com/2591516/170351002-0d054d06-c901-4794-8719-97569060408f.png) - -- [Local Deployment](local-deployment.md) -- [On Airbyte Cloud](on-cloud.md) -- [On Aws](on-aws-ec2.md) -- [On Azure VM Cloud Shell](on-azure-vm-cloud-shell.md) -- [On Digital Ocean Droplet](on-digitalocean-droplet.md) -- [On GCP.md](on-gcp-compute-engine.md) -- [On Kubernetes](on-kubernetes-via-helm.md) -- [On OCI VM](on-oci-vm.md) -- [On Restack](on-restack.md) -- [On Plural](on-plural.md) -- [On AWS ECS (spoiler alert: it doesn't work)](on-aws-ecs.md) diff --git a/docs/deploying-airbyte/local-deployment.md b/docs/deploying-airbyte/local-deployment.md index ff94ad68c885..d3247a86668c 100644 --- a/docs/deploying-airbyte/local-deployment.md +++ b/docs/deploying-airbyte/local-deployment.md @@ -21,8 +21,8 @@ cd airbyte ./run-ab-platform.sh ``` -- In your browser, just visit [http://localhost:8000](http://localhost:8000) -- You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy Airbyte to your servers, be sure to change these: +- In your browser, visit [http://localhost:8000](http://localhost:8000) +- You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy Airbyte to your servers, be sure to change these in your `.env` file: ```yaml # Proxy Configuration @@ -66,5 +66,11 @@ bash run-ab-platform.sh - Start moving some data! ## Troubleshooting +If you have any questions about the local setup and deployment process, head over to our [Getting Started FAQ](https://github.com/airbytehq/airbyte/discussions/categories/questions) on our Airbyte Forum that answers the following questions and more: + +- How long does it take to set up Airbyte? +- Where can I see my data once I've run a sync? +- Can I set a start time for my sync? -If you encounter any issues, just connect to our [Slack](https://slack.airbyte.io). Our community will help! We also have a [troubleshooting](../troubleshooting.md) section in our docs for common problems. +If you encounter any issues, check out [Getting Support](/community/getting-support) documentation +for options how to get in touch with the community or us. 
diff --git a/docs/project-overview/licenses/README.md b/docs/developer-guides/licenses/README.md similarity index 100% rename from docs/project-overview/licenses/README.md rename to docs/developer-guides/licenses/README.md diff --git a/docs/project-overview/licenses/elv2-license.md b/docs/developer-guides/licenses/elv2-license.md similarity index 100% rename from docs/project-overview/licenses/elv2-license.md rename to docs/developer-guides/licenses/elv2-license.md diff --git a/docs/project-overview/licenses/examples.md b/docs/developer-guides/licenses/examples.md similarity index 100% rename from docs/project-overview/licenses/examples.md rename to docs/developer-guides/licenses/examples.md diff --git a/docs/project-overview/licenses/license-faq.md b/docs/developer-guides/licenses/license-faq.md similarity index 100% rename from docs/project-overview/licenses/license-faq.md rename to docs/developer-guides/licenses/license-faq.md diff --git a/docs/project-overview/licenses/mit-license.md b/docs/developer-guides/licenses/mit-license.md similarity index 100% rename from docs/project-overview/licenses/mit-license.md rename to docs/developer-guides/licenses/mit-license.md diff --git a/docs/enterprise-setup/self-managed/README.md b/docs/enterprise-setup/README.md similarity index 59% rename from docs/enterprise-setup/self-managed/README.md rename to docs/enterprise-setup/README.md index 21d5fedf047d..9bb1a95450fa 100644 --- a/docs/enterprise-setup/self-managed/README.md +++ b/docs/enterprise-setup/README.md @@ -1,12 +1,12 @@ -# Airbyte Self-Managed +# Airbyte Enterprise -[Airbyte Self-Managed](https://airbyte.com/product/airbyte-enterprise) is the best way to run Airbyte yourself. You get all 300+ pre-built connectors, data never leaves your environment, and Airbyte becomes self-serve in your organization with new tools to manage multiple users, and multiple teams using Airbyte all in one place. +[Airbyte Enterprise](https://airbyte.com/product/airbyte-enterprise) is the best way to run Airbyte yourself. You get all 300+ pre-built connectors, data never leaves your environment, and Airbyte becomes self-serve in your organization with new tools to manage multiple users, and multiple teams using Airbyte all in one place. -A valid license key is required to get started with Airbyte Self-Managed. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. +A valid license key is required to get started with Airbyte Enterprise. [Talk to sales](https://airbyte.com/company/talk-to-sales) to receive your license key. The following pages outline how to: -1. [Deploy Airbyte Self-Managed using Kubernetes](./implementation-guide.md) -2. [Configure Okta for Single Sign-On (SSO) with Airbyte Self-Managed](./sso.md) +1. [Deploy Airbyte Enterprise using Kubernetes](./implementation-guide.md) +2. 
[Configure Okta for Single Sign-On (SSO) with Airbyte Enterprise](./sso.md) | Feature | Description | |---------------------------|--------------------------------------------------------------------------------------------------------------| diff --git a/docs/enterprise-setup/self-managed/implementation-guide.md b/docs/enterprise-setup/implementation-guide.md similarity index 78% rename from docs/enterprise-setup/self-managed/implementation-guide.md rename to docs/enterprise-setup/implementation-guide.md index 882a024436bb..6affccf7709d 100644 --- a/docs/enterprise-setup/self-managed/implementation-guide.md +++ b/docs/enterprise-setup/implementation-guide.md @@ -3,15 +3,15 @@ import TabItem from '@theme/TabItem'; # Implementation Guide -[Airbyte Self-Managed](./README.md) is in an early access stage for select priority users. Once you [are qualified for an Airbyte Self Managed license key](https://airbyte.com/company/talk-to-sales), you can deploy Airbyte with the following instructions. +[Airbyte Enterprise](./README.md) is in an early access stage for select priority users. Once you [are qualified for an Airbyte Enterprise license key](https://airbyte.com/company/talk-to-sales), you can deploy Airbyte with the following instructions. -Airbyte Self Managed must be deployed using Kubernetes. This is to enable Airbyte's best performance and scale. The core components \(api server, scheduler, etc\) run as deployments while the scheduler launches connector-related pods on different nodes. +Airbyte Enterprise must be deployed using Kubernetes. This is to enable Airbyte's best performance and scale. The core components \(api server, scheduler, etc\) run as deployments while the scheduler launches connector-related pods on different nodes. ## Prerequisites -There are three prerequisites to deploying Self-Managed: installing [helm](https://helm.sh/docs/intro/install/), a Kubernetes cluster, and having configured `kubectl` to connect to the cluster. +There are three prerequisites to deploying Enterprise: installing [helm](https://helm.sh/docs/intro/install/), a Kubernetes cluster, and having configured `kubectl` to connect to the cluster. -For production, we recommend deploying to EKS, GKE or AKS. If you are doing some local testing, follow the cluster setup instructions outlined [here](../../deploying-airbyte/on-kubernetes-via-helm.md#cluster-setup). +For production, we recommend deploying to EKS, GKE or AKS. If you are doing some local testing, follow the cluster setup instructions outlined [here](/deploying-airbyte/on-kubernetes-via-helm.md#cluster-setup). To install `kubectl`, please follow [these instructions](https://kubernetes.io/docs/tasks/tools/). To configure `kubectl` to connect to your cluster by using `kubectl use-context my-cluster-name`, see the following: @@ -38,7 +38,7 @@ To install `kubectl`, please follow [these instructions](https://kubernetes.io/d
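As a hedged illustration of the `kubectl` context step mentioned above — the cluster name is a placeholder, and this assumes the cluster's context already exists in your kubeconfig (note that the full subcommand is `kubectl config use-context`):

```bash
# List the contexts available in your local kubeconfig
kubectl config get-contexts

# Point kubectl at the cluster Airbyte will be deployed to
# ("my-cluster-name" is a placeholder from the prose above)
kubectl config use-context my-cluster-name

# Sanity-check that kubectl now talks to the intended cluster
kubectl get nodes
```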
-## Deploy Airbyte Self-Managed +## Deploy Airbyte Enterprise ### Add Airbyte Helm Repository @@ -60,7 +60,7 @@ cp configs/airbyte.sample.yml configs/airbyte.yml 3. Add your Airbyte Enterprise license key to your `airbyte.yml`. -4. Add your [auth details](/enterprise-setup/self-managed/sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding. +4. Add your [auth details](/enterprise-setup/sso) to your `airbyte.yml`. Auth configurations aren't easy to modify after Airbyte is installed, so please double check them to make sure they're accurate before proceeding.
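Taken together, the repository and configuration steps above amount to roughly the following sketch. The chart repository URL matches Airbyte's published Helm charts, but verify it (and the exact file names) against the current Enterprise docs before relying on it.

```bash
# Add and refresh the Airbyte Helm chart repository
helm repo add airbyte https://airbytehq.github.io/helm-charts
helm repo update

# Start from the sample Enterprise configuration and edit it in place,
# adding the license key and auth details described in steps 3 and 4
cp configs/airbyte.sample.yml configs/airbyte.yml
```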
Configuring auth in your airbyte.yml file @@ -81,7 +81,7 @@ To configure basic auth (deploy without SSO), remove the entire `auth:` section
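For orientation only, here is a purely illustrative shape of the `auth:` block being discussed — every key and value below is a hypothetical stand-in, and the collapsible section above plus the SSO guide hold the authoritative field names:

```yaml
# Illustrative only — key names are hypothetical stand-ins, not the real schema.
# Removing this entire auth: section from airbyte.yml falls back to basic auth.
auth:
  identity-providers:
    - type: okta
      domain: your-okta-domain.okta.com
      app-name: my-airbyte-app
      client-id: <client-id-from-okta>
      client-secret: <client-secret-from-okta>
```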
-### Install Airbyte Self Managed +### Install Airbyte Enterprise Install Airbyte Enterprise on helm using the following command: @@ -92,7 +92,7 @@ Install Airbyte Enterprise on helm using the following command: The default release name is `airbyte-pro`. You can change this via the `RELEASE_NAME` environment variable. -### Customizing your Airbyte Self Managed Deployment +### Customizing your Airbyte Enterprise Deployment In order to customize your deployment, you need to create `values.yaml` file in a local folder and populate it with default configuration override values. A `values.yaml` example can be located in [charts/airbyte](https://github.com/airbytehq/airbyte-platform/blob/main/charts/airbyte/values.yaml) folder of the Airbyte repository. diff --git a/docs/enterprise-setup/self-managed/sso.md b/docs/enterprise-setup/sso.md similarity index 86% rename from docs/enterprise-setup/self-managed/sso.md rename to docs/enterprise-setup/sso.md index 55d7053736f7..8aede3304284 100644 --- a/docs/enterprise-setup/self-managed/sso.md +++ b/docs/enterprise-setup/sso.md @@ -6,7 +6,7 @@ Airbyte Self Managed currently supports SSO via OIDC with [Okta](https://www.okt The following instructions walk you through: 1. [Setting up the Okta OIDC App Integration to be used by your Airbyte instance](#setting-up-okta-for-sso) -2. [Configuring Airbyte Self-Managed to use SSO](#deploying-airbyte-enterprise-with-okta) +2. [Configuring Airbyte Enterprise to use SSO](#deploying-airbyte-enterprise-with-okta) ### Setting up Okta for SSO @@ -14,13 +14,13 @@ You will need to create a new Okta OIDC App Integration for your Airbyte instanc You should create an app integration with **OIDC - OpenID Connect** as the sign-in method and **Web Application** as the application type: -![Screenshot of Okta app integration creation modal](../assets/okta-create-new-app-integration.png) +![Screenshot of Okta app integration creation modal](./assets/okta-create-new-app-integration.png) #### App integration name Please choose a URL-friendly app integraiton name without spaces or special characters, such as `my-airbyte-app`: -![Screenshot of Okta app integration name](../assets/okta-app-integration-name.png) +![Screenshot of Okta app integration name](./assets/okta-app-integration-name.png) Spaces or special characters in this field could result in invalid redirect URIs. @@ -40,13 +40,13 @@ Sign-out redirect URIs /auth/realms/airbyte/broker//endpoint/logout_response ``` -![Okta app integration name screenshot](../assets/okta-login-redirect-uris.png) +![Okta app integration name screenshot](./assets/okta-login-redirect-uris.png) _Example values_ `` should point to where your Airbyte instance will be available, including the http/https protocol. -## Deploying Airbyte Self-Managed with Okta +## Deploying Airbyte Enterprise with Okta Once your Okta app is set up, you're ready to deploy Airbyte with SSO. Take note of the following configuration values, as you will need them to configure Airbyte to use your new Okta SSO app integration: diff --git a/docs/integrations/README.md b/docs/integrations/README.md index fef3f532394d..fe41578bacf5 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -10,7 +10,7 @@ Airbyte uses a two tiered system for connectors to help you understand what to e **Community**: A community connector is maintained by the Airbyte community until it becomes Certified. Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. 
The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. -For more information about the system, see [Product Support Levels](https://docs.airbyte.com/project-overview/product-support-levels) +For more information about the system, see [Connector Support Levels](./connector-support-levels.md) _[View the connector registries in full](https://connectors.airbyte.com/files/generated_reports/connector_registry_report.html)_ diff --git a/docs/integrations/connector-support-levels.md b/docs/integrations/connector-support-levels.md new file mode 100644 index 000000000000..e684c1292b7c --- /dev/null +++ b/docs/integrations/connector-support-levels.md @@ -0,0 +1,39 @@ +# Connector Support Levels + +The following table describes the support levels of Airbyte connectors. + +| | Certified | Community | Custom | +| ------------------------------------ | ----------------------------------------- | ------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Availability** | Available to all users | Available to all users | Available to all users | +| **Who builds them?** | Either the community or the Airbyte team. | Typically they are built by the community. The Airbyte team may upgrade them to Certified at any time. | Anyone can build custom connectors. We recommend using our [Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview) or [Low-code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview). | +| **Who maintains them?** | The Airbyte team | Users | Users | +| **Production Readiness** | Guaranteed by Airbyte | Not guaranteed | Not guaranteed | +| **Support: Cloud** | Supported* | No Support | Supported** | +| **Support: Powered by Airbyte** | Supported* | No Support | Supported** | +| **Support: Self-Managed Enterprise** | Supported* | No Support | Supported** | +| **Support: Community (OSS)** | Slack Support only | No Support | Slack Support only | + +\*For Certified connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. Otherwise, please use our support portal and we will address your issues as soon as possible. + +\*\*For Custom connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. This support is provided with best efforts, and maintenance/upgrades are owned by the customer. + +## Certified + +A **Certified** connector is actively maintained and supported by the Airbyte team and maintains a high quality bar. It is production ready. + +### What you should know about Certified connectors: + +- Certified connectors are available to all users. +- These connectors have been tested and vetted in order to be certified and are production ready. +- Certified connectors should go through minimal breaking change but in the event an upgrade is needed users will be given an adequate upgrade window. + +## Community + +A **Community** connector is maintained by the Airbyte community until it becomes Certified. 
Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. + +### What you should know about Community connectors: + +- Community connectors are available to all users. +- Community connectors may be upgraded to Certified at any time, and we will notify users of these upgrades via our Slack Community and in our Connector Catalog. +- Community connectors might not be feature-complete (features planned for release are under development or not prioritized) and may include backward-incompatible/breaking API changes with no or short notice. +- Community connectors have no Support SLAs. diff --git a/docs/integrations/destinations/chroma.md b/docs/integrations/destinations/chroma.md index 3e37bebba225..f99a9cf869a5 100644 --- a/docs/integrations/destinations/chroma.md +++ b/docs/integrations/destinations/chroma.md @@ -17,7 +17,7 @@ Only one stream will exist to collect data from all source streams. This will be For each record, a UUID string is generated and used as the document id. The embeddings generated as defined will be stored as embeddings. Data in the text fields will be stored as documents and those in the metadata fields will be stored as metadata. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) You can connect to a Chroma instance either in client/server mode or in a local persistent mode. For the local persistent mode, the database file will be saved in the path defined in the `path` config parameter. Note that `path` must be an absolute path, prefixed with `/local`. diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md index 75da81407f48..02446ba825f6 100644 --- a/docs/integrations/destinations/clickhouse.md +++ b/docs/integrations/destinations/clickhouse.md @@ -21,7 +21,7 @@ Each stream will be output into its own table in ClickHouse. Each table will con Airbyte Cloud only supports connecting to your ClickHouse instance with SSL or TLS encryption, which is supported by [ClickHouse JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc). -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/csv.md b/docs/integrations/destinations/csv.md index 4cc00f440c79..223c618b8f8b 100644 --- a/docs/integrations/destinations/csv.md +++ b/docs/integrations/destinations/csv.md @@ -69,7 +69,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path}/{filename}.csv . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. 
## Changelog diff --git a/docs/integrations/destinations/databend.md b/docs/integrations/destinations/databend.md index e25a80f7ec88..444a47473a6d 100644 --- a/docs/integrations/destinations/databend.md +++ b/docs/integrations/destinations/databend.md @@ -20,7 +20,7 @@ Each stream will be output into its own table in Databend. Each table will conta ## Getting Started (Airbyte Cloud) Coming soon... -## Getting Started (Airbyte Open-Source) +## Getting Started (Airbyte Open Source) You can follow the [Connecting to a Warehouse docs](https://docs.databend.com/using-databend-cloud/warehouses/connecting-a-warehouse) to get the user, password, host etc. Or you can create such a user by running: diff --git a/docs/integrations/destinations/duckdb.md b/docs/integrations/destinations/duckdb.md index fa87f65038b9..078006e75f54 100644 --- a/docs/integrations/destinations/duckdb.md +++ b/docs/integrations/destinations/duckdb.md @@ -98,7 +98,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path} . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. diff --git a/docs/integrations/destinations/gcs.md b/docs/integrations/destinations/gcs.md index df8405a3448d..f272b77a9d6c 100644 --- a/docs/integrations/destinations/gcs.md +++ b/docs/integrations/destinations/gcs.md @@ -13,7 +13,7 @@ The Airbyte GCS destination allows you to sync data to cloud storage buckets. Ea | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | diff --git a/docs/integrations/destinations/local-json.md b/docs/integrations/destinations/local-json.md index 11870a8d5177..45ddda3fb757 100644 --- a/docs/integrations/destinations/local-json.md +++ b/docs/integrations/destinations/local-json.md @@ -69,7 +69,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path}/{filename}.jsonl . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. 
+Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. ## Changelog diff --git a/docs/integrations/destinations/mongodb.md b/docs/integrations/destinations/mongodb.md index 51bd94cb8c46..6df8e95f929c 100644 --- a/docs/integrations/destinations/mongodb.md +++ b/docs/integrations/destinations/mongodb.md @@ -25,7 +25,7 @@ Each stream will be output into its own collection in MongoDB. Each collection w Airbyte Cloud only supports connecting to your MongoDB instance with TLS encryption. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/mssql.md b/docs/integrations/destinations/mssql.md index c48261be1a0b..2a4bfd50bf5a 100644 --- a/docs/integrations/destinations/mssql.md +++ b/docs/integrations/destinations/mssql.md @@ -33,7 +33,7 @@ Airbyte Cloud only supports connecting to your MSSQL instance with TLS encryptio | Incremental - Append + Deduped | Yes | | | Namespaces | Yes | | -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) ### Requirements diff --git a/docs/integrations/destinations/mysql.md b/docs/integrations/destinations/mysql.md index 3ade0339ed56..469d24d4fa59 100644 --- a/docs/integrations/destinations/mysql.md +++ b/docs/integrations/destinations/mysql.md @@ -27,7 +27,7 @@ Each stream will be output into its own table in MySQL. Each table will contain Airbyte Cloud only supports connecting to your MySQL instance with TLS encryption. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) ### Requirements diff --git a/docs/integrations/destinations/oracle.md b/docs/integrations/destinations/oracle.md index 2b26a69cbf6c..d2e9867eb04a 100644 --- a/docs/integrations/destinations/oracle.md +++ b/docs/integrations/destinations/oracle.md @@ -26,7 +26,7 @@ Enabling normalization will also create normalized, strongly typed tables. The Oracle connector is currently in Alpha on Airbyte Cloud. Only TLS encrypted connections to your DB can be made from Airbyte Cloud. Other than that, follow the open-source instructions below. 
-## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/destinations/rockset.md b/docs/integrations/destinations/rockset.md index 0ab1709a68b6..bf685f3e4ce9 100644 --- a/docs/integrations/destinations/rockset.md +++ b/docs/integrations/destinations/rockset.md @@ -23,7 +23,7 @@ | api_server | string | api URL to rockset, specifying http protocol | | workspace | string | workspace under which rockset collections will be added/modified | -## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) +## Getting Started \(Airbyte Open Source / Airbyte Cloud\) #### Requirements diff --git a/docs/integrations/destinations/s3-glue.md b/docs/integrations/destinations/s3-glue.md index 5e66cf7d6e70..f588bc1b424b 100644 --- a/docs/integrations/destinations/s3-glue.md +++ b/docs/integrations/destinations/s3-glue.md @@ -178,7 +178,7 @@ A data sync may create multiple files as the output files can be partitioned by | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index 209b52a7bd31..81f796cae883 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -174,7 +174,7 @@ A data sync may create multiple files as the output files can be partitioned by | Feature | Support | Notes | | :----------------------------- | :-----: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | Full Refresh Sync | ✅ | Warning: this mode deletes all previously synced data in the configured bucket path. | -| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/understanding-airbyte/connections/incremental-append#inclusive-cursors) | +| Incremental - Append Sync | ✅ | Warning: Airbyte provides at-least-once delivery. Depending on your source, you may see duplicated data. Learn more [here](/using-airbyte/core-concepts/sync-modes/incremental-append#inclusive-cursors) | | Incremental - Append + Deduped | ❌ | | | Namespaces | ❌ | Setting a specific bucket path is equivalent to having separate namespaces. 
| diff --git a/docs/integrations/destinations/sqlite.md b/docs/integrations/destinations/sqlite.md index eb266b61eee8..f5c2a3193780 100644 --- a/docs/integrations/destinations/sqlite.md +++ b/docs/integrations/destinations/sqlite.md @@ -68,7 +68,7 @@ You can also copy the output file to your host machine, the following command wi docker cp airbyte-server:/tmp/airbyte_local/{destination_path} . ``` -Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](../../operator-guides/locating-files-local-destination.md) for an alternative approach. +Note: If you are running Airbyte on Windows with Docker backed by WSL2, you have to use similar step as above or refer to this [link](/integrations/locating-files-local-destination.md) for an alternative approach. ## Changelog diff --git a/docs/integrations/destinations/timeplus.md b/docs/integrations/destinations/timeplus.md index dcf43cc48225..d883fc1b3726 100644 --- a/docs/integrations/destinations/timeplus.md +++ b/docs/integrations/destinations/timeplus.md @@ -16,7 +16,7 @@ Each stream will be output into its own stream in Timeplus, with corresponding s ## Getting Started (Airbyte Cloud) Coming soon... -## Getting Started (Airbyte Open-Source) +## Getting Started (Airbyte Open Source) You can follow the [Quickstart with Timeplus Ingestion API](https://docs.timeplus.com/quickstart-ingest-api) to createa a workspace and API key. ### Setup the Timeplus Destination in Airbyte diff --git a/docs/integrations/getting-started/destination-redshift.md b/docs/integrations/getting-started/destination-redshift.md deleted file mode 100644 index ae59b0eeff95..000000000000 --- a/docs/integrations/getting-started/destination-redshift.md +++ /dev/null @@ -1,70 +0,0 @@ -# Getting Started: Destination Redshift - -## Requirements - -1. Active Redshift cluster -2. Allow connections from Airbyte to your Redshift cluster \(if they exist in separate VPCs\) -3. A staging S3 bucket with credentials \(for the COPY strategy\). - -## Setup guide - -### 1. Make sure your cluster is active and accessible from the machine running Airbyte - -This is dependent on your networking setup. The easiest way to verify if Airbyte is able to connect to your Redshift cluster is via the check connection tool in the UI. You can check AWS Redshift documentation with a tutorial on how to properly configure your cluster's access [here](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-authorize-cluster-access.html) - -### 2. Fill up connection info - -Next is to provide the necessary information on how to connect to your cluster such as the `host` whcih is part of the connection string or Endpoint accessible [here](https://docs.aws.amazon.com/redshift/latest/gsg/rs-gsg-connect-to-cluster.html#rs-gsg-how-to-get-connection-string) without the `port` and `database` name \(it typically includes the cluster-id, region and end with `.redshift.amazonaws.com`\). - -You should have all the requirements needed to configure Redshift as a destination in the UI. You'll need the following information to configure the destination: - -* **Host** -* **Port** -* **Username** -* **Password** -* **Schema** -* **Database** - * This database needs to exist within the cluster provided. - -### 2a. Fill up S3 info \(for COPY strategy\) - -Provide the required S3 info. - -* **S3 Bucket Name** - * See [this](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to create an S3 bucket. 
-* **S3 Bucket Region** - * Place the S3 bucket and the Redshift cluster in the same region to save on networking costs. -* **Access Key Id** - * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. - * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the staging bucket. -* **Secret Access Key** - * Corresponding key to the above key id. -* **Part Size** - * Affects the size limit of an individual Redshift table. Optional. Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default table limit of 100GB. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. - -Optional parameters: -* **Bucket Path** - * The directory within the S3 bucket to place the staging data. For example, if you set this to `yourFavoriteSubdirectory`, staging data will be placed inside `s3://yourBucket/yourFavoriteSubdirectory`. If not provided, defaults to the root directory. - -## Notes about Redshift Naming Conventions - -From [Redshift Names & Identifiers](https://docs.aws.amazon.com/redshift/latest/dg/r_names.html): - -### Standard Identifiers - -* Begin with an ASCII single-byte alphabetic character or underscore character, or a UTF-8 multibyte character two to four bytes long. -* Subsequent characters can be ASCII single-byte alphanumeric characters, underscores, or dollar signs, or UTF-8 multibyte characters two to four bytes long. -* Be between 1 and 127 bytes in length, not including quotation marks for delimited identifiers. -* Contain no quotation marks and no spaces. - -### Delimited Identifiers - -Delimited identifiers \(also known as quoted identifiers\) begin and end with double quotation marks \("\). If you use a delimited identifier, you must use the double quotation marks for every reference to that object. The identifier can contain any standard UTF-8 printable characters other than the double quotation mark itself. Therefore, you can create column or table names that include otherwise illegal characters, such as spaces or the percent symbol. ASCII letters in delimited identifiers are case-insensitive and are folded to lowercase. To use a double quotation mark in a string, you must precede it with another double quotation mark character. - -Therefore, Airbyte Redshift destination will create tables and schemas using the Unquoted identifiers when possible or fallback to Quoted Identifiers if the names are containing special characters. - -## Data Size Limitations - -Redshift specifies a maximum limit of 65535 bytes to store the raw JSON record data. Thus, when a row is too big to fit, the Redshift destination fails to load such data and currently ignores that record. 
- -For more information, see the [docs here.](https://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html) diff --git a/docs/integrations/getting-started/source-github.md b/docs/integrations/getting-started/source-github.md deleted file mode 100644 index 6ae7f442aade..000000000000 --- a/docs/integrations/getting-started/source-github.md +++ /dev/null @@ -1,12 +0,0 @@ -## Getting Started: Source GitHub - -### Requirements - -* Github Account -* Github Personal Access Token wih the necessary permissions \(described below\) - -### Setup guide - -Log into Github and then generate a [personal access token](https://github.com/settings/tokens). - -Your token should have at least the `repo` scope. Depending on which streams you want to sync, the user generating the token needs more permissions: diff --git a/docs/integrations/getting-started/source-google-ads.md b/docs/integrations/getting-started/source-google-ads.md deleted file mode 100644 index f1558cddf335..000000000000 --- a/docs/integrations/getting-started/source-google-ads.md +++ /dev/null @@ -1,42 +0,0 @@ -# Getting Started: Source Google Ads - -## Requirements - -Google Ads Account with an approved Developer Token \(note: In order to get API access to Google Ads, you must have a "manager" account. This must be created separately from your standard account. You can find more information about this distinction in the [google ads docs](https://ads.google.com/home/tools/manager-accounts/).\) - -* developer_token -* client_id -* client_secret -* refresh_token -* start_date -* customer_id - -## Setup guide - -This guide will provide information as if starting from scratch. Please skip over any steps you have already completed. - -* Create an Google Ads Account. Here are [Google's instruction](https://support.google.com/google-ads/answer/6366720) on how to create one. -* Create an Google Ads MANAGER Account. Here are [Google's instruction](https://ads.google.com/home/tools/manager-accounts/) on how to create one. -* You should now have two Google Ads accounts: a normal account and a manager account. Link the Manager account to the normal account following [Google's documentation](https://support.google.com/google-ads/answer/7459601). -* Apply for a developer token \(**make sure you follow our** [**instructions**](#how-to-apply-for-the-developer-token)\) on your Manager account. This token allows you to access your data from the Google Ads API. Here are [Google's instructions](https://developers.google.com/google-ads/api/docs/first-call/dev-token). The docs are a little unclear on this point, but you will _not_ be able to access your data via the Google Ads API until this token is approved. You cannot use a test developer token, it has to be at least a basic developer token. It usually takes Google 24 hours to respond to these applications. This developer token is the value you will use in the `developer_token` field. -* Fetch your `client_id`, `client_secret`, and `refresh_token`. Google provides [instructions](https://developers.google.com/google-ads/api/docs/first-call/overview) on how to do this. -* Select your `customer_id`. The `customer_is` refer to the id of each of your Google Ads accounts. This is the 10 digit number in the top corner of the page when you are in google ads ui. The source will only pull data from the accounts for which you provide an id. If you are having trouble finding it, check out [Google's instructions](https://support.google.com/google-ads/answer/1704344). - -Wow! That was a lot of steps. 
We are working on making the OAuth flow for all of our connectors simpler \(allowing you to skip needing to get a `developer_token` and a `refresh_token` which are the most painful / time-consuming steps in this walkthrough\). - -## How to apply for the developer token - -Google is very picky about which software and which use case can get access to a developer token. The Airbyte team has worked with the Google Ads team to whitelist Airbyte and make sure you can get one \(see [issue 1981](https://github.com/airbytehq/airbyte/issues/1981) for more information\). - -When you apply for a token, you need to mention: - -* Why you need the token \(eg: want to run some internal analytics...\) -* That you will be using the Airbyte Open Source project -* That you have full access to the code base \(because we're open source\) -* That you have full access to the server running the code \(because you're self-hosting Airbyte\) - -If for any reason the request gets denied, let us know and we will be able to unblock you. - -## Understanding Google Ads Query Language - -The Google Ads Query Language can query the Google Ads API. Check out [Google Ads Query Language](https://developers.google.com/google-ads/api/docs/query/overview) diff --git a/docs/operator-guides/locating-files-local-destination.md b/docs/integrations/locating-files-local-destination.md similarity index 98% rename from docs/operator-guides/locating-files-local-destination.md rename to docs/integrations/locating-files-local-destination.md index e514f3a92ebd..d401d7952455 100644 --- a/docs/operator-guides/locating-files-local-destination.md +++ b/docs/integrations/locating-files-local-destination.md @@ -1,3 +1,7 @@ +--- +displayed_sidebar: docs +--- + # Windows - Browsing Local File Output ## Overview diff --git a/docs/integrations/missing-an-integration.md b/docs/integrations/missing-an-integration.md deleted file mode 100644 index e52613182866..000000000000 --- a/docs/integrations/missing-an-integration.md +++ /dev/null @@ -1,14 +0,0 @@ -# Missing an Integration? - -If you'd like to ask for a new connector, or build a new connectors and make them part of the pool of pre-built connectors on Airbyte, first a big thank you. We invite you to check our [contributing guide](../contributing-to-airbyte/). - -If you'd like to build new connectors, or update existing ones, for your own usage, without contributing to the Airbyte codebase, read along. - -## Developing your own connectors - -It's easy to code your own integrations on Airbyte. Here are some links to instruct on how to code new sources and destinations. - -* [Building new connectors](../contributing-to-airbyte/README.md) - -While the guides above are specific to the languages used most frequently to write integrations, **Airbyte integrations can be written in any language**. Please reach out to us if you'd like help developing integrations in other languages. - diff --git a/docs/integrations/sources/dv-360.md b/docs/integrations/sources/dv-360.md index 9e4341f1d847..b3c095f4691c 100644 --- a/docs/integrations/sources/dv-360.md +++ b/docs/integrations/sources/dv-360.md @@ -36,7 +36,7 @@ Available filters and metrics are provided in this [page](https://developers.goo 3. Fill out a start date, and optionally, an end date and filters (check the [Queries documentation](https://developers.google.com/bid-manager/v1.1/queries)) . 4. You're done. 
-## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/sources/e2e-test-cloud.md b/docs/integrations/sources/e2e-test-cloud.md index be70af977245..633e65c3e548 100644 --- a/docs/integrations/sources/e2e-test-cloud.md +++ b/docs/integrations/sources/e2e-test-cloud.md @@ -2,7 +2,7 @@ ## Overview -This is a mock source for testing the Airbyte pipeline. It can generate arbitrary data streams. It is a subset of what is in [End-to-End Testing Source](e2e-test.md) in Open-Source to avoid Airbyte Cloud users accidentally in curring a huge bill. +This is a mock source for testing the Airbyte pipeline. It can generate arbitrary data streams. It is a subset of what is in [End-to-End Testing Source](e2e-test.md) in Open Source to avoid Airbyte Cloud users accidentally in curring a huge bill. ## Mode diff --git a/docs/integrations/sources/google-analytics-v4.md b/docs/integrations/sources/google-analytics-v4.md index 835d1d324df5..85538f77acef 100644 --- a/docs/integrations/sources/google-analytics-v4.md +++ b/docs/integrations/sources/google-analytics-v4.md @@ -104,7 +104,7 @@ The Google Analytics (Universal Analytics) source connector can sync the followi Reach out to us on Slack or [create an issue](https://github.com/airbytehq/airbyte/issues) if you need to send custom Google Analytics report data with Airbyte. -## Rate Limits and Performance Considerations \(Airbyte Open-Source\) +## Rate Limits and Performance Considerations \(Airbyte Open Source\) [Analytics Reporting API v4](https://developers.google.com/analytics/devguides/reporting/core/v4/limits-quotas) diff --git a/docs/integrations/sources/google-directory.md b/docs/integrations/sources/google-directory.md index b0e570f7544f..d263d9efc93e 100644 --- a/docs/integrations/sources/google-directory.md +++ b/docs/integrations/sources/google-directory.md @@ -40,7 +40,7 @@ This connector attempts to back off gracefully when it hits Directory API's rate 1. Click `OAuth2.0 authorization` then `Authenticate your Google Directory account`. 2. You're done. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) Google APIs use the OAuth 2.0 protocol for authentication and authorization. This connector supports [Web server application](https://developers.google.com/identity/protocols/oauth2#webserver) and [Service accounts](https://developers.google.com/identity/protocols/oauth2#serviceaccount) scenarios. Therefore, there are 2 options of setting up authorization for this source: diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index 391c7cfbed48..c73999857959 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -25,7 +25,7 @@ Note: Currently hierarchyid and sql_variant are not processed in CDC migration t On Airbyte Cloud, only TLS connections to your MSSQL instance are supported in source configuration. Other than that, you can proceed with the open-source instructions below. 
-## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/sources/my-hours.md b/docs/integrations/sources/my-hours.md index 66ae44d7bc2d..f58aa7899378 100644 --- a/docs/integrations/sources/my-hours.md +++ b/docs/integrations/sources/my-hours.md @@ -24,7 +24,7 @@ This source allows you to synchronize the following data tables: **Requirements** In order to use the My Hours API you need to provide the credentials to an admin My Hours account. -### Performance Considerations (Airbyte Open-Source) +### Performance Considerations (Airbyte Open Source) Depending on the amount of team members and time logs the source provides a property to change the pagination size for the time logs query. Typically a pagination of 30 days is a correct balance between reliability and speed. But if you have a big amount of monthly entries you might want to change this value to a lower value. diff --git a/docs/integrations/sources/mysql.md b/docs/integrations/sources/mysql.md index f75d347df8f6..9f5c110266c7 100644 --- a/docs/integrations/sources/mysql.md +++ b/docs/integrations/sources/mysql.md @@ -91,7 +91,7 @@ To fill out the required information: #### Step 4: (Airbyte Cloud Only) Allow inbound traffic from Airbyte IPs. If you are on Airbyte Cloud, you will always need to modify your database configuration to allow inbound traffic from Airbyte IPs. You can find a list of all IPs that need to be allowlisted in -our [Airbyte Security docs](../../../operator-guides/security#network-security-1). +our [Airbyte Security docs](../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte MySQL source! diff --git a/docs/integrations/sources/oracle.md b/docs/integrations/sources/oracle.md index 1e81b7c73fed..e4493f950b19 100644 --- a/docs/integrations/sources/oracle.md +++ b/docs/integrations/sources/oracle.md @@ -20,7 +20,7 @@ The Oracle source does not alter the schema present in your database. Depending On Airbyte Cloud, only TLS connections to your Oracle instance are supported. Other than that, you can proceed with the open-source instructions below. -## Getting Started \(Airbyte Open-Source\) +## Getting Started \(Airbyte Open Source\) #### Requirements diff --git a/docs/integrations/sources/pokeapi.md b/docs/integrations/sources/pokeapi.md index 4290a6073023..ee543b33e024 100644 --- a/docs/integrations/sources/pokeapi.md +++ b/docs/integrations/sources/pokeapi.md @@ -4,7 +4,7 @@ The PokéAPI is primarly used as a tutorial and educational resource, as it requires zero dependencies. Learn how Airbyte and this connector works with these tutorials: -- [Airbyte Quickstart: An Introduction to Deploying and Syncing](../../quickstart/deploy-airbyte.md) +- [Airbyte Quickstart: An Introduction to Deploying and Syncing](../../using-airbyte/getting-started/readme.md) - [Airbyte CDK Speedrun: A Quick Primer on Building Source Connectors](../../connector-development/tutorials/cdk-speedrun.md) - [How to Build ETL Sources in Under 30 Minutes: A Video Tutorial](https://www.youtube.com/watch?v=kJ3hLoNfz_E&t=13s&ab_channel=Airbyte) @@ -24,7 +24,7 @@ This source uses the fully open [PokéAPI](https://pokeapi.co/docs/v2#info) to s Currently, only one output stream is available from this source, which is the Pokémon output stream. 
This schema is defined [here](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-pokeapi/source_pokeapi/schemas/pokemon.json). -## Rate Limiting & Performance Considerations \(Airbyte Open-Source\) +## Rate Limiting & Performance Considerations \(Airbyte Open Source\) According to the API's [fair use policy](https://pokeapi.co/docs/v2#fairuse), please make sure to cache resources retrieved from the PokéAPI wherever possible. That said, the PokéAPI does not perform rate limiting. diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md index 6c09d3aabd75..2d31f7286bb0 100644 --- a/docs/integrations/sources/postgres.md +++ b/docs/integrations/sources/postgres.md @@ -54,7 +54,7 @@ To fill out the required information: #### Step 3: (Airbyte Cloud Only) Allow inbound traffic from Airbyte IPs. If you are on Airbyte Cloud, you will always need to modify your database configuration to allow inbound traffic from Airbyte IPs. You can find a list of all IPs that need to be allowlisted in -our [Airbyte Security docs](../../../operator-guides/security#network-security-1). +our [Airbyte Security docs](../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte Postgres source! diff --git a/docs/integrations/sources/postgres/cloud-sql-postgres.md b/docs/integrations/sources/postgres/cloud-sql-postgres.md index 9a3f9e6e01a0..670d268f82d3 100644 --- a/docs/integrations/sources/postgres/cloud-sql-postgres.md +++ b/docs/integrations/sources/postgres/cloud-sql-postgres.md @@ -58,7 +58,7 @@ If you are on Airbyte Cloud, you will always need to modify your database config ![Add a Network](./assets/airbyte_cloud_sql_postgres_add_network.png) -2. Add a new network, and enter the Airbyte's IPs, which you can find in our [Airbyte Security documentation](../../../operator-guides/security#network-security-1). +2. Add a new network, and enter the Airbyte's IPs, which you can find in our [Airbyte Security documentation](../../../operating-airbyte/security#network-security-1). Now, click `Set up source` in the Airbyte UI. Airbyte will now test connecting to your database. Once this succeeds, you've configured an Airbyte Postgres source! diff --git a/docs/operator-guides/security.md b/docs/operating-airbyte/security.md similarity index 97% rename from docs/operator-guides/security.md rename to docs/operating-airbyte/security.md index a887e8bd5b91..7f1b10973bd6 100644 --- a/docs/operator-guides/security.md +++ b/docs/operating-airbyte/security.md @@ -1,4 +1,4 @@ -# Airbyte Security +# Security Airbyte is committed to keeping your data safe by following industry-standard practices for securing physical deployments, setting access policies, and leveraging the security features of leading Cloud providers. @@ -142,7 +142,7 @@ Airbyte Cloud allows you to log in to the platform using your email and password ### Access Control -Airbyte Cloud supports [user management](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace#add-users-to-your-workspace) but doesn’t support role-based access control (RBAC) yet. +Airbyte Cloud supports [user management](/using-airbyte/workspaces.md#add-users-to-your-workspace) but doesn’t support role-based access control (RBAC) yet. 
### Compliance diff --git a/docs/operator-guides/browsing-output-logs.md b/docs/operator-guides/browsing-output-logs.md index 456965c21904..19de2cdcb6b6 100644 --- a/docs/operator-guides/browsing-output-logs.md +++ b/docs/operator-guides/browsing-output-logs.md @@ -1,29 +1,49 @@ -# Browsing Output Logs +# Browsing Logs ## Overview -This tutorial will describe how to explore Airbyte Workspace folders. +Airbyte records the full logs as a part of each sync. These logs can be used to understand the underlying operations Airbyte performs to read data from the source and write to the destination as a part of the [Airbyte Protocol](/understanding-airbyte/airbyte-protocol.md). The logs includes many details, including any errors that can be helpful when troubleshooting sync errors. -This is useful if you need to browse the docker volumes where extra output files of Airbyte server and workers are stored since they may not be accessible through the UI. +:::info +When using Airbyte Open Source, you can also access additional logs outside of the UI. This is useful if you need to browse the Docker volumes where extra output files of Airbyte server and workers are stored. +::: + +To find the logs for a connection, navigate to a connection's `Job History` tab to see the latest syncs. + +## View the logs in the UI +To open the logs in the UI, select the three grey dots next to a sync and select `View logs`. This will open our full screen in-app log viewer. + +:::tip +If you are troubleshooting a sync error, you can search for `Error`, `Exception`, or `Fail` to find common errors. +::: -## Exploring the Logs folders +The in-app log viewer will only search for instances of the search term within that attempt. To search across all attempts, download the logs locally. -When running a Sync in Airbyte, you have the option to look at the logs in the UI as shown next. +## Link to a sync job +To help others quickly find your job, copy the link to the logs to your clipboard, select the three grey dots next to a sync and select `Copy link to job`. -### Identifying Workspace IDs +You can also access the link to a sync job from the in-app log viewer. + +## Download the logs +To download a copy of the logs locally, select the three grey dots next to a sync and select `Download logs`. + +You can also access the download log button from the in-app log viewer. + +:::note +If a sync was completed across multiple attempts, downloading the logs will union all the logs for all attempts for that job. +::: -In the screenshot below, you can notice the highlighted blue boxes are showing the id numbers that were used for the selected "Attempt" for this sync job. +## Exploring Local Logs -In this case, the job was running in `/tmp/workspace/9/2/` folder since the tab of the third attempt is being selected in the UI \(first attempt would be `/tmp/workspace/9/0/`\). + -![](../.gitbook/assets/explore_logs.png) +### Establish the folder directory -The highlighted button in the red circle on the right would allow you to download the logs.log file. -However, there are actually more files being recorded in the same workspace folder... Thus, we might want to dive deeper to explore these folders and gain a better understanding of what is being run by Airbyte. +In the UI, you can discover the Attempt ID within the sync job. Most jobs will complete in the first attempt, so your folder directory will look like `/tmp/workspace/9/0`. 
If you sync job completes in multiple attempts, you'll need to define which attempt you're interested in, and note this. For example, for the third attempt, it will look like `/tmp/workspace/9/2/` . ### Understanding the Docker run commands -Scrolling down a bit more, we can also read the different docker commands being used internally are starting with: +We can also read the different docker commands being used internally are starting with: ```text docker run --rm -i -v airbyte_workspace:/data -v /tmp/airbyte_local:/local -w /data/9/2 --network host ... @@ -35,7 +55,7 @@ Following [Docker Volume documentation](https://docs.docker.com/storage/volumes/ ### Opening a Unix shell prompt to browse the Docker volume -For example, we can run any docker container/image to browse the content of this named volume by mounting it similarly, let's use the [busybox](https://hub.docker.com/_/busybox) image. +For example, we can run any docker container/image to browse the content of this named volume by mounting it similarly. In the example below, the [busybox](https://hub.docker.com/_/busybox) image is used. ```text docker run -it --rm --volume airbyte_workspace:/data busybox @@ -50,13 +70,15 @@ ls /data/9/2/ Example Output: ```text -catalog.json normalize tap_config.json -logs.log singer_rendered_catalog.json target_config.json +catalog.json +tap_config.json +logs.log +target_config.json ``` ### Browsing from the host shell -Or, if you don't want to transfer to a shell prompt inside the docker image, you can simply run Shell commands using docker commands as a proxy like this: +Or, if you don't want to transfer to a shell prompt inside the docker image, you can run Shell commands using docker commands as a proxy: ```bash docker run -it --rm --volume airbyte_workspace:/data busybox ls /data/9/2 @@ -81,7 +103,7 @@ docker run -it --rm --volume airbyte_workspace:/data busybox cat /data/9/2/catal Example Output: ```text -{"streams":[{"stream":{"name":"exchange_rate","json_schema":{"type":"object","properties":{"CHF":{"type":"number"},"HRK":{"type":"number"},"date":{"type":"string"},"MXN":{"type":"number"},"ZAR":{"type":"number"},"INR":{"type":"number"},"CNY":{"type":"number"},"THB":{"type":"number"},"AUD":{"type":"number"},"ILS":{"type":"number"},"KRW":{"type":"number"},"JPY":{"type":"number"},"PLN":{"type":"number"},"GBP":{"type":"number"},"IDR":{"type":"number"},"HUF":{"type":"number"},"PHP":{"type":"number"},"TRY":{"type":"number"},"RUB":{"type":"number"},"HKD":{"type":"number"},"ISK":{"type":"number"},"EUR":{"type":"number"},"DKK":{"type":"number"},"CAD":{"type":"number"},"MYR":{"type":"number"},"USD":{"type":"number"},"BGN":{"type":"number"},"NOK":{"type":"number"},"RON":{"type":"number"},"SGD":{"type":"number"},"CZK":{"type":"number"},"SEK":{"type":"number"},"NZD":{"type":"number"},"BRL":{"type":"number"}}},"supported_sync_modes":["full_refresh"],"default_cursor_field":[]},"sync_mode":"full_refresh","cursor_field":[]}]} +{"streams":[{"stream":{"name":"exchange_rate","json_schema":{"type":"object","properties":{"CHF":{"type":"number"},"HRK":{"type":"number"},"date":{"type":"string"},"MXN":{"type":"number"},"ZAR":{"type":"number"},"INR":{"type":"number"},"CNY":{"type":"number"},"THB":{"type":"number"},"NZD":{"type":"number"},"BRL":{"type":"number"}}},"supported_sync_modes":["full_refresh"],"default_cursor_field":[]},"sync_mode":"full_refresh","cursor_field":[]}]} ``` ### Extract catalog.json file from docker volume diff --git a/docs/operator-guides/configuring-sync-notifications.md 
b/docs/operator-guides/configuring-sync-notifications.md deleted file mode 100644 index 6418aa2ffab5..000000000000 --- a/docs/operator-guides/configuring-sync-notifications.md +++ /dev/null @@ -1,55 +0,0 @@ -# Configuring Sync Notifications - -## Overview - -You can set up Airbyte to notify you when syncs have **failed** or **succeeded**. This is achieved through a webhook, a URL that you can input into other applications to get real time data from Airbyte. - -## Set up Slack Notifications on Sync Status - -If you're more of a visual learner, just head over to [this video](https://www.youtube.com/watch?v=NjYm8F-KiFc&ab_channel=Airbyte) to learn how to do this. Otherwise, keep reading! - -**Set up the bot.** - -Navigate to https://api.slack.com/apps/. Hit `Create an App`. - -![](../.gitbook/assets/notifications_create_slack_app.png) - -Then click `From scratch`. Enter your App Name (e.g. Airbyte Sync Notifications) and pick your desired Slack workspace. - -**Set up the webhook URL.** - -Now on the left sidebar, click on `Incoming Webhooks`. - -![](../.gitbook/assets/notifications_incoming_webhooks.png) - -Click the slider button in the top right to turn the feature on. Then click `Add New Webhook to Workspace`. - -![](../.gitbook/assets/notifications_add_new_webhook.png) - -Pick the channel that you want to receive Airbyte notifications in (ideally a dedicated one), and click `Allow` to give it permissions to access the channel. You should see the bot show up in the selected channel now. - -Now you should see an active webhook right above the `Add New Webhook to Workspace` button. - -![](../.gitbook/assets/notifications_webhook_url.png) - -Click `Copy.` - -**Add the webhook to Airbyte.** - -Assuming you have a [running instance of Airbyte](../deploying-airbyte/README.md), we can navigate to the UI. Click on Settings and then click on `Notifications`. - -![](../.gitbook/assets/notifications_airbyte_settings.png) - -Simply paste the copied webhook URL in `Connection status Webhook URL` and you're ready to go! On this page, you can click one or both of the sliders to decide whether you want notifications on sync successes, failures, or both. Make sure to click `Save changes` before you leave. - -Your Webhook URL should look something like this: - -![](../.gitbook/assets/notifications_airbyte_notification_settings.png) - -**Test it out.** - -From the settings page, you can click `Test` to send a test message to the channel. Or, just run a sync now and try it out! If all goes well, you should receive a notification in your selected channel that looks like this: - -![](../.gitbook/assets/notifications_slack_message.png) - -You're done! diff --git a/docs/operator-guides/reset.md b/docs/operator-guides/reset.md index ff7dc4d06124..3fba28aa45a3 100644 --- a/docs/operator-guides/reset.md +++ b/docs/operator-guides/reset.md @@ -1,20 +1,25 @@ # Resetting Your Data -The reset button gives you a blank slate, of sorts, to perform a fresh new sync. This can be useful if you are just testing Airbyte or don't necessarily require the data replicated to your destination to be saved permanently. +Resetting your data allows you to drop all previously synced data so that any ensuing sync can start syncing fresh. This is useful if you don't require the data replicated to your destination to be saved permanently or are just testing Airbyte. 
-![](../.gitbook/assets/reset_your_data_1.png) +Airbyte allows you to reset all streams in a connection, a subset of streams, or only a single stream (when the connector supports per-stream operations). -As outlined above, you can click on the `Reset your data` button to give you that clean slate. Just as a heads up, here is what it does and doesn't do: +A sync will automatically start after a completed reset, which commonly backfills all historical data. -The reset button **DOES**: +## Performing a Reset +To perform a reset, select `Reset your data` in the UI on a connection's status or job history tabs. You will also be prompted to reset affected streams if you edit any stream settings to ensure data continues to sync accurately. -* Delete all records in your destination tables -* Delete all records in your destination file +Like a sync job, a reset can complete as successful, failed, or cancelled. To resolve a failed reset, you should manually drop the tables in the destination so that Airbyte can continue syncing accurately into the destination. -The reset button **DOES NOT**: +## Reset behavior +When a reset is successfully completed, all the records are deleted from your destination tables (and files, if using local JSON or local CSV as the destination). -* Delete the destination tables -* Delete a destination file if using the LocalCSV or LocalJSON Destinations +:::info +If you are using destinations that are on the [Destinations v2](/release_notes/upgrading_to_destinations_v2.md) framework, only raw tables will be cleared of their data. Final tables will retain all records from the last sync. +::: -Because of this, if you have any orphaned tables or files that are no longer being synced to, they will have to be cleaned up later, as Airbyte will not clean them up for you. +A reset **DOES NOT** delete any destination tables when using a data warehouse, data lake, or database. The schema is retained but will not contain any rows. +:::tip +If you have any orphaned tables or files that are no longer being synced to, they should be cleaned up separately, as Airbyte will not clean them up for you. This can occur when the `Destination Namespace` or `Stream Prefix` connection configuration is changed for an existing connection. +::: diff --git a/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md b/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md index a204b2a2f49b..1f0175b392d8 100644 --- a/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md +++ b/docs/operator-guides/transformation-and-normalization/transformations-with-airbyte.md @@ -18,7 +18,7 @@ After replication of data from a source connector \(Extract\) to a destination c ## Public Git repository -In the connection settings page, I can add new Transformations steps to apply after [normalization](../../understanding-airbyte/basic-normalization.md). For example, I want to run my custom dbt project jaffle_shop, whenever my sync is done replicating and normalizing my data. +In the connection settings page, I can add new Transformations steps to apply after [normalization](../../using-airbyte/core-concepts/basic-normalization.md). For example, I want to run my custom dbt project jaffle_shop, whenever my sync is done replicating and normalizing my data. You can find the jaffle shop test repository by clicking [here](https://github.com/dbt-labs/jaffle_shop).
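Before wiring the jaffle_shop project into a connection as a custom transformation, it can help to confirm the dbt project runs on its own. The sketch below assumes dbt is installed locally and that a profile pointing at the same destination Airbyte writes to is already configured in `~/.dbt/profiles.yml`; it is a local sanity check, not part of the Airbyte setup itself.

```bash
# Clone the example dbt project and run it locally as a sanity check.
git clone https://github.com/dbt-labs/jaffle_shop.git
cd jaffle_shop
dbt debug   # verify the profile and destination connection resolve correctly
dbt run     # build the example models against your destination
```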
diff --git a/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md b/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md index 3f6c9357d2c1..4e29e15fe167 100644 --- a/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md +++ b/docs/operator-guides/transformation-and-normalization/transformations-with-sql.md @@ -16,7 +16,7 @@ At its core, Airbyte is geared to handle the EL \(Extract Load\) steps of an ELT However, this is actually producing a table in the destination with a JSON blob column... For the typical analytics use case, you probably want this json blob normalized so that each field is its own column. -So, after EL, comes the T \(transformation\) and the first T step that Airbyte actually applies on top of the extracted data is called "Normalization". You can find more information about it [here](../../understanding-airbyte/basic-normalization.md). +So, after EL, comes the T \(transformation\) and the first T step that Airbyte actually applies on top of the extracted data is called "Normalization". You can find more information about it [here](../../using-airbyte/core-concepts/basic-normalization.md). Airbyte runs this step before handing the final data over to other tools that will manage further transformation down the line. diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 5c5197441b85..4d2dafd2991f 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -1,5 +1,12 @@ # Upgrading Airbyte +:::info + +If you run on [Airbyte Cloud](https://cloud.airbyte.com/signup) you'll always run on the newest +Airbyte version automatically. This documentation only applies to users deploying our self-managed +version. +::: + ## Overview This tutorial will describe how to determine if you need to run this upgrade process, and if you do, how to do so. This process does require temporarily turning off Airbyte. diff --git a/docs/operator-guides/using-custom-connectors.md b/docs/operator-guides/using-custom-connectors.md index 4516f19ff987..04be26cf889e 100644 --- a/docs/operator-guides/using-custom-connectors.md +++ b/docs/operator-guides/using-custom-connectors.md @@ -1,15 +1,17 @@ # Using custom connectors -If our connector catalog does not fulfill your needs, you can build your own Airbyte connectors. -There are two approaches you can take while jumping on connector development project: -1. You want to build a connector for an **external** source or destination (public API, off-the-shelf DBMS, data warehouses, etc.). In this scenario, your connector development will probably benefit the community. The right way is to open a PR on our repo to add your connector to our catalog. You will then benefit from an Airbyte team review and potential future improvements and maintenance from the community. -2. You want to build a connector for an **internal** source or destination (private API) specific to your organization. This connector has no good reason to be exposed to the community. - -This guide focuses on the second approach and assumes the following: -* You followed our other guides and tutorials about connector developments. -* You finished your connector development, running it locally on an Airbyte development instance. + +:::info +This guide walks through the setup of a Docker-based custom connector. 
To understand how to use our low-code connector builder, read our guide [here](/connector-development/connector-builder-ui/overview.md). +::: + +If our connector catalog does not fulfill your needs, you can build your own Airbyte connectors! You can either use our [low-code connector builder](/connector-development/connector-builder-ui/overview.md) or upload a Docker-based custom connector. + +This page walks through the process to upload a **Docker-based custom connector**. This is an ideal route for connectors that have an **internal** use case like a private API with a specific fit for your organization. This guide for using Docker-based custom connectors assumes the following: +* You followed our other guides and tutorials about [connector development](/connector-development/connector-builder-ui/overview.md) +* You finished your connector development and have it running locally on an Airbyte development instance. * You want to deploy this connector to a production Airbyte instance running on a VM with docker-compose or on a Kubernetes cluster. -If you prefer video tutorials, [we recorded a demo about uploading connectors images to a GCP Artifact Registry](https://www.youtube.com/watch?v=4YF20PODv30&ab_channel=Airbyte). +If you prefer video tutorials, we recorded a demo on how to upload [connectors images to a GCP Artifact Registry](https://www.youtube.com/watch?v=4YF20PODv30&ab_channel=Airbyte). ## 1. Create a private Docker registry Airbyte needs to pull its Docker images from a remote Docker registry to consume a connector. @@ -70,42 +72,21 @@ If you want Airbyte to pull images from another private Docker registry, you wil You should run all the above commands from your local/CI environment, where your connector source code is available. -## 4. Use your custom connector in Airbyte +## 4. Use your custom Docker connector in Airbyte At this step, you should have: * A private Docker registry hosting your custom connector image. * Authenticated your Airbyte instance to your private Docker registry. You can pull your connector image from your private registry to validate the previous steps. On your Airbyte instance: run `docker pull :` if you are using our `docker-compose` deployment, or start a pod that is using the connector image. -### 1. Click on Settings -![Step 1 screenshot](https://images.tango.us/public/screenshot_bf5c3e27-19a3-4cc0-bc40-90c80afdbcba?crop=focalpoint&fit=crop&fp-x=0.0211&fp-y=0.9320&fp-z=2.9521&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 2. Click on Sources (or Destinations) -![Step 2 screenshot](https://images.tango.us/public/screenshot_d956e987-424d-4f76-ad39-f6d6172f6acc?crop=focalpoint&fit=crop&fp-x=0.0855&fp-y=0.1083&fp-z=2.7473&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 3. Click on + New connector -![Step 3 screenshot](https://images.tango.us/public/screenshot_52248202-6351-496d-bc8f-892c43cf7cf8?crop=focalpoint&fit=crop&fp-x=0.8912&fp-y=0.0833&fp-z=3.0763&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 4. 
Fill the name of your custom connector -![Step 4 screenshot](https://images.tango.us/public/screenshot_809a22c8-ff38-4b10-8292-bce7364f111c?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.4145&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 5. Fill the Docker image name of your custom connector -![Step 5 screenshot](https://images.tango.us/public/screenshot_ed91d789-9fc7-4758-a6f0-50bf2f04f248?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.4924&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) - - -### 6. Fill the Docker Tag of your custom connector image -![Step 6 screenshot](https://images.tango.us/public/screenshot_5b6bff70-5703-4dac-b359-95b9ab8f8ce1?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.5703&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +1. Click on `Settings` in the left-hand sidebar. Navigate to `Sources` or `Destinations` depending on your connector. Click on `Add a new Docker connector`. +2. Name your custom connector in `Connector display name`. This is just the display name used for your workspace. -### 7. Fill the URL to your connector documentation -This is a required field at the moment, but you can fill with any value if you do not have online documentation for your connector. -This documentation will be linked in the connector setting page. -![Step 7 screenshot](https://images.tango.us/public/screenshot_007e6465-619f-4553-8d65-9af2f5ad76bc?crop=focalpoint&fit=crop&fp-x=0.4989&fp-y=0.6482&fp-z=1.9188&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +3. Fill in the Docker `Docker full image name` and `Docker image tag`. +4. (Optional) Add a link to connector's documentation in `Connector documentation URL` +You can optionally fill this with any value if you do not have online documentation for your connector. +This documentation will be linked in your connector setting's page. -### 8. Click on Add -![Step 8 screenshot](https://images.tango.us/public/screenshot_c097183f-1687-469f-852d-f66f743e8c10?crop=focalpoint&fit=crop&fp-x=0.5968&fp-y=0.7010&fp-z=3.0725&w=1200&mark-w=0.2&mark-pad=0&mark64=aHR0cHM6Ly9pbWFnZXMudGFuZ28udXMvc3RhdGljL21hZGUtd2l0aC10YW5nby13YXRlcm1hcmsucG5n&ar=4594%3A2234) +5. `Add` the connector to save the configuration. You can now select your new connector when setting up a new connection! 
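As a recap of the flow above, the sketch below shows the shape of the commands involved: pushing the image from your development or CI environment, then confirming the Airbyte host can pull it before you fill in the image name and tag in the UI. The registry path, image name, and tag are hypothetical placeholders - substitute the values from your own registry setup in steps 1-3.

```bash
# Hypothetical values - replace with your own registry, image name, and tag.
REGISTRY="us-central1-docker.pkg.dev/my-project/airbyte-custom-connectors"
IMAGE="source-internal-api"
TAG="0.1.0"

# From your development/CI environment: tag and push the connector image.
docker tag "$IMAGE:$TAG" "$REGISTRY/$IMAGE:$TAG"
docker push "$REGISTRY/$IMAGE:$TAG"

# From the Airbyte instance (docker-compose deployments): confirm the image can be pulled
# before entering the image name and tag in the UI.
docker pull "$REGISTRY/$IMAGE:$TAG"
```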
\ No newline at end of file diff --git a/docs/project-overview/README.md b/docs/project-overview/README.md deleted file mode 100644 index a427d02b0519..000000000000 --- a/docs/project-overview/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# Project Overview - diff --git a/docs/project-overview/code-of-conduct.md b/docs/project-overview/code-of-conduct.md deleted file mode 100644 index 9eacce28a212..000000000000 --- a/docs/project-overview/code-of-conduct.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -description: Our Community Code of Conduct ---- - -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others’ private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies within all project spaces, and it also applies when an individual is representing the project or its community in public spaces. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at [conduct@airbyte.io](mailto:conduct@airbyte.io). All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
- -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct.html](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html) - diff --git a/docs/project-overview/product-support-levels.md b/docs/project-overview/product-support-levels.md deleted file mode 100644 index 47e533d90f40..000000000000 --- a/docs/project-overview/product-support-levels.md +++ /dev/null @@ -1,39 +0,0 @@ -# Connector Support Levels - -The following table describes the support levels of Airbyte connectors. - -| | Certified | Custom | Community | -| --------------------------------- | -------------------------- | -------------------------- | ---------------------- | -| **Availability** | Available to all users | Available to all users | Available to all users | -| **Support: Cloud** | Supported* | Supported** | No Support | -| **Support: Powered by Airbyte** | Supported* | Supported** | No Support | -| **Support: Self-Managed Enterprise** | Supported* | Supported** | No Support | -| **Support: Community (OSS)** | Slack Support only | Slack Support only | No Support | -| **Who builds them?** | Either the community or the Airbyte team. | Anyone can build custom connectors. We recommend using our [Connector Builder](https://docs.airbyte.com/connector-development/connector-builder-ui/overview) or [Low-code CDK](https://docs.airbyte.com/connector-development/config-based/low-code-cdk-overview). | Typically they are built by the community. The Airbyte team may upgrade them to Certified at any time. | -| **Who maintains them?** | The Airbyte team | Users | Users | -| **Production Readiness** | Guaranteed by Airbyte | Not guaranteed | Not guaranteed | - -\*For Certified connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. Otherwise, please use our support portal and we will address your issues as soon as possible. - -\*\*For Custom connectors, Official Support SLAs are only available to customers with Premium Support included in their contract. This support is provided with best efforts, and maintenance/upgrades are owned by the customer. - -## Certified - -A **Certified** connector is actively maintained and supported by the Airbyte team and maintains a high quality bar. It is production ready. - -### What you should know about Certified connectors: - -- Certified connectors are available to all users. -- These connectors have been tested and vetted in order to be certified and are production ready. -- Certified connectors should go through minimal breaking change but in the event an upgrade is needed users will be given an adequate upgrade window. - -## Community - -A **Community** connector is maintained by the Airbyte community until it becomes Certified. Airbyte has over 800 code contributors and 15,000 people in the Slack community to help. The Airbyte team is continually certifying Community connectors as usage grows. As these connectors are not maintained by Airbyte, we do not offer support SLAs around them, and we encourage caution when using them in production. - -### What you should know about Community connectors: - -- Community connectors are available to all users. 
-- Community connectors may be upgraded to Certified at any time, and we will notify users of these upgrades via our Slack Community and in our Connector Catalog. -- Community connectors might not be feature-complete (features planned for release are under development or not prioritized) and may include backward-incompatible/breaking API changes with no or short notice. -- Community connectors have no Support SLAs. diff --git a/docs/project-overview/slack-code-of-conduct.md b/docs/project-overview/slack-code-of-conduct.md deleted file mode 100644 index c88da4c1adb5..000000000000 --- a/docs/project-overview/slack-code-of-conduct.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -description: Be nice to one another. ---- - -# Slack Code of Conduct - -Airbyte's Slack community is growing incredibly fast. We're home to over 1500 data professionals and are growing at an awesome pace. We are proud of our community, and have provided these guidelines to support new members in maintaining the wholesome spirit we have developed here. We appreciate your continued commitment to making this a community we are all excited to be a part of. - -## Rule 1: Be respectful. - -Our desire is for everyone to have a positive, fulfilling experience in Airbyte Slack, and we sincerely appreciate your help in making this happen. -All of the guidelines we provide below are important, but there’s a reason respect is the first rule. We take it seriously, and while the occasional breach of etiquette around Slack is forgivable, we cannot condone disrespectful behavior. - -## Rule 2: Use the most relevant channels. - -We deliberately use topic-specific Slack channels so members of the community can opt-in on various types of conversations. Our members take care to post their messages in the most relevant channel, and you’ll often see reminders about the best place to post a message (respectfully written, of course!). If you're looking for help directly from the Community Assistance Team or other Airbyte employees, please stick to posting in the airbyte-help channel, so we know you're asking us specifically! - -## Rule 3: Don’t double-post. - -Please be considerate of our community members’ time. We know your question is important, but please keep in mind that Airbyte Slack is not a customer service platform but a community of volunteers who will help you as they are able around their own work schedule. You have access to all the history, so it’s easy to check if your question has already been asked. - -## Rule 4: Check question for clarity and thoughtfulness. - -Airbyte Slack is a community of volunteers. Our members enjoy helping others; they are knowledgeable, gracious, and willing to give their time and expertise for free. Putting some effort into a well-researched and thoughtful post shows consideration for their time and will gain more responses. - -## Rule 5: Keep it public. - -This is a public forum; please do not contact individual members of this community without their express permission, regardless of whether you are trying to recruit someone, sell a product, or solicit help. - -## Rule 6: No soliciting! - -The purpose of the Airbyte Slack community is to provide a forum for data practitioners to discuss their work and share their ideas and learnings. It is not intended as a place to generate leads for vendors or recruiters, and may not be used as such. - -If you’re a vendor, you may advertise your product in #shameless-plugs. Advertising your product anywhere else is strictly against the rules. 
- -## Rule 7: Don't spam tags, or use @here or @channel. - -Using the @here and @channel keywords in a post will not help, as they are disabled in Slack for everyone excluding admins. Nonetheless, if you use them we will remind you with a link to this rule, to help you better understand the way Airbyte Slack operates. - -Do not tag specific individuals for help on your questions. If someone chooses to respond to your question, they will do so. You will find that our community of volunteers is generally very responsive and amazingly helpful! - -## Rule 8: Use threads for discussion. - -The simplest way to keep conversations on track in Slack is to use threads. The Airbyte Slack community relies heavily on threads, and if you break from this convention, rest assured one of our community members will respectfully inform you quickly! - -_If you see a message or receive a direct message that violates any of these rules, please contact an Airbyte team member and we will take the appropriate moderation action immediately. We have zero tolerance for intentional rule-breaking and hate speech._ - diff --git a/docs/quickstart/deploy-airbyte.md b/docs/quickstart/deploy-airbyte.md deleted file mode 100644 index 4df34e9aa05a..000000000000 --- a/docs/quickstart/deploy-airbyte.md +++ /dev/null @@ -1,28 +0,0 @@ -# Deploy Airbyte - -Deploying Airbyte Open-Source just takes two steps. - -1. Install Docker on your workstation \(see [instructions](https://www.docker.com/products/docker-desktop)\). Make sure you're on the latest version of `docker-compose`. -2. Run the following commands in your terminal: - -```bash -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -./run-ab-platform.sh -``` - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000](http://localhost:8000)! You will be asked for a username and password. By default, that's username `airbyte` and password `password`. Once you deploy airbyte to your servers, **be sure to change these** in your `.env` file. - -Alternatively, if you have an Airbyte Cloud invite, just follow [these steps.](../deploying-airbyte/on-cloud.md) - -If you need direct access to our team for any kind of assistance, don't hesitate to [talk to our team](https://airbyte.com/talk-to-sales-premium-support) to discuss about our premium support offers. - -## FAQ - -If you have any questions about the Airbyte Open-Source setup and deployment process, head over to our [Getting Started FAQ](https://github.com/airbytehq/airbyte/discussions/categories/questions) on our Airbyte Forum that answers the following questions and more: - -- How long does it take to set up Airbyte? -- Where can I see my data once I've run a sync? -- Can I set a start time for my sync? - -If there are any questions that we couldn't answer here, we'd love to help you get started. [Join our Slack](https://airbytehq.slack.com/ssb/redirect) and feel free to ask your questions in the \#getting-started channel. diff --git a/docs/quickstart/getting-started.md b/docs/quickstart/getting-started.md deleted file mode 100644 index afb0e3408522..000000000000 --- a/docs/quickstart/getting-started.md +++ /dev/null @@ -1,105 +0,0 @@ -# Getting Started - -## Goal - -During this getting started tutorial, we are going to replicate currencies closing price into a JSON file. - -## Start Airbyte - -First of all, make sure you have Docker and Docker Compose installed. 
Then run the following commands: - -```text -git clone https://github.com/airbytehq/airbyte.git -cd airbyte -./run-ab-platform.sh -``` - -Once you see an Airbyte banner, the UI is ready to go at [http://localhost:8000/](http://localhost:8000/). - -## Set up your preferences - -You should see an onboarding page. Enter your email if you want updates about Airbyte and continue. - -![](../.gitbook/assets/airbyte_get-started.png) - -## Set up your first connection - -### Create a source - -The source we are creating will pull data from an external API. It will replicate the closing price of currencies compared to USD since the specified start date. - -To set it up, just follow the instructions on the screenshot below. - -:::info - -You might have to wait ~30 seconds before the fields show up because it is the first time you're using Airbyte. - -::: - -![](../.gitbook/assets/demo_source.png) - -### Create a destination - -The destination we are creating is a simple JSON line file, meaning that it will contain one JSON object per line. Each objects will represent data extracted from the source. - -The resulting files will be located in `/tmp/airbyte_local/json_data` - -:::caution - -Please make sure that Docker Desktop has access to `/tmp` (and `/private` on a MacOS, as /tmp has a symlink that points to /private. It will not work otherwise). You allow it with "File sharing" in `Settings -> Resources -> File sharing -> add the one or two above folder` and hit the "Apply & restart" button. - -::: - -To set it up, just follow the instructions on the screenshot below. - -:::info - -You might have to wait ~30 seconds before the fields show up because it is the first time you're using Airbyte. - -::: - -![](../.gitbook/assets/demo_destination.png) - -### Create connection - -When we create the connection, we can select which data stream we want to replicate. We can also select if we want an incremental replication. The replication will run at the specified sync frequency. - -To set it up, just follow the instructions on the screenshot below. - -![](../.gitbook/assets/demo_connection.png) - -## Check the logs of your first sync - -After you've completed the onboarding, you will be redirected to the source list and will see the source you just added. Click on it to find more information about it. You will now see all the destinations connected to that source. Click on it and you will see the sync history. - -From there, you can look at the logs, download them, force a sync and adjust the configuration of your connection. - -![](../.gitbook/assets/demo_history.png) - -## Check the data of your first sync - -Now let's verify that this worked: - -```bash -cat /tmp/airbyte_local/json_data/_airbyte_raw_exchange_rate.jsonl -``` - -You should see one line for each day that was replicated. - -If you have [`jq`](https://stedolan.github.io/jq/) installed, let's look at the evolution of `EUR`. - -```bash -cat /tmp/airbyte_local/test_json/_airbyte_raw_exchange_rate.jsonl | -jq -c '.data | {date: .date, EUR: .EUR }' -``` - -And there you have it. You've pulled data from an API directly into a file and all of the actual configuration for this replication only took place in the UI. - -## That's it! - -This is just the beginning of using Airbyte. We support a large collection of sources and destinations. You can even contribute your own. - -If you have any questions at all, please reach out to us on [Slack](https://slack.airbyte.io/). 
We’re still in alpha, so if you see any rough edges or want to request a connector you need, please create an issue on our [Github](https://github.com/airbytehq/airbyte) or leave a thumbs up on an existing issue. - -Thank you and we hope you enjoy using Airbyte. - diff --git a/docs/readme.md b/docs/readme.md index cbf550c2a7a6..708a6a790430 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -1,21 +1,25 @@ +--- +displayed_sidebar: docs +--- + # Welcome to Airbyte Docs Whether you are an Airbyte user or contributor, we have docs for you! ## For Airbyte Cloud users -Browse the [connector catalog](https://docs.airbyte.com/integrations/) to find the connector you want. In case the connector is not yet supported on Airbyte Cloud, consider using [Airbyte Open Source](#for-airbyte-open-source-users). +Browse the [connector catalog](/integrations/) to find the connector you want. In case the connector is not yet supported on Airbyte Cloud, consider using [Airbyte Open Source](#for-airbyte-open-source-users). -Next, check out the [step-by-step tutorial](https://docs.airbyte.com/cloud/getting-started-with-airbyte-cloud) to sign up for Airbyte Cloud, understand Airbyte [concepts](https://docs.airbyte.com/cloud/core-concepts), and run your first sync. Then learn how to [use your Airbyte Cloud account](https://docs.airbyte.com/category/using-airbyte-cloud). +Next, check out the [step-by-step tutorial](/using-airbyte/getting-started) to sign up for Airbyte Cloud, understand Airbyte [concepts](/using-airbyte/core-concepts), and run your first sync. ## For Airbyte Open Source users -Browse the [connector catalog](https://docs.airbyte.com/integrations/) to find the connector you want. If the connector is not yet supported on Airbyte Open Source, [build your own connector](https://docs.airbyte.com/connector-development/). +Browse the [connector catalog](/integrations/) to find the connector you want. If the connector is not yet supported on Airbyte Open Source, [build your own connector](/connector-development/). -Next, check out the [Airbyte Open Source QuickStart](https://docs.airbyte.com/quickstart/deploy-airbyte). Then learn how to [deploy](https://docs.airbyte.com/deploying-airbyte/local-deployment) and [manage](https://docs.airbyte.com/operator-guides/upgrading-airbyte) Airbyte Open Source in your cloud infrastructure. +Next, check out the [Airbyte Open Source QuickStart](/quickstart/deploy-airbyte). Then learn how to [deploy](/deploying-airbyte/local-deployment) and [manage](/operator-guides/upgrading-airbyte) Airbyte Open Source in your cloud infrastructure. ## For Airbyte contributors -To contribute to Airbyte code, connectors, and documentation, refer to our [Contributing Guide](https://docs.airbyte.com/contributing-to-airbyte/). +To contribute to Airbyte code, connectors, and documentation, refer to our [Contributing Guide](/contributing-to-airbyte/). 
[![GitHub stars](https://img.shields.io/github/stars/airbytehq/airbyte?style=social&label=Star&maxAge=2592000)](https://GitHub.com/airbytehq/airbyte/stargazers/) [![License](https://img.shields.io/static/v1?label=license&message=MIT&color=brightgreen)](https://github.com/airbytehq/airbyte/tree/a9b1c6c0420550ad5069aca66c295223e0d05e27/LICENSE/README.md) [![License](https://img.shields.io/static/v1?label=license&message=ELv2&color=brightgreen)](https://github.com/airbytehq/airbyte/tree/a9b1c6c0420550ad5069aca66c295223e0d05e27/LICENSE/README.md) diff --git a/docs/release_notes/july_2022.md b/docs/release_notes/july_2022.md index 0c6cbc35e004..c3a4c8240b2b 100644 --- a/docs/release_notes/july_2022.md +++ b/docs/release_notes/july_2022.md @@ -19,7 +19,7 @@ This page includes new features and improvements to the Airbyte Cloud and Airbyt * Airbyte is currently developing a low-code connector builder, which allows you to easily create new source and destination connectors in your workspace. [#14402](https://github.com/airbytehq/airbyte/pull/14402) [#14317](https://github.com/airbytehq/airbyte/pull/14317) [#14288](https://github.com/airbytehq/airbyte/pull/14288) [#14004](https://github.com/airbytehq/airbyte/pull/14004) -* Added [documentation](https://docs.airbyte.com/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace#single-workspace-vs-multiple-workspaces) about the benefits and considerations of having a single workspace vs. multiple workspaces in Airbyte Cloud. [#14608](https://github.com/airbytehq/airbyte/pull/14608) +* Added [documentation](/using-airbyte/workspaces.md#single-workspace-vs-multiple-workspaces) about the benefits and considerations of having a single workspace vs. multiple workspaces in Airbyte Cloud. [#14608](https://github.com/airbytehq/airbyte/pull/14608) ### Improvements * Improved platform security by using Docker images from the latest version of OpenJDK (openjdk:19-slim-bullseye). [#14971](https://github.com/airbytehq/airbyte/pull/14971) diff --git a/docs/release_notes/upgrading_to_destinations_v2.md b/docs/release_notes/upgrading_to_destinations_v2.md index 0d5f70c6bed4..e48eea50f611 100644 --- a/docs/release_notes/upgrading_to_destinations_v2.md +++ b/docs/release_notes/upgrading_to_destinations_v2.md @@ -13,7 +13,7 @@ Airbyte Destinations V2 provides you with: - Internal Airbyte tables in the `airbyte_internal` schema: Airbyte will now generate all raw tables in the `airbyte_internal` schema. We no longer clutter your destination schema with raw data tables. - Incremental delivery for large syncs: Data will be incrementally delivered to your final tables. No more waiting hours to see the first rows in your destination table. -To see more details and examples on the contents of the Destinations V2 release, see this [guide](understanding-airbyte/typing-deduping.md). The remainder of this page will walk you through upgrading connectors from legacy normalization to Destinations V2. +To see more details and examples on the contents of the Destinations V2 release, see this [guide](../using-airbyte/core-concepts/typing-deduping.md). The remainder of this page will walk you through upgrading connectors from legacy normalization to Destinations V2. Destinations V2 were in preview for Snowflake and BigQuery during August 2023, and launched on August 29th, 2023. Other destinations will be transitioned to Destinations V2 on or before November 1st, 2023. 
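As a quick way to see the effect of the upgrade, you can list what Airbyte created in the `airbyte_internal` schema after a sync. The snippet below is a sketch for a Postgres destination and assumes a `$DESTINATION_URL` connection string is set; adjust the client and query for your own warehouse.

```bash
# List the raw tables that Destinations V2 writes into the airbyte_internal schema.
psql "$DESTINATION_URL" -c \
  "SELECT table_name FROM information_schema.tables WHERE table_schema = 'airbyte_internal';"
```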
diff --git a/docs/snowflake-native-apps/facebook-marketing.md b/docs/snowflake-native-apps/facebook-marketing.md index a24a38b37bc1..1b4a458e2e20 100644 --- a/docs/snowflake-native-apps/facebook-marketing.md +++ b/docs/snowflake-native-apps/facebook-marketing.md @@ -3,7 +3,7 @@ The Facebook Marketing Connector by Airbyte is a Snowflake Native Application that allows you to extract data from your Facebook Marketing account and load records into a Snowflake database of your choice. :::info -The Snowflake Native Apps platform is new and rapidly evolving. The Facebook Marketing Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](../understanding-airbyte/connections/full-refresh-overwrite.md) without deduplication is supported. +The Snowflake Native Apps platform is new and rapidly evolving. The Facebook Marketing Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md) without deduplication is supported. ::: # Getting started diff --git a/docs/snowflake-native-apps/linkedin-ads.md b/docs/snowflake-native-apps/linkedin-ads.md index af43f7157cc5..bd34a7ffa565 100644 --- a/docs/snowflake-native-apps/linkedin-ads.md +++ b/docs/snowflake-native-apps/linkedin-ads.md @@ -3,7 +3,7 @@ The LinkedIn Ads Connector by Airbyte is a Snowflake Native Application that allows you to extract data from your LinkedIn Ads account and load records into a Snowflake database of your choice. :::info -The Snowflake Native Apps platform is new and rapidly evolving. The LinkedIn Ads Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](../understanding-airbyte/connections/full-refresh-overwrite.md) without deduplication is supported. +The Snowflake Native Apps platform is new and rapidly evolving. The LinkedIn Ads Connector by Airbyte is in _public preview_ and is subject to further development that may affect setup and configuration of the application. Please note that, at this time, only a [full table refresh](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md) without deduplication is supported. ::: # Getting started diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index b9a5d7d12472..000000000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,59 +0,0 @@ -# Troubleshooting & FAQ - -Welcome to the Airbyte troubleshooting guide! Like any platform, you may experience issues when using Airbyte. This guide is designed to help you diagnose and resolve any problems you may encounter while using Airbyte. By following the troubleshooting steps outlined in this guide, you can quickly and effectively identify the root cause of the issue and take steps to resolve it. We recommend checking this guide whenever you encounter an issue with Airbyte to help ensure a smooth and uninterrupted experience with our platform. Let's dive in! - -Step 1: Check the logs. The logs provide detailed information about what's happening behind the scenes, and they can help pinpoint the root cause of the problem. - -Step 2: Check the documentation. 
Our documentation covers a wide range of topics, including common issues and their solutions, troubleshooting tips, and best practices. - -Step 3: Reach out to the community. Our community forum is a great place to ask for help, share your experiences, and learn from others who have faced similar issues. - -Step 4: Open a Github ticket. If you're still unable to resolve the issue after reaching out to the community, it's time to open a support ticket. Our support team is here to help you with any issues you're facing with Airbyte. - -Airbyte is an open source project with a vibrant community that fosters collaboration and mutual support. To ensure accessible troubleshooting guidance, Airbyte offers multiple platforms for users to ask and discuss issues, including the Airbyte Github, Airbyte Community Slack (which is over 10,000 users), and the Airbyte Forum. In addition, Airbyte hosts daily office hours that include topic demonstrations and dedicated space for issue discussion in Zoom meetings. In addition to these community resources, Airbyte also offers premium support packages for users who require additional assistance beyond what is provided by the community. - -## OSS Premium Support -Open source [premium support packages](https://airbyte.com/talk-to-sales-premium-support) are a great option for who use Airbyte OSS and need additional assistance beyond what is provided by the community. These packages typically include access to a dedicated support team that can provide assistance with installation, configuration, troubleshooting, and other technical issues. Premium support packages also often include faster response times, guaranteed issue resolution, and access to updates and patches. By opting for a premium support package, users can enjoy the benefits of open source software while also receiving the peace of mind they need to keep their systems running smoothly. - -Premier Support comes with: - -* 1-business-day SLA for your Severity 0 and 1 -* 2-business-day SLA for your Severity 2 and 3 -* 1-week Pull Request review SLA for first comment -If you need better SLA times, we can definitely discuss this, don't hesitate to [talk to our team](https://airbyte.com/talk-to-sales) about it. You can also see more details about it in our pricing page. - -## Office Hour -Airbyte provides a [Daily Office Hour](https://airbyte.com/daily-office-hour) to discuss issues. -It is a 45 minute meeting, the first 20 minutes are reserved to a weekly topic presentation about Airbyte concepts and the others 25 minutes are for general questions. The schedule is: -* Monday, Wednesday and Fridays: 1 PM PST/PDT -* Tuesday and Thursday: 4 PM CEST - - -## Github Issues -Whenever you face an issue using a connector or with the platform you're welcome to report opening a Github issue. -https://github.com/airbytehq/airbyte - - -## Airbyte Slack -You can access Airbyte Slack [here](https://slack.airbyte.com/). - -**Before posting on a channel this please first check if a similar question was already answered.** - -**The existing categories**: -* `#help-connections-issues`: for any questions or issues on your connections -* `#help-infrastructure-deployment`: for any questions or issues on your deployment and infrastructure -* `#help-connector-development`: for any questions about on the CDKs and issues while building a custom connector -* `#help-api-cli-orchestration`: for any questions or issues about the API, CLI, any scheduling effort. 
-* `#help-contributions`: for any questions about contributing to Airbyte’s codebase - -## Airbyte Forum -We are driving our community support from our [forum](https://github.com/airbytehq/airbyte/discussions). - -**Before posting on this forum please first check if a similar question was already answered.** - -**The existing categories**: -* 🙏 Questions: Ask the community for help on your question. As a reminder, the Airbyte team won’t provide help here, as our support is part of our Airbyte Cloud and Airbyte Enterprise offers. -* 💡 Ideas: Share ideas for new features, improvements, or feedback. -* 🙌 Show & Tell: Share projects, tutorials, videos, and articles you are working on. -* 🫶 Kind words: Show off something you love about Airbyte -* 🐙 General: For anything that doesn’t fit in the above categories diff --git a/docs/understanding-airbyte/airbyte-protocol.md b/docs/understanding-airbyte/airbyte-protocol.md index 66c0bc4f10ed..e436b24eada6 100644 --- a/docs/understanding-airbyte/airbyte-protocol.md +++ b/docs/understanding-airbyte/airbyte-protocol.md @@ -333,7 +333,7 @@ Technical systems often group their underlying data into namespaces with each na An example of a namespace is the RDBMS's `schema` concept. An API namespace might be used for multiple accounts (e.g. `company_a` vs `company_b`, each having a "users" and "purchases" stream). Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organization. -The `AirbyteStream` represents this concept through an optional field called `namespace`. Additional documentation on Namespaces can be found [here](namespaces.md). +The `AirbyteStream` represents this concept through an optional field called `namespace`. Additional documentation on Namespaces can be found [here](/using-airbyte/core-concepts/namespaces.md). ### Cursor diff --git a/docs/understanding-airbyte/beginners-guide-to-catalog.md b/docs/understanding-airbyte/beginners-guide-to-catalog.md index ff5451e15c5d..1953b1681c82 100644 --- a/docs/understanding-airbyte/beginners-guide-to-catalog.md +++ b/docs/understanding-airbyte/beginners-guide-to-catalog.md @@ -16,7 +16,7 @@ This article will illustrate how to use `AirbyteCatalog` via a series of example * [Dynamic Streams Example](#dynamic-streams-example) * [Nested Schema Example](#nested-schema-example) -In order to understand in depth how to configure incremental data replication, head over to the [incremental replication docs](connections/incremental-append.md). +In order to understand in depth how to configure incremental data replication, head over to the [incremental replication docs](/using-airbyte/core-concepts/sync-modes/incremental-append.md). ## Database Example @@ -92,7 +92,7 @@ The catalog is structured as a list of `AirbyteStream`. In the case of a databas Let's walk through what each field in a stream means. * `name` - The name of the stream. -* `supported_sync_modes` - This field lists the type of data replication that this source supports. The possible values in this array include `FULL_REFRESH` \([docs](connections/full-refresh-overwrite.md)\) and `INCREMENTAL` \([docs](connections/incremental-append.md)\). +* `supported_sync_modes` - This field lists the type of data replication that this source supports. The possible values in this array include `FULL_REFRESH` \([docs](/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md)\) and `INCREMENTAL` \([docs](/using-airbyte/core-concepts/sync-modes/incremental-append.md)\). 
* `source_defined_cursor` - If the stream supports `INCREMENTAL` replication, then this field signals whether the source can figure out how to detect new records on its own or not. * `json_schema` - This field is a [JsonSchema](https://json-schema.org/understanding-json-schema) object that describes the structure of the data. Notice that each key in the `properties` object corresponds to a column name in our database table. @@ -137,7 +137,7 @@ Let's walk through each field in the `ConfiguredAirbyteStream`: * `sync_mode` - This field must be one of the values that was in `supported_sync_modes` in the `AirbyteStream` - Configures which sync mode will be used when data is replicated. * `stream` - Hopefully this one looks familiar! This field contains an `AirbyteStream`. It should be _identical_ to the one we saw in the `AirbyteCatalog`. -* `cursor_field` - When `sync_mode` is `INCREMENTAL` and `source_defined_cursor = false`, this field configures which field in the stream will be used to determine if a record should be replicated or not. Read more about this concept in our [documentation of incremental replication](connections/incremental-append.md). +* `cursor_field` - When `sync_mode` is `INCREMENTAL` and `source_defined_cursor = false`, this field configures which field in the stream will be used to determine if a record should be replicated or not. Read more about this concept in our [documentation of incremental replication](/using-airbyte/core-concepts/sync-modes/incremental-append.md). ### Summary of the Postgres Example diff --git a/docs/understanding-airbyte/connections/README.md b/docs/understanding-airbyte/connections/README.md deleted file mode 100644 index 5e6c449152b7..000000000000 --- a/docs/understanding-airbyte/connections/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Connections and Sync Modes - -A connection is a configuration for syncing data between a source and a destination. To setup a connection, a user must configure things such as: - -- Sync schedule: when to trigger a sync of the data. -- Destination [Namespace](../namespaces.md) and stream names: where the data will end up being written. -- A catalog selection: which [streams and fields](../airbyte-protocol.md#catalog) to replicate from the source -- Sync mode: how streams should be replicated \(read and write\): -- Optional transformations: how to convert Airbyte protocol messages \(raw JSON blob\) data into some other data representations. - -## Sync schedules - -Sync schedules are explained below. For information about catalog selections, see [AirbyteCatalog & ConfiguredAirbyteCatalog](../airbyte-protocol.md#catalog). - -Syncs will be triggered by either: - -- A manual request \(i.e: clicking the "Sync Now" button in the UI\) -- A schedule - -When a scheduled connection is first created, a sync is executed as soon as possible. After that, a sync is run once the time since the last sync \(whether it was triggered manually or due to a schedule\) has exceeded the schedule interval. For example, consider the following illustrative scenario: - -- **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours. -- **October 1st, 2:01pm**: sync job runs -- **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered. 
-- **October 2nd, 5pm**: The user manually triggers a sync from the UI -- **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run -- **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run - -## Destination namespace - -The location of where a connection replication will store data is referenced as the destination namespace. The destination connectors should create and write records \(for both raw and normalized tables\) in the specified namespace which should be configurable in the UI via the Namespace Configuration field \(or NamespaceDefinition in the API\). You can read more about configuring namespaces [here](../namespaces.md). - -## Destination stream name - -### Prefix stream name - -Stream names refer to table names in a typical RDBMS. But it can also be the name of an API endpoint, etc. Similarly to the namespace, stream names can be configured to diverge from their names in the source with a "prefix" field. The prefix is prepended to the source stream name in the destination. - -## Stream-specific customization - -All the customization of namespace and stream names described above will be equally applied to all streams selected for replication in a catalog per connection. If you need more granular customization, stream by stream, for example, or with different logic rules, then you could follow the tutorial on [customizing transformations with dbt](../../operator-guides/transformation-and-normalization/transformations-with-dbt.md). - -## Sync modes - -A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. To minimize confusion, a mode's behavior is reflected in its name. The easiest way to understand Airbyte's sync modes is to understand how the modes are named. - -1. The first part of the name denotes how the source connector reads data from the source: - 1. Incremental: Read records added to the source since the last sync job. \(The first sync using Incremental is equivalent to a Full Refresh\) - - Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. - - Method 2: Using change data capture. Only supported by some sources. See [CDC](../cdc.md) for more info. - 2. Full Refresh: Read everything in the source. -2. The second part of the sync mode name denotes how the destination connector writes data. This is not affected by how the source connector produced the data: - 1. Overwrite: Overwrite by first deleting existing data in the destination. - 2. Append: Write by adding data to existing tables in the destination. - 3. Deduped History: Write by first adding data to existing tables in the destination to keep a history of changes. The final table is produced by de-duplicating the intermediate ones using a primary key. - -A sync mode is therefore, a combination of a source and destination mode together. The UI exposes the following options, whenever both source and destination connectors are capable to support it for the corresponding stream: - -- [Full Refresh Overwrite](full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. -- [Full Refresh Append](full-refresh-append.md): Sync the whole stream and append data in destination. -- [Incremental Append](incremental-append.md): Sync new records from stream and append data in destination. 
-- [Incremental Append + Deduped](incremental-append-deduped.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. - -## Optional operations - -### Typing and Deduping - -As described by the [Airbyte Protocol from the Airbyte Specifications](../airbyte-protocol.md), replication is composed of source connectors that are transmitting data in a JSON format. It is then written as such by the destination connectors. On top of this replication, Airbyte's database and datawarehous destinations can provide converstions from the raw JSON data into type-cast relational columns. Learn more [here](/understanding-airbyte/typing-deduping). - -:::note - -Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. - -::: - -### Custom sync operations - -Further operations can be included in a sync on top of Airbyte basic normalization \(or even to replace it completely\). See [operations](../operations.md) for more details. diff --git a/docs/understanding-airbyte/namespaces.md b/docs/understanding-airbyte/namespaces.md deleted file mode 100644 index d5deac5d12fc..000000000000 --- a/docs/understanding-airbyte/namespaces.md +++ /dev/null @@ -1,122 +0,0 @@ -# Namespaces - -## High-Level Overview - -:::info - -The high-level overview contains all the information you need to use Namespaces when pulling from APIs. Information past that can be read for advanced or educational purposes. - -::: - -When looking through our connector docs, you'll notice that some sources and destinations support "Namespaces." These allow you to organize and separate your data into groups in the destination if the destination supports it. In most cases, namespaces are schemas in the database you're replicating to. If your desired destination doesn't support it, you can ignore this feature. - -Note that this is the location that both your normalized and raw data will get written to. Your raw data will show up with the prefix `_airbyte_raw_` in the namespace you define. If you don't enable basic normalization, you will only receive the raw tables. - -If only your destination supports namespaces, you have two simple options. **This is the most likely case**, as all HTTP APIs currently don't support Namespaces. - -1. Mirror Destination Settings - Replicate to the default namespace in the destination, which will differ based on your destination. -2. Custom Format - Create a "Custom Format" to rename the namespace that your data will be replicated into. - -If both your desired source and destination support namespaces, you're likely using a more advanced use case with a database as a source, so continue reading. - -## What is a Namespace? - -Technical systems often group their underlying data into namespaces with each namespace's data isolated from another namespace. This isolation allows for better organisation and flexibility, leading to better usability. - -An example of a namespace is the RDMS's `schema` concept. Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organisation. - -## Syncing - -The Airbyte Protocol supports namespaces and allows Sources to define namespaces, and Destinations to write to various namespaces. - -If the Source does not support namespaces, the data will be replicated into the Destination's default namespace. 
For databases, the default namespace is the schema provided in the destination configuration. - -If the Destination does not support namespaces, the [namespace field](https://github.com/airbytehq/airbyte/blob/master/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml#L64) is ignored. - -## Destination namespace configuration - -As part of the [connections sync settings](connections/), it is possible to configure the namespace used by: 1. destination connectors: to store the `_airbyte_raw_*` tables. 2. basic normalization: to store the final normalized tables. - -Note that custom transformation outputs are not affected by the namespace settings from Airbyte: It is up to the configuration of the custom dbt project, and how it is written to handle its [custom schemas](https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas). The default target schema for dbt in this case, will always be the destination namespace. - -Available options for namespace configurations are: - -### - Mirror source structure - -Some sources \(such as databases based on JDBC for example\) are providing namespace information from which a stream has been extracted. Whenever a source is able to fill this field in the catalog.json file, the destination will try to reproduce exactly the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will fall back to the "Destination Connector settings". - -### - Destination connector settings - -All stream will be replicated and store in the default namespace defined on the destination settings page. In the destinations, namespace refers to: - -| Destination Connector | Namespace setting | -| :--- | :--- | -| BigQuery | dataset | -| MSSQL | schema | -| MySql | database | -| Oracle DB | schema | -| Postgres | schema | -| Redshift | schema | -| Snowflake | schema | -| S3 | path prefix | - -### - Custom format - -When replicating multiple sources into the same destination, conflicts on tables being overwritten by syncs can occur. - -For example, a Github source can be replicated into a "github" schema. But if we have multiple connections to different GitHub repositories \(similar in multi-tenant scenarios\): - -* we'd probably wish to keep the same table names \(to keep consistent queries downstream\) -* but store them in different namespaces \(to avoid mixing data from different "tenants"\) - -To solve this, we can either: - -* use a specific namespace for each connection, thus this option of custom format. -* or, use prefix to stream names as described below. - -Note that we can use a template format string using variables that will be resolved during replication as follow: - -* `${SOURCE_NAMESPACE}`: will be replaced by the namespace provided by the source if available - -### Examples - -The following table summarises how this works. 
We assume an example of replication configurations between a Postgres Source and Snowflake Destination \(with settings of schema = "my\_schema"\): - -| Namespace Configuration | Source Namespace | Source Table Name | Destination Namespace | Destination Table Name | -| :--- | :--- | :--- | :--- | :--- | -| Mirror source structure | public | my\_table | public | my\_table | -| Mirror source structure | | my\_table | my\_schema | my\_table | -| Destination connector settings | public | my\_table | my\_schema | my\_table | -| Destination connector settings | | my\_table | my\_schema | my\_table | -| Custom format = "custom" | public | my\_table | custom | my\_table | -| Custom format = "${SOURCE\_NAMESPACE}" | public | my\_table | public | my\_table | -| Custom format = "my\_${SOURCE\_NAMESPACE}\_schema" | public | my\_table | my\_public\_schema | my\_table | -| Custom format = " " | public | my\_table | my\_schema | my\_table | - -## Requirements - -* Both Source and Destination connectors need to support namespaces. -* Relevant Source and Destination connectors need to be at least version `0.3.0` or later. -* Airbyte version `0.21.0-alpha` or later. - -## Current Support - -### Sources - -* MSSQL -* MYSQL -* Oracle DB -* Postgres -* Redshift - -### Destination - -* BigQuery -* MSSQL -* MySql -* Oracle DB -* Postgres -* Redshift -* Snowflake -* S3 - diff --git a/docs/understanding-airbyte/operations.md b/docs/understanding-airbyte/operations.md index f3839499e39b..b21a087651b3 100644 --- a/docs/understanding-airbyte/operations.md +++ b/docs/understanding-airbyte/operations.md @@ -1,6 +1,6 @@ # Operations -Airbyte [connections](connections/) support configuring additional transformations that execute after the sync. Useful applications could be: +Airbyte [connections](/using-airbyte/core-concepts/sync-modes/) support configuring additional transformations that execute after the sync. Useful applications could be: * Customized normalization to better fit the requirements of your own business context. * Business transformations from a technical data representation into a more logical and business oriented data structure. This can facilitate usage by end-users, non-technical operators, and executives looking to generate Business Intelligence dashboards and reports. diff --git a/docs/understanding-airbyte/tech-stack.md b/docs/understanding-airbyte/tech-stack.md index ba69157075e6..c829f8b7a81b 100644 --- a/docs/understanding-airbyte/tech-stack.md +++ b/docs/understanding-airbyte/tech-stack.md @@ -3,7 +3,7 @@ ## Airbyte Core Backend * [Java 17](https://jdk.java.net/archive/) -* Framework: [Jersey](https://eclipse-ee4j.github.io/jersey/) +* Framework: [Micronaut](https://micronaut.io/) * API: [OAS3](https://www.openapis.org/) * Databases: [PostgreSQL](https://www.postgresql.org/) * Unit & E2E testing: [JUnit 5](https://junit.org/junit5) @@ -18,7 +18,7 @@ Connectors can be written in any language. However the most common languages are ## **Frontend** -* [Node.js 16](https://nodejs.org/en/) +* [Node.js](https://nodejs.org/en/) * [TypeScript](https://www.typescriptlang.org/) * Web Framework/Library: [React](https://reactjs.org/) @@ -27,7 +27,7 @@ Connectors can be written in any language. 
However the most common languages are * CI/CD: [GitHub Actions](https://github.com/features/actions) * Containerization: [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/) * Linter \(Frontend\): [ESLint](https://eslint.org/) -* Formatter \(Frontend\): [Prettier](https://prettier.io/) +* Formatter \(Frontend & Backend\): [Prettier](https://prettier.io/) * Formatter \(Backend\): [Spotless](https://github.com/diffplug/spotless) ## FAQ diff --git a/docs/understanding-airbyte/basic-normalization.md b/docs/using-airbyte/core-concepts/basic-normalization.md similarity index 91% rename from docs/understanding-airbyte/basic-normalization.md rename to docs/using-airbyte/core-concepts/basic-normalization.md index e51f4eb1a1ac..b76d4759de54 100644 --- a/docs/understanding-airbyte/basic-normalization.md +++ b/docs/using-airbyte/core-concepts/basic-normalization.md @@ -2,7 +2,7 @@ :::danger -Basic normalization is being removed in favor of [Typing and Deduping](/understanding-airbyte/typing-deduping), as part of [Destinations V2](/release_notes/upgrading_to_destinations_v2). This pages remains as a guide for legacy connectors. +Basic normalization is being removed in favor of [Typing and Deduping](typing-deduping.md), as part of [Destinations V2](/release_notes/upgrading_to_destinations_v2). This pages remains as a guide for legacy connectors. ::: @@ -14,10 +14,23 @@ The high-level overview contains all the information you need to use Basic Norma ::: -When you run your first Airbyte sync without the basic normalization, you'll notice that your data gets written to your destination as one data column with a JSON blob that contains all of your data. This is the `_airbyte_raw_` table that you may have seen before. Why do we create this table? A core tenet of ELT philosophy is that data should be untouched as it moves through the E and L stages so that the raw data is always accessible. If an unmodified version of the data exists in the destination, it can be retransformed without needing to sync data again. +For every connection, you can choose between two options: + +- Basic Normalization: Airbyte converts the raw JSON blob version of your data to the format of your destination. _Note: Not all destinations support normalization._ +- Raw data (no normalization): Airbyte places the JSON blob version of your data in a table called `_airbyte_raw_` + +When basic normalization is enabled, Airbyte transforms data after the sync in a step called `Basic Normalization`, which structures data from the source into a format appropriate for consumption in the destination. For example, when writing data from a nested, dynamically typed source like a JSON API to a relational destination like Postgres, normalization is the process which un-nests JSON from the source into a relational table format which uses the appropriate column types in the destination. + +Without basic normalization, your data will be written to your destination as one data column with a JSON blob that contains all of your data. This is the `_airbyte_raw_` table that you may have seen before. Why do we create this table? A core tenet of ELT philosophy is that data should be untouched as it moves through the E and L stages so that the raw data is always accessible. If an unmodified version of the data exists in the destination, it can be retransformed without needing to sync data again. 
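+
+As a rough illustration of this raw-table philosophy (a sketch only, not an official Airbyte recipe): assuming a Postgres destination, a stream named `users`, and the legacy raw layout where each record lands as JSON in an `_airbyte_data` column, a typed representation can be rebuilt from the raw table at any time without re-syncing:
+
+```sql
+-- Sketch only: table and column names are assumptions for the example.
+-- The raw table keeps the untouched JSON, so a typed view can be
+-- re-derived from it whenever the transformation logic changes.
+create or replace view users_typed as
+select
+  _airbyte_ab_id,
+  _airbyte_emitted_at,
+  (_airbyte_data ->> 'id')::bigint              as id,
+  _airbyte_data ->> 'name'                      as name,
+  (_airbyte_data ->> 'updated_at')::timestamptz as updated_at
+from _airbyte_raw_users;
+```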
If you have Basic Normalization enabled, Airbyte automatically uses this JSON blob to create a schema and tables with your data in mind, converting it to the format of your destination. This runs after your sync and may take a long time if you have a large amount of data synced. If you don't enable Basic Normalization, you'll have to transform the JSON data from that column yourself. +:::note + +Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. + +::: + ## Example Basic Normalization uses a fixed set of rules to map a json object from a source to the types and format that are native to the destination. For example if a source emits data that looks like this: @@ -78,7 +91,7 @@ Additional metadata columns can be added on some tables depending on the usage: - On de-duplicated (and SCD) tables: - `_airbyte_unique_key`: hash of primary keys used to de-duplicate the final table. -The [normalization rules](basic-normalization.md#Rules) are _not_ configurable. They are designed to pick a reasonable set of defaults to hit the 80/20 rule of data normalization. We respect that normalization is a detail-oriented problem and that with a fixed set of rules, we cannot normalize your data in such a way that covers all use cases. If this feature does not meet your normalization needs, we always put the full json blob in destination as well, so that you can parse that object however best meets your use case. We will be adding more advanced normalization functionality shortly. Airbyte is focused on the EL of ELT. If you need a really featureful tool for the transformations then, we suggest trying out dbt. +The [normalization rules](#Rules) are _not_ configurable. They are designed to pick a reasonable set of defaults to hit the 80/20 rule of data normalization. We respect that normalization is a detail-oriented problem and that with a fixed set of rules, we cannot normalize your data in such a way that covers all use cases. If this feature does not meet your normalization needs, we always put the full json blob in destination as well, so that you can parse that object however best meets your use case. We will be adding more advanced normalization functionality shortly. Airbyte is focused on the EL of ELT. If you need a really featureful tool for the transformations then, we suggest trying out dbt. Airbyte places the json blob version of your data in a table called `_airbyte_raw_`. If basic normalization is turned on, it will place a separate copy of the data in a table called ``. Under the hood, Airbyte is using dbt, which means that the data only ingresses into the data store one time. The normalization happens as a query within the datastore. This implementation avoids extra network time and costs. @@ -94,7 +107,7 @@ Airbyte runs this step before handing the final data over to other tools that wi To summarize, we can represent the ELT process in the diagram below. 
These are steps that happens between your "Source Database or API" and the final "Replicated Tables" with examples of implementation underneath: -![](../.gitbook/assets/connecting-EL-with-T-4.png) +![](../../.gitbook/assets/connecting-EL-with-T-4.png) In Airbyte, the current normalization option is implemented using a dbt Transformer composed of: @@ -103,14 +116,14 @@ In Airbyte, the current normalization option is implemented using a dbt Transfor ## Destinations that Support Basic Normalization -- [BigQuery](../integrations/destinations/bigquery.md) -- [MS Server SQL](../integrations/destinations/mssql.md) -- [MySQL](../integrations/destinations/mysql.md) +- [BigQuery](../../integrations/destinations/bigquery.md) +- [MS Server SQL](../../integrations/destinations/mssql.md) +- [MySQL](../../integrations/destinations/mysql.md) - The server must support the `WITH` keyword. - Require MySQL >= 8.0, or MariaDB >= 10.2.1. -- [Postgres](../integrations/destinations/postgres.md) -- [Redshift](../integrations/destinations/redshift.md) -- [Snowflake](../integrations/destinations/snowflake.md) +- [Postgres](../../integrations/destinations/postgres.md) +- [Redshift](../../integrations/destinations/redshift.md) +- [Snowflake](../../integrations/destinations/snowflake.md) Basic Normalization can be configured when you're creating the connection between your Connection Setup and after in the Transformation Tab. Select the option: **Normalized tabular data**. @@ -131,8 +144,8 @@ Airbyte uses the types described in the catalog to determine the correct type fo | `bit` | boolean | | | `boolean` | boolean | | | `string` with format label `date-time` | timestamp with timezone | | -| `array` | new table | see [nesting](basic-normalization.md#Nesting) | -| `object` | new table | see [nesting](basic-normalization.md#Nesting) | +| `array` | new table | see [nesting](#Nesting) | +| `object` | new table | see [nesting](#Nesting) | ### Nesting @@ -326,11 +339,11 @@ As mentioned in the overview: To enable basic normalization \(which is optional\), you can toggle it on or disable it in the "Normalization and Transformation" section when setting up your connection: -![](../.gitbook/assets/basic-normalization-configuration.png) +![](../../.gitbook/assets/basic-normalization-configuration.png) ## Incremental runs -When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](connections/full-refresh-append.md), [incremental append](connections/incremental-append.md) or [incremental deduped history](connections/incremental-append-deduped.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. +When the source is configured with sync modes compatible with incremental transformations (using append on destination) such as ( [full_refresh_append](./sync-modes/full-refresh-append.md), [incremental append](./sync-modes/incremental-append.md) or [incremental deduped history](./sync-modes/incremental-append-deduped.md)), only rows that have changed in the source are transferred over the network and written by the destination connector. Normalization will then try to build the normalized tables incrementally as the rows in the raw tables that have been created or updated since the last time dbt ran. As such, on each dbt run, the models get built incrementally. This limits the amount of data that needs to be transformed, vastly reducing the runtime of the transformations. 
This improves warehouse performance and reduces compute costs. Because normalization can be either run incrementally and, or, in full refresh, a technical column `_airbyte_normalized_at` can serve to track when was the last time a record has been transformed and written by normalization. This may greatly diverge from the `_airbyte_emitted_at` value as the normalized tables could be totally re-built at a latter time from the data stored in the `_airbyte_raw` tables. @@ -342,15 +355,15 @@ Normalization produces tables that are partitioned, clustered, sorted or indexed In general, normalization needs to do lookup on the last emitted_at column to know if a record is freshly produced and need to be incrementally processed or not. But in certain models, such as SCD tables for example, we also need to retrieve older data to update their type 2 SCD end_date and active_row flags, thus a different partitioning scheme is used to optimize that use case. -On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](connections/incremental-append-deduped.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). +On Postgres destination, an additional table suffixed with `_stg` for every stream replicated in [incremental deduped history](./sync-modes/incremental-append-deduped.md) needs to be persisted (in a different staging schema) for incremental transformations to work because of a [limitation](https://github.com/dbt-labs/docs.getdbt.com/issues/335#issuecomment-694199569). ## Extending Basic Normalization Note that all the choices made by Normalization as described in this documentation page in terms of naming (and more) could be overridden by your own custom choices. To do so, you can follow the following tutorials: -- to build a [custom SQL view](../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions -- to export, edit and run [custom dbt normalization](../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself -- or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). +- to build a [custom SQL view](../../operator-guides/transformation-and-normalization/transformations-with-sql.md) with your own naming conventions +- to export, edit and run [custom dbt normalization](../../operator-guides/transformation-and-normalization/transformations-with-dbt.md) yourself +- or further, you can configure the use of a custom dbt project within Airbyte by following [this guide](../../operator-guides/transformation-and-normalization/transformations-with-airbyte.md). ## CHANGELOG diff --git a/docs/using-airbyte/core-concepts/namespaces.md b/docs/using-airbyte/core-concepts/namespaces.md new file mode 100644 index 000000000000..31e092e0d862 --- /dev/null +++ b/docs/using-airbyte/core-concepts/namespaces.md @@ -0,0 +1,98 @@ +# Namespaces + +## High-Level Overview + +Namespaces are used to generally organize data, separate tests and production data, and enforce permissions. In most cases, namespaces are schemas in the database you're replicating to. + +As a part of connection setup, you select where in the destination you want to write your data. 
Note: The default configuration is **Destination default**.
+
+| Destination Namespace | Description |
+| ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- |
+| Destination default | All streams will be replicated to the single default namespace defined by the Destination. |
+| Mirror source structure | Some sources (for example, databases) provide namespace information for a stream. If a source provides namespace information, the destination will mirror the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will default to the "Destination default" option. |
+| Custom format | All streams will be replicated to a single user-defined namespace. See Custom format for more details. |
+
+Most of our destinations support this feature. To learn if your connector supports this, head to the individual connector page to learn more. If your desired destination doesn't support it, you can ignore this feature.
+
+## What is a Namespace?
+
+Systems often group their underlying data into namespaces with each namespace's data isolated from another namespace. This isolation allows for better organisation and flexibility, leading to better usability.
+
+An example of a namespace is the RDBMS's `schema` concept. Some common use cases for schemas are enforcing permissions, segregating test and production data and general data organisation.
+
+In a source, the namespace is the location from where the data is replicated to the destination. In a destination, the namespace is the location where the replicated data is stored in the destination.
+
+Airbyte supports namespaces and allows Sources to define namespaces, and Destinations to write to various namespaces. In Airbyte, the following options are available and are set on each individual connection.
+
+### Destination default
+
+All streams will be replicated and stored in the default namespace defined on the destination settings page, which is typically defined when the destination was set up. Depending on your destination, the namespace refers to:
+
+| Destination Connector | Namespace setting |
+| :--- | :--- |
+| BigQuery | dataset |
+| MSSQL | schema |
+| MySql | database |
+| Oracle DB | schema |
+| Postgres | schema |
+| Redshift | schema |
+| Snowflake | schema |
+| S3 | path prefix |
+
+:::tip
+If you prefer to replicate multiple sources into the same namespace, use the `Stream Prefix` configuration to differentiate data from these sources to ensure no streams collide when writing to the destination.
+:::
+
+### Mirror source structure
+
+Some sources \(such as databases based on JDBC\) provide namespace information from which a stream has been extracted. Whenever a source is able to fill this field in the catalog.json file, the destination will try to write to exactly the same namespace when this configuration is set. For sources or streams where the source namespace is not known, the behavior will fall back to the "Destination default". Most APIs do not provide namespace information.
+
+### Custom format
+
+When replicating multiple sources into the same destination, you may create table conflicts where tables are overwritten by different syncs. This is where using a custom namespace will ensure data is synced accurately.
+
+For example, a GitHub source can be replicated into a `github` schema. However, you may have multiple connections writing from different GitHub repositories \(common in multi-tenant scenarios\).
+
+:::tip
+To keep the same table names, Airbyte recommends writing the connections to unique namespaces to avoid mixing data from the different GitHub repositories.
+:::
+
+You can enter plain text (most common) or additionally add a dynamic parameter `${SOURCE_NAMESPACE}`, which uses the namespace provided by the source if available.
+
+### Examples
+
+The following table summarises how this works. In this example, we're looking at the replication configuration between a Postgres Source and Snowflake Destination \(with settings of schema = "my\_schema"\):
+
+| Namespace Configuration | Source Namespace | Source Table Name | Destination Namespace | Destination Table Name |
+| :--- | :--- | :--- | :--- | :--- |
+| Destination default | public | my\_table | my\_schema | my\_table |
+| Destination default | | my\_table | my\_schema | my\_table |
+| Mirror source structure | public | my\_table | public | my\_table |
+| Mirror source structure | | my\_table | my\_schema | my\_table |
+| Custom format = "custom" | public | my\_table | custom | my\_table |
+| Custom format = "${SOURCE\_NAMESPACE}" | public | my\_table | public | my\_table |
+| Custom format = "my\_${SOURCE\_NAMESPACE}\_schema" | public | my\_table | my\_public\_schema | my\_table |
+| Custom format = " " | public | my\_table | my\_schema | my\_table |
+
+## Syncing Details
+
+If the Source does not support namespaces, the data will be replicated into the Destination's default namespace. For databases, the default namespace is the schema provided in the destination configuration.
+
+If the Destination does not support namespaces, any preference set in the connection is ignored.
+
+## Using Namespaces with Basic Normalization
+
+As part of the connections sync settings, it is possible to configure the namespace used by: 1. destination connectors: to store the `_airbyte_raw_*` tables. 2. basic normalization: to store the final normalized tables.
+
+:::info
+When basic normalization is enabled, this is the location that both your normalized and raw data will get written to. Your raw data will show up with the prefix `_airbyte_raw_` in the namespace you define. If you don't enable basic normalization, you will only receive the raw tables.
+:::
+
+Note that custom transformation outputs are not affected by the namespace settings from Airbyte: It is up to the configuration of the custom dbt project, and how it is written to handle its [custom schemas](https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas). The default target schema for dbt in this case will always be the destination namespace.
+
+## Requirements
+
+* Both Source and Destination connectors need to support namespaces.
+* Relevant Source and Destination connectors need to be at least version `0.3.0` or later.
+* Airbyte version `0.21.0-alpha` or later.
+
diff --git a/docs/using-airbyte/core-concepts/readme.md b/docs/using-airbyte/core-concepts/readme.md
new file mode 100644
index 000000000000..9d8e495a62d5
--- /dev/null
+++ b/docs/using-airbyte/core-concepts/readme.md
@@ -0,0 +1,108 @@
+# Core Concepts
+
+Airbyte enables you to build data pipelines and replicate data from a source to a destination. You can configure how frequently the data is synced, what data is replicated, and how the data is written to the destination.
+ +This page describes the concepts you need to know to use Airbyte. + +## Source + +A source is an API, file, database, or data warehouse that you want to ingest data from. + +## Destination + +A destination is a data warehouse, data lake, database, or an analytics tool where you want to load your ingested data. + +## Connector + +An Airbyte component which pulls data from a source or pushes data to a destination. + +## Connection + +A connection is an automated data pipeline that replicates data from a source to a destination. Setting up a connection enables configuration of the following parameters: + +| Concept | Description | +|---------------------|---------------------------------------------------------------------------------------------------------------------| +| Replication Frequency | When should a data sync be triggered? | +| Destination Namespace and Stream Prefix | Where should the replicated data be written? | +| Sync Mode | How should the streams be replicated (read and written)? | +| Schema Propagation | How should Airbyte handle schema drift in sources? | +| Catalog Selection | What data should be replicated from the source to the destination? | + +## Stream + +A stream is a group of related records. + +Examples of streams: + +- A table in a relational database +- A resource or API endpoint for a REST API +- The records from a directory containing many files in a filesystem + +## Field + +A field is an attribute of a record in a stream. + +Examples of fields: + +- A column in the table in a relational database +- A field in an API response + +## Sync Schedules + +There are three options for scheduling a sync to run: +- Scheduled (ie. every 24 hours, every 2 hours) +- [CRON schedule](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) +- Manual \(i.e: clicking the "Sync Now" button in the UI or through the API\) + +For more details, see our [Sync Schedules documentation](sync-schedules.md). + +## Destination Namespace + +A namespace defines where the data will be written to your destination. You can use the namespace to group streams in a source or destination. In a relational database system, this is typically known as a schema. + +For more details, see our [Namespace documentation](namespaces.md). + +## Sync Mode + +A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes depending on what you want to accomplish. + +Read more about each [sync mode](using-airbyte/core-concepts/sync-modes) and how they differ. + +## Typing and Deduping + +Typing and deduping ensures the data emitted from sources is written into the correct type-cast relational columns and only contains unique records. Typing and deduping is only relevant for the following relational database & warehouse destinations: + +- Snowflake +- BigQuery + +:::info +Typing and Deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. We are retaining documentation about normalization to support legacy destinations. +::: + +For more details, see our [Typing & Deduping documentation](/understanding-airbyte/typing-deduping). + +## Basic Normalization + +Basic Normalization transforms data after a sync to denest columns into their own tables. 
Note that normalization is only available for the following relational database & warehouse destinations: + +- Redshift +- Postgres +- Oracle +- MySQL +- MSSQL + +For more details, see our [Basic Normalization documentation](/using-airbyte/core-concepts/basic-normalization.md). + +## Custom Transformations + +Airbyte integrates natively with dbt to allow you to use dbt for post-sync transformations. This is useful if you would like to trigger dbt models after a sync successfully completes. + +For more details, see our [dbt integration documentation](/cloud/managing-airbyte-cloud/dbt-cloud-integration.md). + +## Workspace + +A workspace is a grouping of sources, destinations, connections, and other configurations. It lets you collaborate with team members and share resources across your team under a shared billing account. + +## Glossary of Terms + +You can find a extended list of [Airbyte specific terms](https://glossary.airbyte.com/term/airbyte-glossary-of-terms/), [data engineering concepts](https://glossary.airbyte.com/term/data-engineering-concepts) or many [other data related terms](https://glossary.airbyte.com/). diff --git a/docs/using-airbyte/core-concepts/sync-modes/README.md b/docs/using-airbyte/core-concepts/sync-modes/README.md new file mode 100644 index 000000000000..a561506a1f73 --- /dev/null +++ b/docs/using-airbyte/core-concepts/sync-modes/README.md @@ -0,0 +1,20 @@ +# Sync Modes + +A sync mode governs how Airbyte reads from a source and writes to a destination. Airbyte provides different sync modes to account for various use cases. To minimize confusion, a mode's behavior is reflected in its name. The easiest way to understand Airbyte's sync modes is to understand how the modes are named. + +1. The first part of the name denotes how the source connector reads data from the source: + 1. Incremental: Read records added to the source since the last sync job. \(The first sync using Incremental is equivalent to a Full Refresh\) + - Method 1: Using a cursor. Generally supported by all connectors whose data source allows extracting records incrementally. + - Method 2: Using change data capture. Only supported by some sources. See [CDC](../../../understanding-airbyte/cdc.md) for more info. + 2. Full Refresh: Read everything in the source. +2. The second part of the sync mode name denotes how the destination connector writes data. This is not affected by how the source connector produced the data: + 1. Overwrite: Overwrite by first deleting existing data in the destination. + 2. Append: Write by adding data to existing tables in the destination. + 3. Deduped History: Write by first adding data to existing tables in the destination to keep a history of changes. The final table is produced by de-duplicating the intermediate ones using a primary key. + +A sync mode is a combination of a source and destination mode together. The UI exposes the following options, whenever both source and destination connectors are capable to support it for the corresponding stream: + +- [Incremental Append + Deduped](./incremental-append-deduped.md): Sync new records from stream and append data in destination, also provides a de-duplicated view mirroring the state of the stream in the source. +- [Full Refresh Overwrite](./full-refresh-overwrite.md): Sync the whole stream and replace data in destination by overwriting it. +- [Full Refresh Append](./full-refresh-append.md): Sync the whole stream and append data in destination. 
+- [Incremental Append](./incremental-append.md): Sync new records from stream and append data in destination. diff --git a/docs/understanding-airbyte/connections/full-refresh-append.md b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md similarity index 92% rename from docs/understanding-airbyte/connections/full-refresh-append.md rename to docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md index b7343fc1c07b..ccdd7951bbe5 100644 --- a/docs/understanding-airbyte/connections/full-refresh-append.md +++ b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-append.md @@ -2,7 +2,7 @@ ## Overview -The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available data requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](incremental-append.md), which does not sync data that has already been synced before. +The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available data requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](./incremental-append.md), which does not sync data that has already been synced before. In the **Append** variant, new syncs will take all data from the sync and append it to the destination table. Therefore, if syncing similar information multiple times, every sync will create duplicates of already existing data. diff --git a/docs/understanding-airbyte/connections/full-refresh-overwrite.md b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md similarity index 91% rename from docs/understanding-airbyte/connections/full-refresh-overwrite.md rename to docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md index 44d4ff5f6699..6de7d266c9ce 100644 --- a/docs/understanding-airbyte/connections/full-refresh-overwrite.md +++ b/docs/using-airbyte/core-concepts/sync-modes/full-refresh-overwrite.md @@ -2,7 +2,7 @@ ## Overview -The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available information requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](incremental-append.md), which does not sync data that has already been synced before. +The **Full Refresh** modes are the simplest methods that Airbyte uses to sync data, as they always retrieve all available information requested from the source, regardless of whether it has been synced before. This contrasts with [**Incremental sync**](./incremental-append.md), which does not sync data that has already been synced before. In the **Overwrite** variant, new syncs will destroy all data in the existing destination table and then pull the new data in. Therefore, data that has been removed from the source after an old sync will be deleted in the destination table. 
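+
+For intuition only, the Overwrite behaviour is roughly equivalent to the following SQL pattern. This is a conceptual sketch with made-up table names, not the SQL the destination connectors actually execute:
+
+```sql
+-- Conceptual sketch: every sync rebuilds the table from scratch, so rows
+-- removed from the source no longer appear in the destination afterwards.
+begin;
+truncate table public.customers;
+insert into public.customers (id, name, updated_at)
+select id, name, updated_at
+from staging.customers_new_sync;
+commit;
+```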
diff --git a/docs/understanding-airbyte/connections/incremental-append-deduped.md b/docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md similarity index 89% rename from docs/understanding-airbyte/connections/incremental-append-deduped.md rename to docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md index 86e8ee92ee75..6fa0272fda6e 100644 --- a/docs/understanding-airbyte/connections/incremental-append-deduped.md +++ b/docs/using-airbyte/core-concepts/sync-modes/incremental-append-deduped.md @@ -69,19 +69,19 @@ In the final de-duplicated table: ## Source-Defined Cursor -Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. +Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. -![](../../.gitbook/assets/incremental_source_defined.png) +![](../../../.gitbook/assets/incremental_source_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find a more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## User-Defined Cursor -Some sources cannot define the cursor without user input. For example, in the [postgres source](../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. +Some sources cannot define the cursor without user input. For example, in the [postgres source](../../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. -![](../../.gitbook/assets/incremental_user_defined.png) +![](../../../.gitbook/assets/incremental_user_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find a more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## Source-Defined Primary key @@ -91,7 +91,7 @@ Some sources are able to determine the primary key that they use without any use Some sources cannot define the cursor without user input or the user may want to specify their own primary key on the destination that is different from the source definitions. In these cases, select the column in the sync settings dropdown that should be used as the `primary key` or `composite primary keys`. -![](../../.gitbook/assets/primary_key_user_defined.png) +![](../../../.gitbook/assets/primary_key_user_defined.png) In this example, we selected both the `campaigns.id` and `campaigns.name` as the composite primary key of our `campaigns` table. 
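+
+To make the de-duplication step concrete, the final table conceptually keeps only the most recent record per composite primary key. The query below is an illustrative sketch with assumed table and column names, not the SQL Airbyte generates:
+
+```sql
+-- Sketch: keep the latest record for each (id, name) pair, ordering
+-- duplicates by an assumed cursor column such as updated_at.
+with ranked as (
+  select
+    *,
+    row_number() over (
+      partition by id, name
+      order by updated_at desc
+    ) as rn
+  from appended_campaigns
+)
+select id, name, updated_at
+from ranked
+where rn = 1;
+```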
@@ -118,4 +118,4 @@ select * from table where cursor_field > 'last_sync_max_cursor_field_value' **Note**: -Previous versions of Airbyte destinations supported SCD tables, which would sore every entry seen for a record. This was removed with Destinations V2 and [Typing and Deduplication](/understanding-airbyte/typing-deduping.md). +Previous versions of Airbyte destinations supported SCD tables, which would sore every entry seen for a record. This was removed with Destinations V2 and [Typing and Deduplication](../typing-deduping.md). diff --git a/docs/understanding-airbyte/connections/incremental-append.md b/docs/using-airbyte/core-concepts/sync-modes/incremental-append.md similarity index 88% rename from docs/understanding-airbyte/connections/incremental-append.md rename to docs/using-airbyte/core-concepts/sync-modes/incremental-append.md index c380d2226912..c9facb4711f3 100644 --- a/docs/understanding-airbyte/connections/incremental-append.md +++ b/docs/using-airbyte/core-concepts/sync-modes/incremental-append.md @@ -2,7 +2,7 @@ ## Overview -Airbyte supports syncing data in **Incremental Append** mode i.e: syncing only replicate _new_ or _modified_ data. This prevents re-fetching data that you have already replicated from a source. If the sync is running for the first time, it is equivalent to a [Full Refresh](full-refresh-append.md) since all data will be considered as _new_. +Airbyte supports syncing data in **Incremental Append** mode i.e: syncing only replicate _new_ or _modified_ data. This prevents re-fetching data that you have already replicated from a source. If the sync is running for the first time, it is equivalent to a [Full Refresh](./full-refresh-append.md) since all data will be considered as _new_. In this flavor of incremental, records in the warehouse destination will never be deleted or mutated. A copy of each new or updated record is _appended_ to the data in the warehouse. This means you can find multiple copies of the same record in the destination warehouse. We provide an "at least once" guarantee of replicating each record that is present when the sync runs. @@ -62,25 +62,25 @@ The output we expect to see in the warehouse is as follows: ## Source-Defined Cursor -Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. +Some sources are able to determine the cursor that they use without any user input. For example, in the [exchange rates source](../../../integrations/sources/exchange-rates.md), the source knows that the date field should be used to determine the last record that was synced. In these cases, simply select the incremental option in the UI. -![](../../.gitbook/assets/incremental_source_defined.png) +![](../../../.gitbook/assets/incremental_source_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find a more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## User-Defined Cursor -Some sources cannot define the cursor without user input. For example, in the [postgres source](../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. 
In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. +Some sources cannot define the cursor without user input. For example, in the [postgres source](../../../integrations/sources/postgres.md), the user needs to choose which column in a database table they want to use as the `cursor field`. In these cases, select the column in the sync settings dropdown that should be used as the `cursor field`. -![](../../.gitbook/assets/incremental_user_defined.png) +![](../../../.gitbook/assets/incremental_user_defined.png) -\(You can find a more technical details about the configuration data model [here](../airbyte-protocol.md#catalog)\). +\(You can find a more technical details about the configuration data model [here](../../../understanding-airbyte/airbyte-protocol.md#catalog)\). ## Getting the Latest Snapshot of data As demonstrated in the examples above, with **Incremental Append,** a record which was updated in the source will be appended to the destination rather than updated in-place. This means that if data in the source uses a primary key \(e.g: `user_id` in the `users` table\), then the destination will end up having multiple records with the same primary key value. -However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Append + Deduped](incremental-append-deduped.md) instead. +However, some use cases require only the latest snapshot of the data. This is available by using other flavors of sync modes such as [Incremental - Append + Deduped](./incremental-append-deduped.md) instead. Note that in **Incremental Append**, the size of the data in your warehouse increases monotonically since an updated record in the source is appended to the destination rather than updated in-place. @@ -122,7 +122,7 @@ At the end of the second incremental sync, the data warehouse would still contai Similarly, if multiple modifications are made during the same day to the same records. If the frequency of the sync is not granular enough \(for example, set for every 24h\), then intermediate modifications to the data are not going to be detected and emitted. Only the state of data at the time the sync runs will be reflected in the destination. -Those concerns could be solved by using a different incremental approach based on binary logs, Write-Ahead-Logs \(WAL\), or also called [Change Data Capture \(CDC\)](../cdc.md). +Those concerns could be solved by using a different incremental approach based on binary logs, Write-Ahead-Logs \(WAL\), or also called [Change Data Capture \(CDC\)](../../../understanding-airbyte/cdc.md). The current behavior of **Incremental** is not able to handle source schema changes yet, for example, when a column is added, renamed or deleted from an existing table etc. It is recommended to trigger a [Full refresh - Overwrite](full-refresh-overwrite.md) to correctly replicate the data to the destination with the new schema changes. diff --git a/docs/using-airbyte/core-concepts/sync-schedules.md b/docs/using-airbyte/core-concepts/sync-schedules.md new file mode 100644 index 000000000000..a0d6c22fbee9 --- /dev/null +++ b/docs/using-airbyte/core-concepts/sync-schedules.md @@ -0,0 +1,39 @@ +# Sync Schedules + +For each connection, you can select between three options that allow a sync to run. The three options for `Replication Frequency` are: + +- Scheduled (e.g. 
every 24 hours, every 2 hours)
+- Cron scheduling
+- Manual
+
+## Sync Limitations
+
+* Only one sync per connection can run at a time.
+* If a sync is scheduled to run before the previous sync finishes, the scheduled sync will start after the completion of the previous sync.
+* Syncs can run at most every 60 minutes. Reach out to [Sales](https://airbyte.com/company/talk-to-sales) if you require replication more frequently than once per hour.
+
+## Scheduled Syncs
+When a scheduled connection is first created, a sync is executed immediately after creation. After that, a sync is run once the time since the last sync \(whether it was triggered manually or due to a schedule\) has exceeded the schedule interval. For example:
+
+- **October 1st, 2pm**, a user sets up a connection to sync data every 24 hours.
+- **October 1st, 2:01pm**: sync job runs
+- **October 2nd, 2:01pm:** 24 hours have passed since the last sync, so a sync is triggered.
+- **October 2nd, 5pm**: The user manually triggers a sync from the UI
+- **October 3rd, 2:01pm:** since the last sync was less than 24 hours ago, no sync is run
+- **October 3rd, 5:01pm:** It has been more than 24 hours since the last sync, so a sync is run
+
+## Cron Scheduling
+If you prefer more flexibility in scheduling your sync, you can also use CRON scheduling to set a precise time of day or month.
+
+Airbyte uses the CRON scheduler from [Quartz](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html). We recommend reading their [documentation](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) to learn more about how to construct a cron expression.
+
+When setting up the cron expression, you will also be asked to choose a time zone the sync will run in.
+
+:::note
+For Scheduled or cron scheduled syncs, Airbyte guarantees syncs will initiate with a schedule accuracy of +/- 30 minutes.
+:::
+
+## Manual Syncs
+When the connection is set to replicate with `Manual` frequency, the sync will not automatically run.
+
+It can be triggered by clicking the "Sync Now" button at any time through the UI, or via the API.
\ No newline at end of file
diff --git a/docs/understanding-airbyte/typing-deduping.md b/docs/using-airbyte/core-concepts/typing-deduping.md
index f66e6a3c59ba..1cd029e47a03 100644
--- a/docs/understanding-airbyte/typing-deduping.md
+++ b/docs/using-airbyte/core-concepts/typing-deduping.md
@@ -1,6 +1,6 @@
 # Typing and Deduping
 
-This page refers to new functionality added by [Destinations V2](/release_notes/upgrading_to_destinations_v2/). Typing and deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. Please check each destination to learn if Typing and deduping is supported.
+This page refers to new functionality added by [Destinations V2](/release_notes/upgrading_to_destinations_v2/). Typing and deduping is the default method of transforming datasets within data warehouse and database destinations after they've been replicated. Please check each destination to learn if Typing and Deduping is supported.
 
 ## What is Destinations V2?
 
@@ -11,6 +11,12 @@ This page refers to new functionality added by [Destinations V2](/release_notes/
 - Internal Airbyte tables in the `airbyte_internal` schema: Airbyte will now generate all raw tables in the `airbyte_internal` schema.
We no longer clutter your desired schema with raw data tables. - Incremental delivery for large syncs: Data will be incrementally delivered to your final tables when possible. No more waiting hours to see the first rows in your destination table. +:::note + +Typing and Deduping may cause an increase in your destination's compute cost. This cost will vary depending on the amount of data that is transformed and is not related to Airbyte credit usage. + +::: + ## `_airbyte_meta` Errors "Per-row error handling" is a new paradigm for Airbyte which provides greater flexibility for our users. Airbyte now separates `data-moving problems` from `data-content problems`. Prior to Destinations V2, both types of errors were handled the same way: by failing the sync. Now, a failing sync means that Airbyte could not _move_ all of your data. You can query the `_airbyte_meta` column to see which rows failed for _content_ reasons, and why. This is a more flexible approach, as you can now decide how to handle rows with errors on a case-by-case basis. @@ -34,7 +40,7 @@ Depending on your use-case, it may still be valuable to consider rows with error ## Destinations V2 Example -Consider the following [source schema](https://docs.airbyte.com/integrations/sources/faker) for stream `users`: +Consider the following [source schema](/integrations/sources/faker) for stream `users`: ```json { @@ -58,7 +64,7 @@ The data from one stream will now be mapped to one table in your schema as below | Failed typing that didn’t break other rows ⟶ | yyy-yyy-yyy | 2022-01-01 12:00:00 | { errors: {[“fish” is not a valid integer for column “age”]} | 2 | evan | NULL | { city: “Menlo Park”, zip: “94002” } | | Not-yet-typed ⟶ | | | | | | | | -In legacy normalization, columns of [Airbyte type](https://docs.airbyte.com/understanding-airbyte/supported-data-types/#the-types) `Object` in the Destination were "unnested" into separate tables. In this example, with Destinations V2, the previously unnested `public.users_address` table with columns `city` and `zip` will no longer be generated. +In legacy normalization, columns of [Airbyte type](/understanding-airbyte/supported-data-types/#the-types) `Object` in the Destination were "unnested" into separate tables. In this example, with Destinations V2, the previously unnested `public.users_address` table with columns `city` and `zip` will no longer be generated. #### Destination Table Name: _airbyte.raw_public_users_ (`airbyte.{namespace}_{stream}`) @@ -70,4 +76,4 @@ In legacy normalization, columns of [Airbyte type](https://docs.airbyte.com/unde You also now see the following changes in Airbyte-provided columns: -![Airbyte Destinations V2 Column Changes](../release_notes/assets/updated_table_columns.png) +![Airbyte Destinations V2 Column Changes](../../release_notes/assets/updated_table_columns.png) diff --git a/docs/quickstart/add-a-destination.md b/docs/using-airbyte/getting-started/add-a-destination.md similarity index 81% rename from docs/quickstart/add-a-destination.md rename to docs/using-airbyte/getting-started/add-a-destination.md index 594acd02cf9e..cc473d8384f3 100644 --- a/docs/quickstart/add-a-destination.md +++ b/docs/using-airbyte/getting-started/add-a-destination.md @@ -1,20 +1,20 @@ # Add a Destination -Destinations are the data warehouses, data lakes, databases and analytics tools where you will load the data from your chosen source(s). 
The steps to setting up your first destination are very similar to those for [setting up a source](https://docs.airbyte.com/quickstart/add-a-source). +Destinations are the data warehouses, data lakes, databases and analytics tools where you will load the data from your chosen source(s). The steps to setting up your first destination are very similar to those for [setting up a source](./add-a-source). Once you've logged in to your Airbyte Open Source deployment, click on the **Destinations** tab in the navigation bar found on the left side of the dashboard. This will take you to the list of available destinations. -![Destination List](../.gitbook/assets/add-a-destination/getting-started-destination-list.png) +![Destination List](../../.gitbook/assets/add-a-destination/getting-started-destination-list.png) You can use the provided search bar at the top of the page, or scroll down the list to find the destination you want to replicate data from. :::tip -You can filter the list of destinations by support level. Airbyte connectors are categorized in two support levels, Certified and Community. See our [Product Support Levels](https://docs.airbyte.com/project-overview/product-support-levels) page for more information on this topic. +You can filter the list of destinations by support level. Airbyte connectors are categorized in two support levels, Certified and Community. See our [Connector Support Levels](/integrations/connector-support-levels.md) page for more information on this topic. ::: As an example, we'll be setting up a simple JSON file that will be saved on our local system as the destination. Select **Local JSON** from the list of destinations. This will take you to the destination setup page. -![Destination Page](../.gitbook/assets/add-a-destination/getting-started-destination-page.png) +![Destination Page](../../.gitbook/assets/add-a-destination/getting-started-destination-page.png) The left half of the page contains a set of fields that you will have to fill out. In the **Destination name** field, you can enter a name of your choosing to help you identify this instance of the connector. By default, this will be set to the name of the destination (i.e., `Local JSON`). @@ -26,4 +26,4 @@ Each destination will have its own set of required fields to configure during se Some destinations will also have an **Optional Fields** tab located beneath the required fields. You can open this tab to view and configure any additional optional parameters that exist for the source. These fields generally grant you more fine-grained control over your data replication, but you can safely ignore them. ::: -Once you've filled out the required fields, select **Set up destination**. A connection check will run to verify that a successful connection can be established. Now you're ready to [set up your first connection](https://docs.airbyte.com/quickstart/set-up-a-connection)! +Once you've filled out the required fields, select **Set up destination**. A connection check will run to verify that a successful connection can be established. Now you're ready to [set up your first connection](./set-up-a-connection)! 
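One practical note on the Local JSON example above: the **Destination Path** you enter is resolved inside Airbyte's local mount, and files written under the `/local` prefix end up in `/tmp/airbyte_local` on the machine running Airbyte. The following is only an illustrative sketch (the path `/local/json_data` is an assumed example value, not a required one), showing how you might later confirm that the destination wrote files:

```bash
# Sketch only: assumes the Local JSON destination was configured with
# Destination Path = /local/json_data. Airbyte maps the /local prefix to
# /tmp/airbyte_local on the host, so the connector's output should appear here.
ls -l /tmp/airbyte_local/json_data
```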
diff --git a/docs/quickstart/add-a-source.md b/docs/using-airbyte/getting-started/add-a-source.md similarity index 86% rename from docs/quickstart/add-a-source.md rename to docs/using-airbyte/getting-started/add-a-source.md index 633d9a1d8b77..e5f59b2f7517 100644 --- a/docs/quickstart/add-a-source.md +++ b/docs/using-airbyte/getting-started/add-a-source.md @@ -2,11 +2,11 @@ Setting up a new source in Airbyte is a quick and simple process! When viewing the Airbyte UI, you'll see the main navigation bar on the left side of your screen. Click the **Sources** tab to bring up a list of all available sources. -![](../.gitbook/assets/add-a-source/getting-started-source-list.png) +![](../../.gitbook/assets/add-a-source/getting-started-source-list.png) You can use the provided search bar, or simply scroll down the list to find the source you want to replicate data from. Let's use Google Sheets as an example. Clicking on the **Google Sheets** card will bring us to its setup page. -![](../.gitbook/assets/add-a-source/getting-started-source-page.png) +![](../../.gitbook/assets/add-a-source/getting-started-source-page.png) The left half of the page contains a set of fields that you will have to fill out. In the **Source name** field, you can enter a name of your choosing to help you identify this instance of the connector. By default, this will be set to the name of the source (ie, `Google Sheets`). @@ -18,5 +18,5 @@ Some sources will also have an **Optional Fields** tab. You can open this tab to Once you've filled out all the required fields, click on the **Set up source** button and Airbyte will run a check to verify the connection. Happy replicating! -Can't find the connectors that you want? Try your hand at easily building one yourself using our [Connector Builder!](../connector-development/connector-builder-ui/overview.md) +Can't find the connectors that you want? Try your hand at easily building one yourself using our [Connector Builder!](../../connector-development/connector-builder-ui/overview.md) diff --git a/docs/using-airbyte/getting-started/readme.md b/docs/using-airbyte/getting-started/readme.md new file mode 100644 index 000000000000..ab860999e2fb --- /dev/null +++ b/docs/using-airbyte/getting-started/readme.md @@ -0,0 +1,32 @@ +# Getting Started + +Getting started with Airbyte takes only a few steps! This page guides you through the initial steps to get started, and you'll learn how to set up your first connection on the following pages. + +You have two options to run Airbyte: Use **Airbyte Cloud** (recommended) or **self-host Airbyte** in your infrastructure. + +## Sign Up for Airbyte Cloud + +To use Airbyte Cloud, [sign up](https://cloud.airbyte.io/signup) with your email address, Google login, or GitHub login. Upon signing up, you'll be taken to your workspace, which lets you collaborate with team members and share resources across your team under a shared billing account. + +Airbyte Cloud offers a 14-day free trial that begins after your first successful sync. For more details on our pricing model, see our [pricing page](https://www.airbyte.com/pricing). + +To start setting up a data pipeline, see how to [set up a source](./add-a-source.md). + +:::info +Depending on your data residency, you may need to [allowlist IP addresses](/operating-airbyte/security.md#network-security-1) to enable access to Airbyte. +::: + +## Deploy Airbyte (Open Source) + +To use Airbyte Open Source, you can use one of the following options to deploy it on your infrastructure.
+ +- [Local Deployment](/deploying-airbyte/local-deployment.md) (recommended when trying out Airbyte) +- [On AWS](/deploying-airbyte/on-aws-ec2.md) +- [On Azure VM Cloud Shell](/deploying-airbyte/on-azure-vm-cloud-shell.md) +- [On Digital Ocean Droplet](/deploying-airbyte/on-digitalocean-droplet.md) +- [On GCP](/deploying-airbyte/on-gcp-compute-engine.md) +- [On Kubernetes](/deploying-airbyte/on-kubernetes-via-helm.md) +- [On OCI VM](/deploying-airbyte/on-oci-vm.md) +- [On Restack](/deploying-airbyte/on-restack.md) +- [On Plural](/deploying-airbyte/on-plural.md) +- [On AWS ECS](/deploying-airbyte/on-aws-ecs.md) (Spoiler alert: it doesn't work) diff --git a/docs/quickstart/set-up-a-connection.md b/docs/using-airbyte/getting-started/set-up-a-connection.md similarity index 63% rename from docs/quickstart/set-up-a-connection.md rename to docs/using-airbyte/getting-started/set-up-a-connection.md index c9144ec08c43..7948eeeda06a 100644 --- a/docs/quickstart/set-up-a-connection.md +++ b/docs/using-airbyte/getting-started/set-up-a-connection.md @@ -1,38 +1,44 @@ # Set up a Connection -Now that you've learned how to [deploy Airbyte locally](https://docs.airbyte.com/quickstart/deploy-airbyte) and set up your first [source](https://docs.airbyte.com/quickstart/add-a-source) and [destination](https://docs.airbyte.com/quickstart/add-a-destination), it's time to finish the job by creating your very first connection! +Now that you've learned how to set up your first [source](./add-a-source) and [destination](./add-a-destination), it's time to finish the job by creating your very first connection! On the left side of your main Airbyte dashboard, select **Connections**. You will be prompted to choose which source and destination to use for this connection. As an example, we'll use the **Google Sheets** source and **Local JSON** destination. ## Configure the connection -Once you've chosen your source and destination, you'll be able to configure the connection. You can refer to [this page](https://docs.airbyte.com/cloud/managing-airbyte-cloud/configuring-connections) for more information on each available configuration. For this demo, we'll simply set the **Replication frequency** to a 24 hour interval and leave the other fields at their default values. +Once you've chosen your source and destination, you'll be able to configure the connection. You can refer to [this page](/cloud/managing-airbyte-cloud/configuring-connections.md) for more information on each available configuration. For this demo, we'll simply set the **Replication frequency** to a 24 hour interval and leave the other fields at their default values. -![Connection config](../.gitbook/assets/set-up-a-connection/getting-started-connection-config.png) +![Connection config](../../.gitbook/assets/set-up-a-connection/getting-started-connection-config.png) -Next, you can toggle which streams you want to replicate, as well as setting up the desired sync mode for each stream. For more information on the nature of each sync mode supported by Airbyte, see [this page](https://docs.airbyte.com/understanding-airbyte/connections/#sync-modes). +:::note +By default, data will sync to the default namespace defined in the destination. To ensure your data is synced to the correct place, see our examples for [Destination Namespace](/using-airbyte/core-concepts/namespaces.md). +::: + +Next, you can toggle which streams you want to replicate, as well as set up the desired sync mode for each stream.
For more information on the nature of each sync mode supported by Airbyte, see [this page](/using-airbyte/core-concepts/sync-modes). Our test data consists of a single stream cleverly named `Test Data`, which we've enabled and set to `Full Refresh - Overwrite` sync mode. -![Stream config](../.gitbook/assets/set-up-a-connection/getting-started-connection-streams.png) +![Stream config](../../.gitbook/assets/set-up-a-connection/getting-started-connection-streams.png) Click **Set up connection** to complete your first connection. Your first sync is about to begin! -## Connector Dashboard +## Connection Overview -Once you've finished setting up the connection, you will be automatically redirected to a dashboard containing all the tools you need to keep track of your connection. +Once you've finished setting up the connection, you will be automatically redirected to a connection overview containing all the tools you need to keep track of your connection. -![Connection dashboard](../.gitbook/assets/set-up-a-connection/getting-started-connection-success.png) +![Connection dashboard](../../.gitbook/assets/set-up-a-connection/getting-started-connection-success.png) Here's a basic overview of the tabs and their use: -1. The **Status** tab shows you an overview of your connector's sync schedule and health. +1. The **Status** tab shows you an overview of your connector's sync health. 2. The **Job History** tab allows you to check the logs for each sync. If you encounter any errors or unexpected behaviors during a sync, checking the logs is always a good first step to finding the cause and solution. 3. The **Replication** tab allows you to modify the configurations you chose during the connection setup. 4. The **Settings** tab contains additional settings, and the option to delete the connection if you no longer wish to use it. ### Check the data from your first sync +Once the first sync has completed, you can verify that it succeeded by checking the data in your destination. + If you followed along and created your own connection using a `Local JSON` destination, you can use this command to check the file's contents to make sure the replication worked as intended (be sure to replace YOUR_PATH with the path you chose in your destination setup, and YOUR_STREAM_NAME with the name of an actual stream you replicated): ```bash @@ -42,12 +48,12 @@ cat /tmp/airbyte_local/YOUR_PATH/_airbyte_raw_YOUR_STREAM_NAME.jsonl ``` You should see a list of JSON objects, each containing a unique `airbyte_ab_id`, an `emitted_at` timestamp, and `airbyte_data` containing the extracted record. :::tip -If you are using Airbyte on Windows with WSL2 and Docker, refer to [this guide](https://docs.airbyte.com/operator-guides/locating-files-local-destination) to locate the replicated folder and file. +If you are using Airbyte on Windows with WSL2 and Docker, refer to [this guide](/integrations/locating-files-local-destination.md) to locate the replicated folder and file. ::: ## What's next? -Congratulations on successfully setting up your first connection using Airbyte Open Source! We hope that this will be just the first step on your journey with us. We support a large, ever-growing [catalog of sources and destinations](https://docs.airbyte.com/integrations/), and you can even [contribute your own](https://docs.airbyte.com/connector-development/). +Congratulations on successfully setting up your first connection using Airbyte Open Source! We hope that this will be just the first step on your journey with us.
We support a large, ever-growing [catalog of sources and destinations](/integrations/), and you can even [contribute your own](/connector-development/). If you have any questions at all, please reach out to us on [Slack](https://slack.airbyte.io/). If you would like to see a missing feature or connector added, please create an issue on our [Github](https://github.com/airbytehq/airbyte). Our community's participation is invaluable in helping us grow and improve every day, and we always welcome your feedback. diff --git a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md b/docs/using-airbyte/workspaces.md similarity index 86% rename from docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md rename to docs/using-airbyte/workspaces.md index 40336d7b9273..710242ca4728 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace.md +++ b/docs/using-airbyte/workspaces.md @@ -1,6 +1,8 @@ # Manage your workspace -An Airbyte Cloud workspace allows you to collaborate with other users and manage connections under a shared billing account. +A workspace in Airbyte allows you to collaborate with other users and manage connections together. On Airbyte Cloud it will allow you to share billing details for a workspace. + + :::info Airbyte [credits](https://airbyte.com/pricing) are assigned per workspace and cannot be transferred between workspaces. @@ -10,7 +12,7 @@ Airbyte [credits](https://airbyte.com/pricing) are assigned per workspace and ca To add a user to your workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to the **Settings** via the side navigation in Airbyte. 2. Click **Access Management**. @@ -28,7 +30,7 @@ To add a user to your workspace: To remove a user from your workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to the **Settings** via the side navigation in Airbyte. 2. Click **Access Management**. @@ -40,7 +42,7 @@ To remove a user from your workspace: To rename a workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to the **Settings** via the side navigation in Airbyte. 2. Click **General Settings**. @@ -52,7 +54,7 @@ To rename a workspace: To delete a workspace: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click **Settings**. +1. Go to the **Settings** via the side navigation in Airbyte. 2. Click **General Settings**. @@ -78,6 +80,6 @@ You can use one or multiple workspaces with Airbyte Cloud, which gives you flexi To switch between workspaces: -1. On the [Airbyte Cloud](http://cloud.airbyte.com) dashboard, click the current workspace name under the Airbyte logo in the navigation bar. +1. Click the current workspace name under the Airbyte logo in the navigation bar. 2. Search for the workspace or click the name of the workspace you want to switch to. 
diff --git a/docusaurus/redirects.yml b/docusaurus/redirects.yml index 28a7f499bc15..080a84f1dd4c 100644 --- a/docusaurus/redirects.yml +++ b/docusaurus/redirects.yml @@ -1,30 +1,16 @@ # A list of URLs that should be redirected to new pathes -- from: /airbyte-pro - to: /enterprise-setup/self-managed/ -- from: /airbyte-enterprise - to: /enterprise-setup/self-managed/ - from: /upgrading-airbyte to: /operator-guides/upgrading-airbyte - from: /catalog to: /understanding-airbyte/airbyte-protocol - from: /integrations/sources/appstore-singer to: /integrations/sources/appstore -- from: - - /project-overview/security - - /operator-guides/securing-airbyte - to: /operator-guides/security - from: /connector-development/config-based/ to: /connector-development/config-based/low-code-cdk-overview - from: /project-overview/changelog to: /category/release-notes - from: /connector-development/config-based/understanding-the-yaml-file/stream-slicers/ to: /connector-development/config-based/understanding-the-yaml-file/partition-router -- from: /cloud/managing-airbyte-cloud - to: /category/using-airbyte-cloud -- from: /category/managing-airbyte-cloud - to: /category/using-airbyte-cloud -- from: /category/airbyte-open-source-quick-start - to: /category/getting-started - from: /cloud/dbt-cloud-integration to: /cloud/managing-airbyte-cloud/dbt-cloud-integration - from: /cloud/managing-airbyte-cloud/review-sync-summary @@ -33,5 +19,74 @@ to: /cloud/managing-airbyte-cloud/manage-connection-state - from: /cloud/managing-airbyte-cloud/edit-stream-configuration to: /cloud/managing-airbyte-cloud/configuring-connections -- from: /project-overview/product-release-stages - to: /project-overview/product-support-levels +# November 2023 documentation restructure: +- from: + - /project-overview/product-support-levels + - /project-overview/product-release-stages + to: /integrations/connector-support-levels +- from: + - /project-overview/code-of-conduct + - /project-overview/slack-code-of-conduct + to: /community/code-of-conduct +- from: /project-overview/licenses/ + to: /developer-guides/licenses/ +- from: /project-overview/licenses/license-faq + to: /developer-guides/licenses/license-faq +- from: /project-overview/licenses/elv2-license + to: /developer-guides/licenses/elv2-license +- from: /project-overview/licenses/mit-license + to: /developer-guides/licenses/mit-license +- from: /project-overview/licenses/examples + to: /developer-guides/licenses/examples +- from: + - /enterprise-setup/self-managed/ + - /airbyte-pro + - /airbyte-enterprise + to: /enterprise-setup/ +- from: /enterprise-setup/self-managed/implementation-guide + to: /enterprise-setup/implementation-guide +- from: /enterprise-setup/self-managed/sso + to: /enterprise-setup/sso +- from: + - /project-overview/security + - /operator-guides/securing-airbyte + - /operator-guides/security + to: /operating-airbyte/security +- from: + - /cloud/getting-started-with-airbyte-cloud + - /quickstart/deploy-airbyte + - /category/getting-started + - /category/airbyte-open-source-quick-start + to: /using-airbyte/getting-started/ +- from: /quickstart/add-a-source + to: /using-airbyte/getting-started/add-a-source +- from: /quickstart/add-a-destination + to: /using-airbyte/getting-started/add-a-destination +- from: /quickstart/set-up-a-connection + to: /using-airbyte/getting-started/set-up-a-connection +- from: /cloud/core-concepts + to: /using-airbyte/core-concepts/ +- from: /understanding-airbyte/namespaces + to: /using-airbyte/core-concepts/namespaces +- from: 
/understanding-airbyte/connections/ + to: /using-airbyte/core-concepts/sync-modes/ +- from: /understanding-airbyte/connections/full-refresh-overwrite + to: /using-airbyte/core-concepts/sync-modes/full-refresh-overwrite +- from: /understanding-airbyte/connections/full-refresh-append + to: /using-airbyte/core-concepts/sync-modes/full-refresh-append +- from: /understanding-airbyte/connections/incremental-append + to: /using-airbyte/core-concepts/sync-modes/incremental-append +- from: /understanding-airbyte/connections/incremental-append-deduped + to: /using-airbyte/core-concepts/sync-modes/incremental-append-deduped +- from: /understanding-airbyte/basic-normalization + to: /using-airbyte/core-concepts/basic-normalization +- from: /understanding-airbyte/typing-deduping + to: /using-airbyte/core-concepts/typing-deduping +- from: + - /troubleshooting + - /operator-guides/contact-support + to: /community/getting-support +- from: /cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace + to: /using-airbyte/workspaces +- from: /operator-guides/locating-files-local-destination + to: /integrations/locating-files-local-destination diff --git a/docusaurus/sidebars.js b/docusaurus/sidebars.js index 55f4497d1e22..e42c14f55492 100644 --- a/docusaurus/sidebars.js +++ b/docusaurus/sidebars.js @@ -319,49 +319,6 @@ const contributeToAirbyte = { ], }; -const airbyteCloud = [ - { - type: "doc", - label: "Getting Started", - id: "cloud/getting-started-with-airbyte-cloud", - }, - "cloud/core-concepts", - { - type: "category", - label: "Using Airbyte Cloud", - link: { - type: "generated-index", - }, - items: [ - "cloud/managing-airbyte-cloud/configuring-connections", - "cloud/managing-airbyte-cloud/review-connection-status", - "cloud/managing-airbyte-cloud/review-sync-history", - "cloud/managing-airbyte-cloud/manage-schema-changes", - "cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications", - "cloud/managing-airbyte-cloud/manage-data-residency", - "cloud/managing-airbyte-cloud/dbt-cloud-integration", - "cloud/managing-airbyte-cloud/manage-credits", - "cloud/managing-airbyte-cloud/manage-connection-state", - "cloud/managing-airbyte-cloud/manage-airbyte-cloud-workspace", - "cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits", - ], - }, -]; - -const ossGettingStarted = { - type: "category", - label: "Getting Started", - link: { - type: "generated-index", - }, - items: [ - "quickstart/deploy-airbyte", - "quickstart/add-a-source", - "quickstart/add-a-destination", - "quickstart/set-up-a-connection", - ], -}; - const deployAirbyte = { type: "category", label: "Deploy Airbyte", @@ -379,7 +336,11 @@ const deployAirbyte = { label: "On AWS EC2", id: "deploying-airbyte/on-aws-ec2", }, - + { + type: "doc", + label: "On AWS ECS", + id: "deploying-airbyte/on-aws-ecs", + }, { type: "doc", label: "On Azure", @@ -418,53 +379,6 @@ const deployAirbyte = { ], }; -const airbyteSelfManaged = { - type: "category", - label: "Airbyte Self-Managed", - link: { - type: "doc", - id: "enterprise-setup/self-managed/README", - }, - items: [ - "enterprise-setup/self-managed/implementation-guide", - "enterprise-setup/self-managed/sso", - ] -} - -const operatorGuide = { - type: "category", - label: "Manage Airbyte", - link: { - type: "generated-index", - }, - items: [ - "operator-guides/upgrading-airbyte", - "operator-guides/reset", - "operator-guides/configuring-airbyte-db", - "operator-guides/configuring-connector-resources", - "operator-guides/browsing-output-logs", - "operator-guides/using-the-airflow-airbyte-operator", 
- "operator-guides/using-prefect-task", - "operator-guides/using-dagster-integration", - "operator-guides/using-kestra-plugin", - "operator-guides/locating-files-local-destination", - "operator-guides/collecting-metrics", - { - type: "category", - label: "Transformations and Normalization", - items: [ - "operator-guides/transformation-and-normalization/transformations-with-sql", - "operator-guides/transformation-and-normalization/transformations-with-dbt", - "operator-guides/transformation-and-normalization/transformations-with-airbyte", - ], - }, - "operator-guides/configuring-airbyte", - "operator-guides/using-custom-connectors", - "operator-guides/scaling-airbyte", - "operator-guides/configuring-sync-notifications", - ], -}; - const understandingAirbyte = { type: "category", label: "Understand Airbyte", @@ -472,67 +386,155 @@ const understandingAirbyte = { "understanding-airbyte/beginners-guide-to-catalog", "understanding-airbyte/airbyte-protocol", "understanding-airbyte/airbyte-protocol-docker", - "understanding-airbyte/basic-normalization", - "understanding-airbyte/typing-deduping", - { - type: "category", - label: "Connections and Sync Modes", - items: [ - { - type: "doc", - label: "Connections Overview", - id: "understanding-airbyte/connections/README", - }, - "understanding-airbyte/connections/full-refresh-overwrite", - "understanding-airbyte/connections/full-refresh-append", - "understanding-airbyte/connections/incremental-append", - "understanding-airbyte/connections/incremental-append-deduped", - ], - }, "understanding-airbyte/operations", "understanding-airbyte/high-level-view", "understanding-airbyte/jobs", "understanding-airbyte/tech-stack", "understanding-airbyte/cdc", - "understanding-airbyte/namespaces", "understanding-airbyte/supported-data-types", "understanding-airbyte/json-avro-conversion", "understanding-airbyte/database-data-catalog", ], }; -const security = { - type: "doc", - id: "operator-guides/security", -}; - -const support = { - type: "doc", - id: "operator-guides/contact-support", -}; - module.exports = { - mySidebar: [ - { - type: "doc", - label: "Start here", - id: "readme", - }, + docs: [ sectionHeader("Airbyte Connectors"), connectorCatalog, buildAConnector, - sectionHeader("Airbyte Cloud"), - ...airbyteCloud, - sectionHeader("Airbyte Open Source (OSS)"), - ossGettingStarted, + "integrations/connector-support-levels", + sectionHeader("Using Airbyte"), + { + type: "category", + label: "Getting Started", + link: { + type: "doc", + id: "using-airbyte/getting-started/readme", + }, + items: [ + "using-airbyte/getting-started/add-a-source", + "using-airbyte/getting-started/add-a-destination", + "using-airbyte/getting-started/set-up-a-connection", + ], + }, + { + type: "category", + label: "Core Concepts", + link: { + type: "doc", + id: "using-airbyte/core-concepts/readme" + }, + items: [ + "using-airbyte/core-concepts/sync-schedules", + "using-airbyte/core-concepts/namespaces", + { + type: "category", + label: "Sync Modes", + link: { + type: "doc", + id: "using-airbyte/core-concepts/sync-modes/README" + }, + items: [ + "using-airbyte/core-concepts/sync-modes/incremental-append-deduped", + "using-airbyte/core-concepts/sync-modes/incremental-append", + "using-airbyte/core-concepts/sync-modes/full-refresh-append", + "using-airbyte/core-concepts/sync-modes/full-refresh-overwrite", + ], + }, + "using-airbyte/core-concepts/typing-deduping", + "using-airbyte/core-concepts/basic-normalization", + ], + }, + { + type: "category", + label: "Configuring Connections", + 
link: { + type: "doc", + id: "cloud/managing-airbyte-cloud/configuring-connections" + }, + items: [ + "cloud/managing-airbyte-cloud/manage-schema-changes", + "cloud/managing-airbyte-cloud/manage-data-residency", + "cloud/managing-airbyte-cloud/manage-connection-state", + { + type: "category", + label: "Transformations", + items: [ + "cloud/managing-airbyte-cloud/dbt-cloud-integration", + "operator-guides/transformation-and-normalization/transformations-with-sql", + "operator-guides/transformation-and-normalization/transformations-with-dbt", + "operator-guides/transformation-and-normalization/transformations-with-airbyte", + ] + }, + ] + }, + { + type: "category", + label: "Managing Syncs", + items: [ + "cloud/managing-airbyte-cloud/review-connection-status", + "cloud/managing-airbyte-cloud/review-sync-history", + "operator-guides/browsing-output-logs", + "operator-guides/reset", + ], + }, + { + type: "category", + label: "Workspace Management", + items: [ + "using-airbyte/workspaces", + "cloud/managing-airbyte-cloud/manage-airbyte-cloud-notifications", + "cloud/managing-airbyte-cloud/manage-credits", + "operator-guides/using-custom-connectors", + ] + }, + sectionHeader("Operating Airbyte"), deployAirbyte, - operatorGuide, { - type: "doc", - id: "troubleshooting", + type: "category", + label: "Airbyte Enterprise", + link: { + type: "doc", + id: "enterprise-setup/README", + }, + items: [ + "enterprise-setup/implementation-guide", + "enterprise-setup/sso", + ] + }, + "operator-guides/upgrading-airbyte", + { + type: "category", + label: "Configuring Airbyte", + link: { + type: "doc", + id: "operator-guides/configuring-airbyte", + }, + items: [ + "operator-guides/configuring-airbyte-db", + "operator-guides/configuring-connector-resources", + ] + }, + { + type: "category", + label: "Airbyte at Scale", + items: [ + "operator-guides/collecting-metrics", + "operator-guides/scaling-airbyte", + "cloud/managing-airbyte-cloud/understand-airbyte-cloud-limits", + ] + }, + "operating-airbyte/security", + { + type: "category", + label: "Integrating with Airbyte", + items: [ + "operator-guides/using-the-airflow-airbyte-operator", + "operator-guides/using-prefect-task", + "operator-guides/using-dagster-integration", + "operator-guides/using-kestra-plugin", + ], }, - sectionHeader("Enterprise Setup"), - airbyteSelfManaged, sectionHeader("Developer Guides"), { type: "doc", @@ -548,42 +550,29 @@ module.exports = { }, understandingAirbyte, contributeToAirbyte, - sectionHeader("Resources"), - support, - security, { type: "category", - label: "Project Overview", + label: "Licenses", + link: { + type: "doc", + id: "developer-guides/licenses/README", + }, items: [ - { - type: "link", - label: "Roadmap", - href: "https://go.airbyte.com/roadmap", - }, - "project-overview/product-support-levels", - "project-overview/slack-code-of-conduct", - "project-overview/code-of-conduct", - { - type: "link", - label: "Airbyte Repository", - href: "https://github.com/airbytehq/airbyte", - }, - { - type: "category", - label: "Licenses", - link: { - type: "doc", - id: "project-overview/licenses/README", - }, - items: [ - "project-overview/licenses/license-faq", - "project-overview/licenses/elv2-license", - "project-overview/licenses/mit-license", - "project-overview/licenses/examples", - ], - }, + "developer-guides/licenses/license-faq", + "developer-guides/licenses/elv2-license", + "developer-guides/licenses/mit-license", + "developer-guides/licenses/examples", ], }, + sectionHeader("Community"), + "community/getting-support", + 
"community/code-of-conduct", + sectionHeader("Product Updates"), + { + type: "link", + label: "Roadmap", + href: "https://go.airbyte.com/roadmap", + }, { type: "category", label: "Release Notes", diff --git a/docusaurus/src/components/ConnectorRegistry.jsx b/docusaurus/src/components/ConnectorRegistry.jsx index 3b81708e3192..d3548c350d34 100644 --- a/docusaurus/src/components/ConnectorRegistry.jsx +++ b/docusaurus/src/components/ConnectorRegistry.jsx @@ -1,6 +1,8 @@ import React from "react"; import { useEffect, useState } from "react"; +import styles from "./ConnectorRegistry.module.css"; + const registry_url = "https://connectors.airbyte.com/files/generated_reports/connector_registry_report.json"; @@ -46,7 +48,6 @@ export default function ConnectorRegistry({ type }) { Connector Name - Icon Links Support Level OSS @@ -64,14 +65,12 @@ export default function ConnectorRegistry({ type }) { return ( - +
+ {connector.iconUrl_oss && ( + + )} {connector.name_oss} - - - - {connector.iconUrl_oss ? ( - - ) : null} +
{/* min width to prevent wrapping */} diff --git a/docusaurus/src/components/ConnectorRegistry.module.css b/docusaurus/src/components/ConnectorRegistry.module.css new file mode 100644 index 000000000000..e3d085db4932 --- /dev/null +++ b/docusaurus/src/components/ConnectorRegistry.module.css @@ -0,0 +1,6 @@ +.connectorName { + display: flex; + align-items: center; + gap: 4px; + font-weight: bold; +} diff --git a/docusaurus/src/css/custom.css b/docusaurus/src/css/custom.css index 56563f0b9d24..ba56dadcae02 100644 --- a/docusaurus/src/css/custom.css +++ b/docusaurus/src/css/custom.css @@ -124,11 +124,19 @@ html[data-theme="dark"] .docusaurus-highlight-code-line { font-weight: 700; font-size: 0.8em; padding: 0.4em 0 0.4em 0.4em; - margin-top: 1.1em; color: var(--docsearch-text-color); background-color: var(--ifm-hover-overlay); } +.navbar__category:not(:first-child) { + margin-top: 1.1em; +} + +/* Hide the breadcrumbs if they have only the house as an entry (i.e. on the start page) */ +.breadcrumbs:has(li:first-child:last-child) { + display: none; +} + .cloudStatusLink { display: flex; gap: 4px; diff --git a/docusaurus/src/scripts/cloudStatus.js b/docusaurus/src/scripts/cloudStatus.js index fa1844409227..e3428ac94ed3 100644 --- a/docusaurus/src/scripts/cloudStatus.js +++ b/docusaurus/src/scripts/cloudStatus.js @@ -9,12 +9,12 @@ if (ExecutionEnvironment.canUseDOM) { .then((summary) => { const status = summary.page.status; const el = document.querySelector(".cloudStatusLink"); - el.classList.forEach((className) => { + el?.classList.forEach((className) => { if (className.startsWith("status-")) { el.classList.remove(className); } }); - el.classList.add(`status-${status.toLowerCase()}`) + el?.classList.add(`status-${status.toLowerCase()}`) }); } From ab5571b89205c5864554cbb6565d397fa35a1700 Mon Sep 17 00:00:00 2001 From: FriedrichtenHagen <108153620+FriedrichtenHagen@users.noreply.github.com> Date: Mon, 27 Nov 2023 16:59:24 +0100 Subject: [PATCH 51/57] Small typo fixes on google-ads.md (#32821) --- docs/integrations/sources/google-ads.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index 318b30508a54..240f88b90ea2 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -137,7 +137,7 @@ Represents the bidding strategy at the campaign level. Represents labels that can be attached to different entities such as campaigns or ads. - [ad_group_ad](https://developers.google.com/google-ads/api/fields/v14/ad_group_ad) -Different attributtes of ads from ag groups segmented by date. +Different attributes of ads from ad groups segmented by date. - [ad_group_ad_label](https://developers.google.com/google-ads/api/fields/v14/ad_group_ad_label) - [ad_group](https://developers.google.com/google-ads/api/fields/v14/ad_group) @@ -203,7 +203,7 @@ Due to Google Ads API constraints, the `click_view` stream retrieves data one da ::: :::warning -Google Ads doesn't support `PERFORMACE_MAX` campaigns on `ad_group` or `ad` stream level, only on `campaign` level. +Google Ads doesn't support `PERFORMANCE_MAX` campaigns on `ad_group` or `ad` stream level, only on `campaign` level. If you have this type of campaign Google will remove them from the results for the `ads` reports. More [info](https://github.com/airbytehq/airbyte/issues/11062) and [Google Discussions](https://groups.google.com/g/adwords-api/c/_mxbgNckaLQ). 
::: @@ -368,4 +368,4 @@ Due to a limitation in the Google Ads API which does not allow getting performan | `0.1.4` | 2021-07-28 | [4962](https://github.com/airbytehq/airbyte/pull/4962) | Support new Report streams | | `0.1.3` | 2021-07-23 | [4788](https://github.com/airbytehq/airbyte/pull/4788) | Support main streams, fix bug with exception `DATE_RANGE_TOO_NARROW` for incremental streams | | `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | -| `0.1.1` | 2021-06-23 | [4288](https://github.com/airbytehq/airbyte/pull/4288) | Fix `Bugfix: Correctly declare required parameters` | \ No newline at end of file +| `0.1.1` | 2021-06-23 | [4288](https://github.com/airbytehq/airbyte/pull/4288) | Fix `Bugfix: Correctly declare required parameters` | From c88d5010401ee08a980405d53ad1865b6d96b6fb Mon Sep 17 00:00:00 2001 From: Leo Griffiths Date: Mon, 27 Nov 2023 16:02:06 +0000 Subject: [PATCH 52/57] Fix broken link to Google Sheet source example (#32337) --- docs/connector-development/cdk-python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/connector-development/cdk-python/README.md b/docs/connector-development/cdk-python/README.md index 3809d26ee331..f0eb2387c8b0 100644 --- a/docs/connector-development/cdk-python/README.md +++ b/docs/connector-development/cdk-python/README.md @@ -74,7 +74,7 @@ You can find a complete tutorial for implementing an HTTP source connector in [t **Simple Python connectors using the barebones `Source` abstraction**: -- [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/google_sheets_source/google_sheets_source.py) +- [Google Sheets](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-google-sheets/source_google_sheets/source.py) - [Mailchimp](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-mailchimp/source_mailchimp/source.py) ## Contributing From 1a0a9dc8e47088b09a8724a0b904d4873b54d4be Mon Sep 17 00:00:00 2001 From: andreaslillebo Date: Mon, 27 Nov 2023 17:03:47 +0100 Subject: [PATCH 53/57] Destination Google Sheets: Add instructions for Airbyte OSS (#32044) --- .../integrations/destinations/google-sheets.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/integrations/destinations/google-sheets.md b/docs/integrations/destinations/google-sheets.md index 4152e284459a..1bf21c51b225 100644 --- a/docs/integrations/destinations/google-sheets.md +++ b/docs/integrations/destinations/google-sheets.md @@ -39,12 +39,18 @@ To create a Google account, visit [Google](https://support.google.com/accounts/a **For Airbyte Open Source:** - Authentication to Google Sheets is only available using OAuth for authentication. - - 1. Select **Google Sheets** from the Source type dropdown and enter a name for this connector. -2. Follow [Google's OAuth instructions](https://developers.google.com/identity/protocols/oauth2) to create an authentication app. You will need to grant the scopes described in the [Google Sheets API](https://developers.google.com/identity/protocols/oauth2/scopes#sheets). -3. Copy your Client ID, Client secret, and Refresh Token from the previous step. -4. Copy the Google Sheet link to **Spreadsheet Link** + +Authentication to Google Sheets is only available using OAuth for authentication. + +1. Create a new [Google Cloud project](https://console.cloud.google.com/projectcreate). +2. 
Enable the [Google Sheets API](https://console.cloud.google.com/apis/library/sheets.googleapis.com). +3. Create a new [OAuth client ID](https://console.cloud.google.com/apis/credentials/oauthclient). Select `Web application` as the Application type, give it a `name` and add `https://developers.google.com/oauthplayground` as an Authorized redirect URI. +4. Add a `Client Secret` (Add secret), and take note of both the `Client Secret` and `Client ID`. +5. Go to [Google OAuth Playground](https://developers.google.com/oauthplayground/) +6. Click the cog in the top-right corner, select `Use your own OAuth credentials` and enter the `OAuth Client ID` and `OAuth Client secret` from the previous step. +7. In the left sidebar, find and select `Google Sheets API v4`, then choose the `https://www.googleapis.com/auth/spreadsheets` scope. Click `Authorize APIs`. +8. In **step 2**, click `Exchange authorization code for tokens`. Take note of the `Refresh token`. +9. Set up a new destination in Airbyte, select `Google Sheets` and enter the `Client ID`, `Client Secret`, `Refresh Token` and `Spreadsheet Link` from the previous steps. ### Output schema From bfa468f8ca1647e0875c879a2c3ce421bca8832d Mon Sep 17 00:00:00 2001 From: Rex <14366016+rexwangcc@users.noreply.github.com> Date: Mon, 27 Nov 2023 08:05:19 -0800 Subject: [PATCH 54/57] [Doc] Update the Google Analytics V4 Source setup guide. (#30569) --- docs/integrations/sources/google-analytics-data-api.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/integrations/sources/google-analytics-data-api.md b/docs/integrations/sources/google-analytics-data-api.md index 96993283d498..03e569c0997f 100644 --- a/docs/integrations/sources/google-analytics-data-api.md +++ b/docs/integrations/sources/google-analytics-data-api.md @@ -68,6 +68,7 @@ Before you can use the service account to access Google Analytics data, you need 1. Go to the [Google Analytics Reporting API dashboard](https://console.developers.google.com/apis/api/analyticsreporting.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. You can also set quotas and check usage. 2. Go to the [Google Analytics API dashboard](https://console.developers.google.com/apis/api/analytics.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. +3. Go to the [Google Analytics Data API dashboard](https://console.developers.google.com/apis/api/analyticsdata.googleapis.com/overview). Make sure you have selected the associated project for your service account, and enable the API. 
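If you manage your Google Cloud project from a terminal, the same three APIs can also be enabled with the `gcloud` CLI. This is only a sketch, not part of the official setup steps: it assumes the [gcloud CLI](https://cloud.google.com/sdk) is installed and authenticated against the project that owns your service account, and the service IDs below are taken from the console URLs above.

```bash
# Sketch: enable the Google Analytics Reporting, Analytics, and Analytics Data APIs
# for the currently selected project. Assumes `gcloud config set project <PROJECT_ID>`
# has already been run for the project that owns the service account.
gcloud services enable \
  analyticsreporting.googleapis.com \
  analytics.googleapis.com \
  analyticsdata.googleapis.com
```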
#### Set up the Google Analytics connector in Airbyte From 8c71120406537f5c32681b29cc308f1a6b7a6521 Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 27 Nov 2023 08:42:44 -0800 Subject: [PATCH 55/57] airbyte-ci: bump tag, update readme, remove 'nightly_builds' (#32806) Co-authored-by: postamar --- .../workflows/connectors_nightly_build.yml | 2 +- .github/workflows/connectors_weekly_build.yml | 2 +- airbyte-ci/connectors/pipelines/README.md | 3 +- .../connectors/test/steps/common.py | 2 +- .../pipelines/airbyte_ci/format/containers.py | 1 + .../pipelines/pipelines/cli/airbyte_ci.py | 17 +---- .../connectors/pipelines/pipelines/consts.py | 1 - .../pipelines/dagger/containers/git.py | 38 ++++++++++ .../pipelines/pipelines/helpers/git.py | 71 ++++--------------- .../pipelines/pipelines/helpers/utils.py | 10 +-- .../connectors/pipelines/pyproject.toml | 2 +- 11 files changed, 66 insertions(+), 83 deletions(-) create mode 100644 airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index 6b9d5d6ce5fa..f56c5ede0d6d 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -34,7 +34,7 @@ jobs: - name: Test connectors uses: ./.github/actions/run-dagger-pipeline with: - context: "nightly_builds" + context: "master" docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }} docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }} gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }} diff --git a/.github/workflows/connectors_weekly_build.yml b/.github/workflows/connectors_weekly_build.yml index aa96a832b9b8..53ae27b4360c 100644 --- a/.github/workflows/connectors_weekly_build.yml +++ b/.github/workflows/connectors_weekly_build.yml @@ -34,7 +34,7 @@ jobs: - name: Test connectors uses: ./.github/actions/run-dagger-pipeline with: - context: "nightly_builds" + context: "master" ci_job_key: "weekly_alpha_test" docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }} docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }} diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index bc9ff2271bca..5c2fe6a92ff9 100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -432,7 +432,8 @@ This command runs the Python tests for a airbyte-ci poetry package. ## Changelog | Version | PR | Description | -| ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| ------- | ---------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------| +| 2.7.1 | [#32806](https://github.com/airbytehq/airbyte/pull/32806) | Improve --modified behaviour for pull requests. 
| | 2.7.0 | [#31930](https://github.com/airbytehq/airbyte/pull/31930) | Merge airbyte-ci-internal into airbyte-ci | | 2.6.0 | [#31831](https://github.com/airbytehq/airbyte/pull/31831) | Add `airbyte-ci format` commands, remove connector-specific formatting check | | 2.5.9 | [#32427](https://github.com/airbytehq/airbyte/pull/32427) | Re-enable caching for source-postgres | diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py index 13b52e67d51d..8d384d6bbd60 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py @@ -71,7 +71,7 @@ def validate(self) -> StepResult: async def _run(self) -> StepResult: if not self.should_run: return StepResult(self, status=StepStatus.SKIPPED, stdout="No modified files required a version bump.") - if self.context.ci_context in [CIContext.MASTER, CIContext.NIGHTLY_BUILDS]: + if self.context.ci_context == CIContext.MASTER: return StepResult(self, status=StepStatus.SKIPPED, stdout="Version check are not running in master context.") try: return self.validate() diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py index f9414b22edfd..3c1817accf17 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py @@ -76,6 +76,7 @@ def format_java_container(dagger_client: dagger.Client) -> dagger.Container: "yum install -y findutils", # gradle requires xargs, which is shipped in findutils. "yum clean all", ], + env_vars={"RUN_IN_AIRBYTE_CI": "1"}, ) diff --git a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py index 82ca893e67cb..33a42c8f16df 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py +++ b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py @@ -10,7 +10,7 @@ import os import sys from pathlib import Path -from typing import List, Optional +from typing import Optional, Set import asyncclick as click import docker @@ -27,7 +27,6 @@ get_current_git_revision, get_modified_files_in_branch, get_modified_files_in_commit, - get_modified_files_in_pull_request, ) from pipelines.helpers.utils import get_current_epoch_time, transform_strs_to_paths @@ -142,9 +141,7 @@ def set_working_directory_to_root() -> None: os.chdir(working_dir) -async def get_modified_files( - git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext, pull_request: PullRequest -) -> List[str]: +async def get_modified_files(git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext) -> Set[str]: """Get the list of modified files in the current git branch. If the current branch is master, it will return the list of modified files in the head commit. The head commit on master should be the merge commit of the latest merged pull request as we squash commits on merge. @@ -154,15 +151,8 @@ async def get_modified_files( If the current branch is not master, it will return the list of modified files in the current branch. This latest case is the one we encounter when running the pipeline locally, on a local branch, or manually on GHA with a workflow dispatch event. 
""" - if ci_context is CIContext.MASTER or ci_context is CIContext.NIGHTLY_BUILDS: + if ci_context is CIContext.MASTER or (ci_context is CIContext.MANUAL and git_branch == "master"): return await get_modified_files_in_commit(git_branch, git_revision, is_local) - if ci_context is CIContext.PULL_REQUEST and pull_request is not None: - return get_modified_files_in_pull_request(pull_request) - if ci_context is CIContext.MANUAL: - if git_branch == "master": - return await get_modified_files_in_commit(git_branch, git_revision, is_local) - else: - return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local) return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local) @@ -251,7 +241,6 @@ async def get_modified_files_str(ctx: click.Context): ctx.obj["diffed_branch"], ctx.obj["is_local"], ctx.obj["ci_context"], - ctx.obj["pull_request"], ) return transform_strs_to_paths(modified_files) diff --git a/airbyte-ci/connectors/pipelines/pipelines/consts.py b/airbyte-ci/connectors/pipelines/pipelines/consts.py index 5f5f4a278cd7..a7fe12569221 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/consts.py +++ b/airbyte-ci/connectors/pipelines/pipelines/consts.py @@ -58,7 +58,6 @@ class CIContext(str, Enum): MANUAL = "manual" PULL_REQUEST = "pull_request" - NIGHTLY_BUILDS = "nightly_builds" MASTER = "master" def __str__(self) -> str: diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py new file mode 100644 index 000000000000..bd9a8a5b5b8d --- /dev/null +++ b/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py @@ -0,0 +1,38 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +from typing import Optional + +from dagger import Client, Container +from pipelines.helpers.utils import AIRBYTE_REPO_URL + + +async def checked_out_git_container( + dagger_client: Client, + current_git_branch: str, + current_git_revision: str, + diffed_branch: Optional[str] = None, +) -> Container: + """Builds git-based container with the current branch checked out.""" + current_git_branch = current_git_branch.removeprefix("origin/") + diffed_branch = current_git_branch if diffed_branch is None else diffed_branch.removeprefix("origin/") + return await ( + dagger_client.container() + .from_("alpine/git:latest") + .with_workdir("/repo") + .with_exec(["init"]) + .with_env_variable("CACHEBUSTER", current_git_revision) + .with_exec( + [ + "remote", + "add", + "--fetch", + "--track", + current_git_branch, + "--track", + diffed_branch if diffed_branch is not None else current_git_branch, + "origin", + AIRBYTE_REPO_URL, + ] + ) + .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) + ) diff --git a/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py b/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py index c27d3f138db4..34f229f4a775 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py +++ b/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py @@ -3,12 +3,12 @@ # import functools -from typing import List, Set +from typing import Set import git from dagger import Connection -from github import PullRequest -from pipelines.helpers.utils import AIRBYTE_REPO_URL, DAGGER_CONFIG, DIFF_FILTER +from pipelines.dagger.containers.git import checked_out_git_container +from pipelines.helpers.utils import DAGGER_CONFIG, DIFF_FILTER def get_current_git_revision() -> str: # noqa D103 @@ -24,38 +24,17 @@ async def 
get_modified_files_in_branch_remote( ) -> Set[str]: """Use git diff to spot the modified files on the remote branch.""" async with Connection(DAGGER_CONFIG) as dagger_client: - modified_files = await ( - dagger_client.container() - .from_("alpine/git:latest") - .with_workdir("/repo") - .with_exec(["init"]) - .with_env_variable("CACHEBUSTER", current_git_revision) - .with_exec( - [ - "remote", - "add", - "--fetch", - "--track", - diffed_branch.split("/")[-1], - "--track", - current_git_branch, - "origin", - AIRBYTE_REPO_URL, - ] - ) - .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) - .with_exec(["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"]) - .stdout() - ) + container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision, diffed_branch) + modified_files = await container.with_exec( + ["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"] + ).stdout() return set(modified_files.split("\n")) -def get_modified_files_in_branch_local(current_git_revision: str, diffed_branch: str = "master") -> Set[str]: - """Use git diff and git status to spot the modified files on the local branch.""" +def get_modified_files_local(current_git_revision: str, diffed: str = "master") -> Set[str]: + """Use git diff and git status to spot the modified files in the local repo.""" airbyte_repo = git.Repo() - modified_files = airbyte_repo.git.diff( - f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}" - ).split("\n") + modified_files = airbyte_repo.git.diff(f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed}...{current_git_revision}").split("\n") status_output = airbyte_repo.git.status("--porcelain") for not_committed_change in status_output.split("\n"): file_path = not_committed_change.strip().split(" ")[-1] @@ -69,34 +48,15 @@ async def get_modified_files_in_branch( ) -> Set[str]: """Retrieve the list of modified files on the branch.""" if is_local: - return get_modified_files_in_branch_local(current_git_revision, diffed_branch) + return get_modified_files_local(current_git_revision, diffed_branch) else: return await get_modified_files_in_branch_remote(current_git_branch, current_git_revision, diffed_branch) async def get_modified_files_in_commit_remote(current_git_branch: str, current_git_revision: str) -> Set[str]: async with Connection(DAGGER_CONFIG) as dagger_client: - modified_files = await ( - dagger_client.container() - .from_("alpine/git:latest") - .with_workdir("/repo") - .with_exec(["init"]) - .with_env_variable("CACHEBUSTER", current_git_revision) - .with_exec( - [ - "remote", - "add", - "--fetch", - "--track", - current_git_branch, - "origin", - AIRBYTE_REPO_URL, - ] - ) - .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) - .with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"]) - .stdout() - ) + container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision) + modified_files = await container.with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"]).stdout() return set(modified_files.split("\n")) @@ -113,11 +73,6 @@ async def get_modified_files_in_commit(current_git_branch: str, current_git_revi return await get_modified_files_in_commit_remote(current_git_branch, current_git_revision) -def get_modified_files_in_pull_request(pull_request: PullRequest) -> List[str]: - """Retrieve the list of modified files 
in a pull request.""" - return [f.filename for f in pull_request.get_files()] - - @functools.cache def get_git_repo() -> git.Repo: """Retrieve the git repo.""" diff --git a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py index 76575eaf5664..d2709257e449 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py +++ b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py @@ -13,7 +13,7 @@ import unicodedata from io import TextIOWrapper from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple import anyio import asyncer @@ -308,16 +308,16 @@ def sh_dash_c(lines: List[str]) -> List[str]: return ["sh", "-c", " && ".join(["set -o xtrace"] + lines)] -def transform_strs_to_paths(str_paths: List[str]) -> List[Path]: - """Transform a list of string paths to a list of Path objects. +def transform_strs_to_paths(str_paths: Set[str]) -> List[Path]: + """Transform a list of string paths to an ordered list of Path objects. Args: - str_paths (List[str]): A list of string paths. + str_paths (Set[str]): A set of string paths. Returns: List[Path]: A list of Path objects. """ - return [Path(str_path) for str_path in str_paths] + return sorted([Path(str_path) for str_path in str_paths]) def fail_if_missing_docker_hub_creds(ctx: click.Context): diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index d98c22f8be33..39b4b841eb56 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.7.0" +version = "2.7.1" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] From cdd3952d4ca39c5aff05fa95babf20fe5ce8841b Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 27 Nov 2023 10:20:55 -0800 Subject: [PATCH 56/57] airbyte-ci: revert #32806 (#32839) --- .../workflows/connectors_nightly_build.yml | 2 +- .github/workflows/connectors_weekly_build.yml | 2 +- airbyte-ci/connectors/pipelines/README.md | 3 +- .../connectors/test/steps/common.py | 2 +- .../pipelines/airbyte_ci/format/containers.py | 1 - .../pipelines/pipelines/cli/airbyte_ci.py | 17 ++++- .../connectors/pipelines/pipelines/consts.py | 1 + .../pipelines/dagger/containers/git.py | 38 ---------- .../pipelines/pipelines/helpers/git.py | 71 +++++++++++++++---- .../pipelines/pipelines/helpers/utils.py | 10 +-- .../connectors/pipelines/pyproject.toml | 2 +- 11 files changed, 83 insertions(+), 66 deletions(-) delete mode 100644 airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py diff --git a/.github/workflows/connectors_nightly_build.yml b/.github/workflows/connectors_nightly_build.yml index f56c5ede0d6d..6b9d5d6ce5fa 100644 --- a/.github/workflows/connectors_nightly_build.yml +++ b/.github/workflows/connectors_nightly_build.yml @@ -34,7 +34,7 @@ jobs: - name: Test connectors uses: ./.github/actions/run-dagger-pipeline with: - context: "master" + context: "nightly_builds" docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }} docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }} gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }} diff --git a/.github/workflows/connectors_weekly_build.yml b/.github/workflows/connectors_weekly_build.yml index 
53ae27b4360c..aa96a832b9b8 100644 --- a/.github/workflows/connectors_weekly_build.yml +++ b/.github/workflows/connectors_weekly_build.yml @@ -34,7 +34,7 @@ jobs: - name: Test connectors uses: ./.github/actions/run-dagger-pipeline with: - context: "master" + context: "nightly_builds" ci_job_key: "weekly_alpha_test" docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }} docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }} diff --git a/airbyte-ci/connectors/pipelines/README.md b/airbyte-ci/connectors/pipelines/README.md index 5c2fe6a92ff9..bc9ff2271bca 100644 --- a/airbyte-ci/connectors/pipelines/README.md +++ b/airbyte-ci/connectors/pipelines/README.md @@ -432,8 +432,7 @@ This command runs the Python tests for an airbyte-ci poetry package. ## Changelog | Version | PR | Description | -| ------- | ---------------------------------------------------------- |-----------------------------------------------------------------------------------------------------------| -| 2.7.1 | [#32806](https://github.com/airbytehq/airbyte/pull/32806) | Improve --modified behaviour for pull requests. | +| ------- | ---------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | | 2.7.0 | [#31930](https://github.com/airbytehq/airbyte/pull/31930) | Merge airbyte-ci-internal into airbyte-ci | | 2.6.0 | [#31831](https://github.com/airbytehq/airbyte/pull/31831) | Add `airbyte-ci format` commands, remove connector-specific formatting check | | 2.5.9 | [#32427](https://github.com/airbytehq/airbyte/pull/32427) | Re-enable caching for source-postgres | diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py index 8d384d6bbd60..13b52e67d51d 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/connectors/test/steps/common.py @@ -71,7 +71,7 @@ def validate(self) -> StepResult: async def _run(self) -> StepResult: if not self.should_run: return StepResult(self, status=StepStatus.SKIPPED, stdout="No modified files required a version bump.") - if self.context.ci_context == CIContext.MASTER: + if self.context.ci_context in [CIContext.MASTER, CIContext.NIGHTLY_BUILDS]: return StepResult(self, status=StepStatus.SKIPPED, stdout="Version checks are not run in master context.") try: return self.validate() diff --git a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py index 3c1817accf17..f9414b22edfd 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py +++ b/airbyte-ci/connectors/pipelines/pipelines/airbyte_ci/format/containers.py @@ -76,7 +76,6 @@ def format_java_container(dagger_client: dagger.Client) -> dagger.Container: "yum install -y findutils", # gradle requires xargs, which is shipped in findutils.
"yum clean all", ], - env_vars={"RUN_IN_AIRBYTE_CI": "1"}, ) diff --git a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py index 33a42c8f16df..82ca893e67cb 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py +++ b/airbyte-ci/connectors/pipelines/pipelines/cli/airbyte_ci.py @@ -10,7 +10,7 @@ import os import sys from pathlib import Path -from typing import Optional, Set +from typing import List, Optional import asyncclick as click import docker @@ -27,6 +27,7 @@ get_current_git_revision, get_modified_files_in_branch, get_modified_files_in_commit, + get_modified_files_in_pull_request, ) from pipelines.helpers.utils import get_current_epoch_time, transform_strs_to_paths @@ -141,7 +142,9 @@ def set_working_directory_to_root() -> None: os.chdir(working_dir) -async def get_modified_files(git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext) -> Set[str]: +async def get_modified_files( + git_branch: str, git_revision: str, diffed_branch: str, is_local: bool, ci_context: CIContext, pull_request: PullRequest +) -> List[str]: """Get the list of modified files in the current git branch. If the current branch is master, it will return the list of modified files in the head commit. The head commit on master should be the merge commit of the latest merged pull request as we squash commits on merge. @@ -151,8 +154,15 @@ async def get_modified_files(git_branch: str, git_revision: str, diffed_branch: If the current branch is not master, it will return the list of modified files in the current branch. This latest case is the one we encounter when running the pipeline locally, on a local branch, or manually on GHA with a workflow dispatch event. 
""" - if ci_context is CIContext.MASTER or (ci_context is CIContext.MANUAL and git_branch == "master"): + if ci_context is CIContext.MASTER or ci_context is CIContext.NIGHTLY_BUILDS: return await get_modified_files_in_commit(git_branch, git_revision, is_local) + if ci_context is CIContext.PULL_REQUEST and pull_request is not None: + return get_modified_files_in_pull_request(pull_request) + if ci_context is CIContext.MANUAL: + if git_branch == "master": + return await get_modified_files_in_commit(git_branch, git_revision, is_local) + else: + return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local) return await get_modified_files_in_branch(git_branch, git_revision, diffed_branch, is_local) @@ -241,6 +251,7 @@ async def get_modified_files_str(ctx: click.Context): ctx.obj["diffed_branch"], ctx.obj["is_local"], ctx.obj["ci_context"], + ctx.obj["pull_request"], ) return transform_strs_to_paths(modified_files) diff --git a/airbyte-ci/connectors/pipelines/pipelines/consts.py b/airbyte-ci/connectors/pipelines/pipelines/consts.py index a7fe12569221..5f5f4a278cd7 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/consts.py +++ b/airbyte-ci/connectors/pipelines/pipelines/consts.py @@ -58,6 +58,7 @@ class CIContext(str, Enum): MANUAL = "manual" PULL_REQUEST = "pull_request" + NIGHTLY_BUILDS = "nightly_builds" MASTER = "master" def __str__(self) -> str: diff --git a/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py b/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py deleted file mode 100644 index bd9a8a5b5b8d..000000000000 --- a/airbyte-ci/connectors/pipelines/pipelines/dagger/containers/git.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. - -from typing import Optional - -from dagger import Client, Container -from pipelines.helpers.utils import AIRBYTE_REPO_URL - - -async def checked_out_git_container( - dagger_client: Client, - current_git_branch: str, - current_git_revision: str, - diffed_branch: Optional[str] = None, -) -> Container: - """Builds git-based container with the current branch checked out.""" - current_git_branch = current_git_branch.removeprefix("origin/") - diffed_branch = current_git_branch if diffed_branch is None else diffed_branch.removeprefix("origin/") - return await ( - dagger_client.container() - .from_("alpine/git:latest") - .with_workdir("/repo") - .with_exec(["init"]) - .with_env_variable("CACHEBUSTER", current_git_revision) - .with_exec( - [ - "remote", - "add", - "--fetch", - "--track", - current_git_branch, - "--track", - diffed_branch if diffed_branch is not None else current_git_branch, - "origin", - AIRBYTE_REPO_URL, - ] - ) - .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) - ) diff --git a/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py b/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py index 34f229f4a775..c27d3f138db4 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py +++ b/airbyte-ci/connectors/pipelines/pipelines/helpers/git.py @@ -3,12 +3,12 @@ # import functools -from typing import Set +from typing import List, Set import git from dagger import Connection -from pipelines.dagger.containers.git import checked_out_git_container -from pipelines.helpers.utils import DAGGER_CONFIG, DIFF_FILTER +from github import PullRequest +from pipelines.helpers.utils import AIRBYTE_REPO_URL, DAGGER_CONFIG, DIFF_FILTER def get_current_git_revision() -> str: # noqa D103 @@ -24,17 +24,38 @@ async def 
get_modified_files_in_branch_remote( ) -> Set[str]: """Use git diff to spot the modified files on the remote branch.""" async with Connection(DAGGER_CONFIG) as dagger_client: - container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision, diffed_branch) - modified_files = await container.with_exec( - ["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"] - ).stdout() + modified_files = await ( + dagger_client.container() + .from_("alpine/git:latest") + .with_workdir("/repo") + .with_exec(["init"]) + .with_env_variable("CACHEBUSTER", current_git_revision) + .with_exec( + [ + "remote", + "add", + "--fetch", + "--track", + diffed_branch.split("/")[-1], + "--track", + current_git_branch, + "origin", + AIRBYTE_REPO_URL, + ] + ) + .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) + .with_exec(["diff", f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}"]) + .stdout() + ) return set(modified_files.split("\n")) -def get_modified_files_local(current_git_revision: str, diffed: str = "master") -> Set[str]: - """Use git diff and git status to spot the modified files in the local repo.""" +def get_modified_files_in_branch_local(current_git_revision: str, diffed_branch: str = "master") -> Set[str]: + """Use git diff and git status to spot the modified files on the local branch.""" airbyte_repo = git.Repo() - modified_files = airbyte_repo.git.diff(f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed}...{current_git_revision}").split("\n") + modified_files = airbyte_repo.git.diff( + f"--diff-filter={DIFF_FILTER}", "--name-only", f"{diffed_branch}...{current_git_revision}" + ).split("\n") status_output = airbyte_repo.git.status("--porcelain") for not_committed_change in status_output.split("\n"): file_path = not_committed_change.strip().split(" ")[-1] @@ -48,15 +69,34 @@ async def get_modified_files_in_branch( ) -> Set[str]: """Retrieve the list of modified files on the branch.""" if is_local: - return get_modified_files_local(current_git_revision, diffed_branch) + return get_modified_files_in_branch_local(current_git_revision, diffed_branch) else: return await get_modified_files_in_branch_remote(current_git_branch, current_git_revision, diffed_branch) async def get_modified_files_in_commit_remote(current_git_branch: str, current_git_revision: str) -> Set[str]: async with Connection(DAGGER_CONFIG) as dagger_client: - container = await checked_out_git_container(dagger_client, current_git_branch, current_git_revision) - modified_files = await container.with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"]).stdout() + modified_files = await ( + dagger_client.container() + .from_("alpine/git:latest") + .with_workdir("/repo") + .with_exec(["init"]) + .with_env_variable("CACHEBUSTER", current_git_revision) + .with_exec( + [ + "remote", + "add", + "--fetch", + "--track", + current_git_branch, + "origin", + AIRBYTE_REPO_URL, + ] + ) + .with_exec(["checkout", "-t", f"origin/{current_git_branch}"]) + .with_exec(["diff-tree", "--no-commit-id", "--name-only", current_git_revision, "-r"]) + .stdout() + ) return set(modified_files.split("\n")) @@ -73,6 +113,11 @@ async def get_modified_files_in_commit(current_git_branch: str, current_git_revi return await get_modified_files_in_commit_remote(current_git_branch, current_git_revision) +def get_modified_files_in_pull_request(pull_request: PullRequest) -> List[str]: + """Retrieve the list of modified files 
in a pull request.""" + return [f.filename for f in pull_request.get_files()] + + @functools.cache def get_git_repo() -> git.Repo: """Retrieve the git repo.""" diff --git a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py index d2709257e449..76575eaf5664 100644 --- a/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py +++ b/airbyte-ci/connectors/pipelines/pipelines/helpers/utils.py @@ -13,7 +13,7 @@ import unicodedata from io import TextIOWrapper from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple import anyio import asyncer @@ -308,16 +308,16 @@ def sh_dash_c(lines: List[str]) -> List[str]: return ["sh", "-c", " && ".join(["set -o xtrace"] + lines)] -def transform_strs_to_paths(str_paths: Set[str]) -> List[Path]: - """Transform a list of string paths to an ordered list of Path objects. +def transform_strs_to_paths(str_paths: List[str]) -> List[Path]: + """Transform a list of string paths to a list of Path objects. Args: - str_paths (Set[str]): A set of string paths. + str_paths (List[str]): A list of string paths. Returns: List[Path]: A list of Path objects. """ - return sorted([Path(str_path) for str_path in str_paths]) + return [Path(str_path) for str_path in str_paths] def fail_if_missing_docker_hub_creds(ctx: click.Context): diff --git a/airbyte-ci/connectors/pipelines/pyproject.toml b/airbyte-ci/connectors/pipelines/pyproject.toml index 39b4b841eb56..1eb99d5a25cd 100644 --- a/airbyte-ci/connectors/pipelines/pyproject.toml +++ b/airbyte-ci/connectors/pipelines/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "pipelines" -version = "2.7.1" +version = "2.7.2" description = "Packaged maintained by the connector operations team to perform CI for connectors' pipelines" authors = ["Airbyte "] From d97a399a24e5d86d4a0fcedb2b8055f7ede834eb Mon Sep 17 00:00:00 2001 From: Marius Posta Date: Mon, 27 Nov 2023 10:49:06 -0800 Subject: [PATCH 57/57] source-mysql, source-mssql: parallelize test execution (#32772) Co-authored-by: postamar --- airbyte-cdk/java/airbyte-cdk/README.md | 1 + .../java/airbyte-cdk/core/build.gradle | 1 + .../java/io/airbyte/cdk/db/MySqlUtils.java | 74 --- .../java/io/airbyte/cdk/db/PostgresUtils.java | 73 --- .../cdk/db/factory/DataSourceFactory.java | 51 +- .../base/ssh/SshBastionContainer.java | 33 +- .../src/main/resources/version.properties | 2 +- .../cdk/db/factory/DataSourceFactoryTest.java | 21 +- .../debezium/AirbyteDebeziumHandler.java | 67 +-- .../internals/DebeziumRecordIterator.java | 10 +- ...tTimeUtil.java => RecordWaitTimeUtil.java} | 17 +- .../postgres/PostgresDebeziumStateUtil.java | 42 -- .../internals/DebeziumRecordIteratorTest.java | 1 + .../FirstRecordWaitTimeUtilTest.java | 51 -- .../PostgresDebeziumStateUtilTest.java | 47 +- .../internals/RecordWaitTimeUtilTest.java | 51 ++ .../jdbc/DefaultJdbcSourceAcceptanceTest.java | 129 +++-- .../integrations/debezium/CdcSourceTest.java | 401 +++++++------- .../jdbc/test/JdbcSourceAcceptanceTest.java | 495 ++++++------------ .../cdk/testutils/ContainerFactory.java | 118 +++++ .../cdk/testutils/PostgresTestDatabase.java | 304 ----------- .../airbyte/cdk/testutils/TestDatabase.java | 293 +++++++++++ .../source-mssql-strict-encrypt/build.gradle | 5 +- .../gradle.properties | 1 + .../source-mssql-strict-encrypt/metadata.yaml | 2 +- .../MssqlSourceStrictEncrypt.java | 2 +- 
...ssqlStrictEncryptSourceAcceptanceTest.java | 88 +--- ...StrictEncryptJdbcSourceAcceptanceTest.java | 106 +--- .../connectors/source-mssql/build.gradle | 10 +- .../connectors/source-mssql/gradle.properties | 1 + .../connectors/source-mssql/metadata.yaml | 2 +- .../source/mssql/MssqlCdcHelper.java | 14 + .../source/mssql/MssqlSource.java | 23 +- .../AbstractMssqlSourceDatatypeTest.java | 17 +- .../AbstractSshMssqlSourceAcceptanceTest.java | 107 +--- .../mssql/CdcMssqlSourceAcceptanceTest.java | 175 ++----- .../mssql/CdcMssqlSourceDatatypeTest.java | 134 ++--- .../mssql/MssqlSourceAcceptanceTest.java | 81 +-- .../source/mssql/MssqlSourceDatatypeTest.java | 63 +-- .../SslEnabledMssqlSourceAcceptanceTest.java | 84 +-- .../source/mssql/CdcMssqlSourceTest.java | 393 ++++++-------- .../mssql/MssqlJdbcSourceAcceptanceTest.java | 134 ++--- .../source/mssql/MssqlSourceTest.java | 163 ++---- .../source/mssql/MsSQLContainerFactory.java | 35 ++ .../source/mssql/MsSQLTestDatabase.java | 211 ++++++++ .../source-mysql-strict-encrypt/build.gradle | 5 +- .../gradle.properties | 1 + .../source-mysql-strict-encrypt/metadata.yaml | 2 +- .../MySqlStrictEncryptSource.java | 6 +- ...cateStrictEncryptSourceAcceptanceTest.java | 79 --- ...cateStrictEncryptSourceAcceptanceTest.java | 24 +- ...cateStrictEncryptSourceAcceptanceTest.java | 28 +- ...ySqlStrictEncryptSourceAcceptanceTest.java | 96 ++-- ...StrictEncryptJdbcSourceAcceptanceTest.java | 344 ++---------- .../MySqlStrictEncryptSslTest.java | 129 +++++ .../connectors/source-mysql/build.gradle | 6 +- .../connectors/source-mysql/gradle.properties | 1 + .../connectors/source-mysql/metadata.yaml | 2 +- .../source/mysql/MySqlCdcProperties.java | 12 +- .../source/mysql/MySqlSource.java | 15 +- .../initialsync/MySqlInitialReadUtil.java | 14 +- .../AbstractMySqlSourceDatatypeTest.java | 23 +- ...SqlSslCertificateSourceAcceptanceTest.java | 80 --- .../AbstractSshMySqlSourceAcceptanceTest.java | 15 +- .../sources/CDCMySqlDatatypeAccuracyTest.java | 93 +--- .../CdcBinlogsMySqlSourceDatatypeTest.java | 119 +---- ...nitialSnapshotMySqlSourceDatatypeTest.java | 106 +--- .../sources/CdcMySqlSourceAcceptanceTest.java | 124 ++--- ...lSslCaCertificateSourceAcceptanceTest.java | 53 +- ...cMySqlSslRequiredSourceAcceptanceTest.java | 48 +- .../sources/MySqlDatatypeAccuracyTest.java | 78 +-- .../sources/MySqlSourceAcceptanceTest.java | 87 +-- .../sources/MySqlSourceDatatypeTest.java | 73 +-- ...lSslCaCertificateSourceAcceptanceTest.java | 25 +- ...slFullCertificateSourceAcceptanceTest.java | 29 +- .../sources/MySqlSslSourceAcceptanceTest.java | 57 +- .../SshKeyMySqlSourceAcceptanceTest.java | 16 - .../SshPasswordMySqlSourceAcceptanceTest.java | 61 +-- .../sources/utils/TestConstants.java | 11 - .../source/mysql/CdcMysqlSourceTest.java | 238 +++------ .../mysql/MySqlJdbcSourceAcceptanceTest.java | 260 +++------ .../mysql/MySqlSourceOperationsTest.java | 326 +++--------- .../source/mysql/MySqlSourceTests.java | 212 ++------ .../MySqlSslJdbcSourceAcceptanceTest.java | 54 +- .../source/mysql/MySQLContainerFactory.java | 73 +++ .../source/mysql/MySQLTestDatabase.java | 136 +++++ .../connectors/source-postgres/build.gradle | 8 +- .../connectors/source-postgres/metadata.yaml | 2 +- .../source/postgres/PostgresUtils.java | 14 + .../cdc/PostgresCdcCtidInitializer.java | 17 +- ...actCdcPostgresSourceSslAcceptanceTest.java | 54 +- .../AbstractPostgresSourceDatatypeTest.java | 12 +- ...resSourceSSLCertificateAcceptanceTest.java | 45 +- ...stractSshPostgresSourceAcceptanceTest.java | 34 +- 
...sSourceCaCertificateSslAcceptanceTest.java | 5 +- ...ourceFullCertificateSslAcceptanceTest.java | 10 +- ...ialSnapshotPostgresSourceDatatypeTest.java | 82 +-- .../CdcPostgresSourceAcceptanceTest.java | 52 +- .../CdcWalLogsPostgresSourceDatatypeTest.java | 82 +-- ...eploymentPostgresSourceAcceptanceTest.java | 36 +- .../sources/PostgresSourceAcceptanceTest.java | 71 ++- .../sources/PostgresSourceDatatypeTest.java | 53 +- ...sSourceSSLCaCertificateAcceptanceTest.java | 5 +- ...ourceSSLFullCertificateAcceptanceTest.java | 9 +- .../XminPostgresSourceAcceptanceTest.java | 61 +-- .../CdcPostgresSourceLegacyCtidTest.java | 3 + .../postgres/CdcPostgresSourceTest.java | 353 ++++++------- .../CloudDeploymentPostgresSourceTest.java | 49 +- .../PostgresCdcGetPublicizedTablesTest.java | 59 +-- .../PostgresJdbcSourceAcceptanceTest.java | 342 +++++------- .../PostgresSourceOperationsTest.java | 9 +- .../postgres/PostgresSourceSSLTest.java | 39 +- .../source/postgres/PostgresSourceTest.java | 95 ++-- .../postgres/XminPostgresSourceTest.java | 59 +-- .../XminPostgresWithOldServerSourceTest.java | 2 +- .../postgres/PostgresContainerFactory.java | 116 ++++ .../source/postgres/PostgresTestDatabase.java | 162 ++++++ .../source-scaffold-java-jdbc/build.gradle | 9 +- .../ScaffoldJavaJdbcSourceAcceptanceTest.java | 17 +- ...ffoldJavaJdbcJdbcSourceAcceptanceTest.java | 61 +-- .../ScaffoldJavaJdbcTestDatabase.java | 65 +++ build.gradle | 2 + .../main/groovy/airbyte-java-connector.gradle | 127 ++--- docs/integrations/sources/mssql.md | 1 + docs/integrations/sources/mysql.md | 267 +++++----- docs/integrations/sources/postgres.md | 5 +- 126 files changed, 3988 insertions(+), 5800 deletions(-) delete mode 100644 airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java rename airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/{FirstRecordWaitTimeUtil.java => RecordWaitTimeUtil.java} (77%) delete mode 100644 airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java create mode 100644 airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java create mode 100644 airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java delete mode 100644 airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java create mode 100644 airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java create mode 100644 airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties create mode 100644 airbyte-integrations/connectors/source-mssql/gradle.properties create mode 100644 airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java create mode 100644 airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java create mode 100644 airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties delete mode 100644 airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java create mode 100644 
airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java create mode 100644 airbyte-integrations/connectors/source-mysql/gradle.properties delete mode 100644 airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java delete mode 100644 airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java create mode 100644 airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java create mode 100644 airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java create mode 100644 airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java create mode 100644 airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java create mode 100644 airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java diff --git a/airbyte-cdk/java/airbyte-cdk/README.md b/airbyte-cdk/java/airbyte-cdk/README.md index 26b1f6dc8378..f51f376715bf 100644 --- a/airbyte-cdk/java/airbyte-cdk/README.md +++ b/airbyte-cdk/java/airbyte-cdk/README.md @@ -156,6 +156,7 @@ MavenLocal debugging steps: | Version | Date | Pull Request | Subject | | :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 0.5.0 | 2023-11-22 | [\#32656](https://github.com/airbytehq/airbyte/pull/32656) | Introduce TestDatabase test fixture, refactor database source test base classes. | | 0.4.11 | 2023-11-14 | [\#32526](https://github.com/airbytehq/airbyte/pull/32526) | Clean up memory manager logs. 
| | 0.4.10 | 2023-11-13 | [\#32285](https://github.com/airbytehq/airbyte/pull/32285) | Fix UUID codec ordering for MongoDB connector | | 0.4.9 | 2023-11-13 | [\#32468](https://github.com/airbytehq/airbyte/pull/32468) | Further error grouping improvements for DV2 connectors | diff --git a/airbyte-cdk/java/airbyte-cdk/core/build.gradle b/airbyte-cdk/java/airbyte-cdk/core/build.gradle index 5dccda8a8d05..38c9c3e24b29 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/build.gradle +++ b/airbyte-cdk/java/airbyte-cdk/core/build.gradle @@ -77,6 +77,7 @@ dependencies { testImplementation libs.testcontainers.jdbc testImplementation libs.testcontainers.mysql testImplementation libs.testcontainers.postgresql + testImplementation libs.testcontainers.mssqlserver implementation 'org.codehaus.plexus:plexus-utils:3.4.2' // bouncycastle is pinned to version-match the transitive dependency from kubernetes client-java diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java deleted file mode 100644 index 0ae1829e93aa..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/MySqlUtils.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.db; - -import com.google.common.annotations.VisibleForTesting; -import java.io.IOException; -import org.testcontainers.containers.MySQLContainer; - -public class MySqlUtils { - - @VisibleForTesting - public static Certificate getCertificate(final MySQLContainer container, - final boolean useAllCertificates) - throws IOException, InterruptedException { - // add root and server certificates to config file - container.execInContainer("sh", "-c", "sed -i '31 a ssl' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '32 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '33 a ssl-cert=/var/lib/mysql/server-cert.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '34 a ssl-key=/var/lib/mysql/server-key.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '35 a require_secure_transport=ON' /etc/my.cnf"); - // add client certificates to config file - if (useAllCertificates) { - container.execInContainer("sh", "-c", "sed -i '39 a [client]' /etc/mysql/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '40 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '41 a ssl-cert=/var/lib/mysql/client-cert.pem' /etc/my.cnf"); - container.execInContainer("sh", "-c", "sed -i '42 a ssl-key=/var/lib/mysql/client-key.pem' /etc/my.cnf"); - } - // copy root certificate and client certificates - var caCert = container.execInContainer("sh", "-c", "cat /var/lib/mysql/ca.pem").getStdout().trim(); - - if (useAllCertificates) { - var clientKey = container.execInContainer("sh", "-c", "cat /var/lib/mysql/client-key.pem").getStdout().trim(); - var clientCert = container.execInContainer("sh", "-c", "cat /var/lib/mysql/client-cert.pem").getStdout().trim(); - return new Certificate(caCert, clientCert, clientKey); - } else { - return new Certificate(caCert); - } - } - - public static class Certificate { - - private final String caCertificate; - private final String clientCertificate; - private final String clientKey; - - public Certificate(final String caCertificate) { - this.caCertificate = caCertificate; - this.clientCertificate = null; - this.clientKey = 
null; - } - - public Certificate(final String caCertificate, final String clientCertificate, final String clientKey) { - this.caCertificate = caCertificate; - this.clientCertificate = clientCertificate; - this.clientKey = clientKey; - } - - public String getCaCertificate() { - return caCertificate; - } - - public String getClientCertificate() { - return clientCertificate; - } - - public String getClientKey() { - return clientKey; - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java index 8781369cb77d..0b16eb5fed00 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/PostgresUtils.java @@ -5,14 +5,11 @@ package io.airbyte.cdk.db; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; import java.sql.SQLException; import java.util.List; -import org.testcontainers.containers.PostgreSQLContainer; public class PostgresUtils { @@ -26,74 +23,4 @@ public static PgLsn getLsn(final JdbcDatabase database) throws SQLException { return PgLsn.fromPgString(jsonNodes.get(0).get("pg_current_wal_lsn").asText()); } - @VisibleForTesting - public static Certificate getCertificate(final PostgreSQLContainer container) throws IOException, InterruptedException { - container.execInContainer("su", "-c", "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\""); - container.execInContainer("su", "-c", "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\""); - container.execInContainer("su", "-c", "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\""); - - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out ca.key"); - container.execInContainer("su", "-c", "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\""); - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out server.key"); - container.execInContainer("su", "-c", "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\""); - container.execInContainer("su", "-c", - "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256"); - container.execInContainer("su", "-c", "cp server.key /etc/ssl/private/"); - container.execInContainer("su", "-c", "cp server.crt /etc/ssl/private/"); - container.execInContainer("su", "-c", "cp ca.crt /etc/ssl/private/"); - container.execInContainer("su", "-c", "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*"); - container.execInContainer("su", "-c", "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt"); - container.execInContainer("su", "-c", "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> 
/var/lib/postgresql/data/postgresql.conf"); - container.execInContainer("su", "-c", "mkdir root/.postgresql"); - container.execInContainer("su", "-c", - "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf"); - - final var caCert = container.execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); - - container.execInContainer("su", "-c", "openssl ecparam -name prime256v1 -genkey -noout -out client.key"); - container.execInContainer("su", "-c", "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\""); - container.execInContainer("su", "-c", - "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256"); - container.execInContainer("su", "-c", "cp client.crt ~/.postgresql/postgresql.crt"); - container.execInContainer("su", "-c", "cp client.key ~/.postgresql/postgresql.key"); - container.execInContainer("su", "-c", "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key"); - container.execInContainer("su", "-c", "cp ca.crt root/.postgresql/ca.crt"); - container.execInContainer("su", "-c", "chown postgres:postgres ~/.postgresql/ca.crt"); - - container.execInContainer("su", "-c", "psql -U test -c \"SELECT pg_reload_conf();\""); - - final var clientKey = container.execInContainer("su", "-c", "cat client.key").getStdout().trim(); - final var clientCert = container.execInContainer("su", "-c", "cat client.crt").getStdout().trim(); - return new Certificate(caCert, clientCert, clientKey); - } - - public static class Certificate { - - private final String caCertificate; - private final String clientCertificate; - private final String clientKey; - - public Certificate(final String caCertificate, final String clientCertificate, final String clientKey) { - this.caCertificate = caCertificate; - this.clientCertificate = clientCertificate; - this.clientKey = clientKey; - } - - public String getCaCertificate() { - return caCertificate; - } - - public String getClientCertificate() { - return clientCertificate; - } - - public String getClientKey() { - return clientKey; - } - - } - } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java index 38837ac5ef35..c03b6fb7a89b 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/factory/DataSourceFactory.java @@ -11,8 +11,10 @@ import com.zaxxer.hikari.HikariDataSource; import java.io.Closeable; import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.time.temporal.TemporalUnit; import java.util.Map; -import java.util.Objects; +import java.util.Optional; import javax.sql.DataSource; /** @@ -188,10 +190,10 @@ private static class DataSourceBuilder { private DataSourceBuilder() {} /** - * Retrieves connectionTimeout value from connection properties in seconds, default minimum timeout + * Retrieves connectionTimeout value from connection properties in millis, default minimum timeout * is 60 seconds since Hikari default of 30 seconds is not enough for acceptance tests. In the case * the value is 0, pass the value along as Hikari and Postgres use default max value for 0 timeout - * value + * value. 
* * NOTE: HikariCP uses milliseconds for all time values: * https://github.com/brettwooldridge/HikariCP#gear-configuration-knobs-baby whereas Postgres is @@ -203,27 +205,32 @@ private DataSourceBuilder() {} * @return DataSourceBuilder class used to create dynamic fields for DataSource */ private static long getConnectionTimeoutMs(final Map connectionProperties, String driverClassName) { - // TODO: the usage of CONNECT_TIMEOUT is Postgres specific, may need to extend for other databases - if (driverClassName.equals(DatabaseDriver.POSTGRESQL.getDriverClassName())) { - final String pgPropertyConnectTimeout = CONNECT_TIMEOUT.getName(); - // If the PGProperty.CONNECT_TIMEOUT was set by the user, then take its value, if not take the - // default - if (connectionProperties.containsKey(pgPropertyConnectTimeout) - && (Long.parseLong(connectionProperties.get(pgPropertyConnectTimeout)) >= 0)) { - return Duration.ofSeconds(Long.parseLong(connectionProperties.get(pgPropertyConnectTimeout))).toMillis(); - } else { - return Duration.ofSeconds(Long.parseLong(Objects.requireNonNull(CONNECT_TIMEOUT.getDefaultValue()))).toMillis(); - } + final Optional parsedConnectionTimeout = switch (DatabaseDriver.findByDriverClassName(driverClassName)) { + case POSTGRESQL -> maybeParseDuration(connectionProperties.get(CONNECT_TIMEOUT.getName()), ChronoUnit.SECONDS) + .or(() -> maybeParseDuration(CONNECT_TIMEOUT.getDefaultValue(), ChronoUnit.SECONDS)); + case MYSQL -> maybeParseDuration(connectionProperties.get("connectTimeout"), ChronoUnit.MILLIS); + case MSSQLSERVER -> maybeParseDuration(connectionProperties.get("loginTimeout"), ChronoUnit.SECONDS); + default -> maybeParseDuration(connectionProperties.get(CONNECT_TIMEOUT_KEY), ChronoUnit.SECONDS) + // Enforce minimum timeout duration for unspecified data sources. + .filter(d -> d.compareTo(CONNECT_TIMEOUT_DEFAULT) >= 0); + }; + return parsedConnectionTimeout.orElse(CONNECT_TIMEOUT_DEFAULT).toMillis(); + } + + private static Optional maybeParseDuration(final String stringValue, TemporalUnit unit) { + if (stringValue == null) { + return Optional.empty(); + } + final long number; + try { + number = Long.parseLong(stringValue); + } catch (NumberFormatException __) { + return Optional.empty(); } - final Duration connectionTimeout; - connectionTimeout = - connectionProperties.containsKey(CONNECT_TIMEOUT_KEY) ? Duration.ofSeconds(Long.parseLong(connectionProperties.get(CONNECT_TIMEOUT_KEY))) - : CONNECT_TIMEOUT_DEFAULT; - if (connectionTimeout.getSeconds() == 0) { - return connectionTimeout.toMillis(); - } else { - return (connectionTimeout.compareTo(CONNECT_TIMEOUT_DEFAULT) > 0 ? 
connectionTimeout : CONNECT_TIMEOUT_DEFAULT).toMillis(); + if (number < 0) { + return Optional.empty(); } + return Optional.of(Duration.of(number, unit)); } public DataSourceBuilder withConnectionProperties(final Map connectionProperties) { diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java index c04c5ccc0907..07a1786f60dd 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/integrations/base/ssh/SshBastionContainer.java @@ -21,7 +21,7 @@ import org.testcontainers.containers.Network; import org.testcontainers.images.builder.ImageFromDockerfile; -public class SshBastionContainer { +public class SshBastionContainer implements AutoCloseable { private static final String SSH_USER = "sshuser"; private static final String SSH_PASSWORD = "secret"; @@ -36,21 +36,27 @@ public void initAndStartBastion(final Network network) { bastion.start(); } + public JsonNode getTunnelMethod(final SshTunnel.TunnelMethod tunnelMethod, + final boolean innerAddress) + throws IOException, InterruptedException { + final var containerAddress = innerAddress ? getInnerContainerAddress(bastion) : getOuterContainerAddress(bastion); + return Jsons.jsonNode(ImmutableMap.builder() + .put("tunnel_host", + Objects.requireNonNull(containerAddress.left)) + .put("tunnel_method", tunnelMethod) + .put("tunnel_port", containerAddress.right) + .put("tunnel_user", SSH_USER) + .put("tunnel_user_password", tunnelMethod.equals(SSH_PASSWORD_AUTH) ? SSH_PASSWORD : "") + .put("ssh_key", tunnelMethod.equals(SSH_KEY_AUTH) ? bastion.execInContainer("cat", "var/bastion/id_rsa").getStdout() : "") + .build()); + } + public JsonNode getTunnelConfig(final SshTunnel.TunnelMethod tunnelMethod, final ImmutableMap.Builder builderWithSchema, final boolean innerAddress) throws IOException, InterruptedException { - final var containerAddress = innerAddress ? getInnerContainerAddress(bastion) : getOuterContainerAddress(bastion); return Jsons.jsonNode(builderWithSchema - .put("tunnel_method", Jsons.jsonNode(ImmutableMap.builder() - .put("tunnel_host", - Objects.requireNonNull(containerAddress.left)) - .put("tunnel_method", tunnelMethod) - .put("tunnel_port", containerAddress.right) - .put("tunnel_user", SSH_USER) - .put("tunnel_user_password", tunnelMethod.equals(SSH_PASSWORD_AUTH) ? SSH_PASSWORD : "") - .put("ssh_key", tunnelMethod.equals(SSH_KEY_AUTH) ? 
bastion.execInContainer("cat", "var/bastion/id_rsa").getStdout() : "") - .build())) + .put("tunnel_method", getTunnelMethod(tunnelMethod, innerAddress)) .build()); } @@ -83,6 +89,11 @@ public void stopAndClose() { bastion.close(); } + @Override + public void close() { + stopAndClose(); + } + public GenericContainer getContainer() { return bastion; } diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties index 6c39f216d22b..c720ecde1c21 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties +++ b/airbyte-cdk/java/airbyte-cdk/core/src/main/resources/version.properties @@ -1 +1 @@ -version=0.4.11 +version=0.5.0 \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java b/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java index c53d9624b66b..a8af1eb4abee 100644 --- a/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java +++ b/airbyte-cdk/java/airbyte-cdk/core/src/test/java/io/airbyte/cdk/db/factory/DataSourceFactoryTest.java @@ -17,6 +17,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.testcontainers.containers.MSSQLServerContainer; import org.testcontainers.containers.MySQLContainer; /** @@ -80,7 +81,7 @@ void testCreatingMySQLDataSourceWithConnectionTimeoutSetBelowDefault() { try (MySQLContainer mySQLContainer = new MySQLContainer<>("mysql:8.0")) { mySQLContainer.start(); final Map connectionProperties = Map.of( - CONNECT_TIMEOUT, "30"); + CONNECT_TIMEOUT, "5000"); final DataSource dataSource = DataSourceFactory.create( mySQLContainer.getUsername(), mySQLContainer.getPassword(), @@ -89,7 +90,23 @@ void testCreatingMySQLDataSourceWithConnectionTimeoutSetBelowDefault() { connectionProperties); assertNotNull(dataSource); assertEquals(HikariDataSource.class, dataSource.getClass()); - assertEquals(60000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); + assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); + } + } + + @Test + void testCreatingMsSQLServerDataSourceWithConnectionTimeoutSetBelowDefault() { + try (var mssqlServerContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense()) { + mssqlServerContainer.start(); + final DataSource dataSource = DataSourceFactory.create( + mssqlServerContainer.getUsername(), + mssqlServerContainer.getPassword(), + mssqlServerContainer.getDriverClassName(), + mssqlServerContainer.getJdbcUrl(), + Map.of("loginTimeout", "5")); + assertNotNull(dataSource); + assertEquals(HikariDataSource.class, dataSource.getClass()); + assertEquals(5000, ((HikariDataSource) dataSource).getHikariConfigMXBean().getConnectionTimeout()); } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java index e292f6629e2a..49f7afa14a6b 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/AirbyteDebeziumHandler.java 
@@ -32,7 +32,6 @@ import io.debezium.engine.DebeziumEngine; import java.time.Duration; import java.time.Instant; -import java.util.Collections; import java.util.Optional; import java.util.OptionalInt; import java.util.Properties; @@ -57,18 +56,20 @@ public class AirbyteDebeziumHandler { private final JsonNode config; private final CdcTargetPosition targetPosition; private final boolean trackSchemaHistory; - private final Duration firstRecordWaitTime; + private final Duration firstRecordWaitTime, subsequentRecordWaitTime; private final OptionalInt queueSize; public AirbyteDebeziumHandler(final JsonNode config, final CdcTargetPosition targetPosition, final boolean trackSchemaHistory, final Duration firstRecordWaitTime, + final Duration subsequentRecordWaitTime, final OptionalInt queueSize) { this.config = config; this.targetPosition = targetPosition; this.trackSchemaHistory = trackSchemaHistory; this.firstRecordWaitTime = firstRecordWaitTime; + this.subsequentRecordWaitTime = subsequentRecordWaitTime; this.queueSize = queueSize; } @@ -97,7 +98,8 @@ public AutoCloseableIterator getSnapshotIterators( targetPosition, tableSnapshotPublisher::hasClosed, new DebeziumShutdownProcedure<>(queue, tableSnapshotPublisher::close, tableSnapshotPublisher::hasClosed), - firstRecordWaitTime); + firstRecordWaitTime, + subsequentRecordWaitTime); return AutoCloseableIterators.concatWithEagerClose(AutoCloseableIterators .transform( @@ -108,10 +110,6 @@ public AutoCloseableIterator getSnapshotIterators( .fromIterator(MoreIterators.singletonIteratorFromSupplier(cdcStateHandler::saveStateAfterCompletionOfSnapshotOfNewStreams))); } - /** - * In the default case here, we don't know for sure whether the Debezium Engine will produce records - * or not. We therefore pass {@link canShortCircuitDebeziumEngine} = false. - */ public AutoCloseableIterator getIncrementalIterators(final ConfiguredAirbyteCatalog catalog, final CdcSavedInfoFetcher cdcSavedInfoFetcher, final CdcStateHandler cdcStateHandler, @@ -120,32 +118,6 @@ public AutoCloseableIterator getIncrementalIterators(final Confi final DebeziumPropertiesManager.DebeziumConnectorType debeziumConnectorType, final Instant emittedAt, final boolean addDbNameToState) { - return getIncrementalIterators( - catalog, - cdcSavedInfoFetcher, - cdcStateHandler, - cdcMetadataInjector, - connectorProperties, - debeziumConnectorType, - emittedAt, addDbNameToState, - false); - } - - /** - * - * @param canShortCircuitDebeziumEngine This argument may be set to true in cases where we already - * know that the Debezium Engine is not going to be producing any change events. In this - * case, this method skips provisioning a Debezium Engine altogether. 
- */ - public AutoCloseableIterator getIncrementalIterators(final ConfiguredAirbyteCatalog catalog, - final CdcSavedInfoFetcher cdcSavedInfoFetcher, - final CdcStateHandler cdcStateHandler, - final CdcMetadataInjector cdcMetadataInjector, - final Properties connectorProperties, - final DebeziumPropertiesManager.DebeziumConnectorType debeziumConnectorType, - final Instant emittedAt, - final boolean addDbNameToState, - final boolean canShortCircuitDebeziumEngine) { LOGGER.info("Using CDC: {}", true); LOGGER.info("Using DBZ version: {}", DebeziumEngine.class.getPackage().getImplementationVersion()); final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState( @@ -157,23 +129,18 @@ public AutoCloseableIterator getIncrementalIterators(final Confi cdcStateHandler.compressSchemaHistoryForState()) : Optional.empty(); - final AutoCloseableIterator eventIterator; - if (!canShortCircuitDebeziumEngine) { - final var publisher = new DebeziumRecordPublisher( - connectorProperties, config, catalog, offsetManager, schemaHistoryManager, debeziumConnectorType); - final var queue = new LinkedBlockingQueue>(queueSize.orElse(QUEUE_CAPACITY)); - publisher.start(queue); - // handle state machine around pub/sub logic. - eventIterator = new DebeziumRecordIterator<>( - queue, - targetPosition, - publisher::hasClosed, - new DebeziumShutdownProcedure<>(queue, publisher::close, publisher::hasClosed), - firstRecordWaitTime); - } else { - LOGGER.info("Short-circuiting Debezium Engine: nothing of interest in target replication stream interval."); - eventIterator = AutoCloseableIterators.fromIterator(Collections.emptyIterator()); - } + final var publisher = new DebeziumRecordPublisher( + connectorProperties, config, catalog, offsetManager, schemaHistoryManager, debeziumConnectorType); + final var queue = new LinkedBlockingQueue>(queueSize.orElse(QUEUE_CAPACITY)); + publisher.start(queue); + // handle state machine around pub/sub logic. + final AutoCloseableIterator eventIterator = new DebeziumRecordIterator<>( + queue, + targetPosition, + publisher::hasClosed, + new DebeziumShutdownProcedure<>(queue, publisher::close, publisher::hasClosed), + firstRecordWaitTime, + subsequentRecordWaitTime); final Duration syncCheckpointDuration = config.get(SYNC_CHECKPOINT_DURATION_PROPERTY) != null ? 
Duration.ofSeconds(config.get(SYNC_CHECKPOINT_DURATION_PROPERTY).asLong()) diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java index 6255acefaa80..a599e0086ff3 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIterator.java @@ -39,13 +39,11 @@ public class DebeziumRecordIterator extends AbstractIterator, Field> heartbeatEventSourceField; private final LinkedBlockingQueue> queue; private final CdcTargetPosition targetPosition; private final Supplier publisherStatusSupplier; - private final Duration firstRecordWaitTime; + private final Duration firstRecordWaitTime, subsequentRecordWaitTime; private final DebeziumShutdownProcedure> debeziumShutdownProcedure; private boolean receivedFirstRecord; @@ -59,12 +57,14 @@ public DebeziumRecordIterator(final LinkedBlockingQueue targetPosition, final Supplier publisherStatusSupplier, final DebeziumShutdownProcedure> debeziumShutdownProcedure, - final Duration firstRecordWaitTime) { + final Duration firstRecordWaitTime, + final Duration subsequentRecordWaitTime) { this.queue = queue; this.targetPosition = targetPosition; this.publisherStatusSupplier = publisherStatusSupplier; this.debeziumShutdownProcedure = debeziumShutdownProcedure; this.firstRecordWaitTime = firstRecordWaitTime; + this.subsequentRecordWaitTime = subsequentRecordWaitTime; this.heartbeatEventSourceField = new HashMap<>(1); this.receivedFirstRecord = false; @@ -90,7 +90,7 @@ protected ChangeEventWithMetadata computeNext() { while (!MoreBooleans.isTruthy(publisherStatusSupplier.get()) || !queue.isEmpty()) { final ChangeEvent next; - final Duration waitTime = receivedFirstRecord ? SUBSEQUENT_RECORD_WAIT_TIME : this.firstRecordWaitTime; + final Duration waitTime = receivedFirstRecord ? 
this.subsequentRecordWaitTime : this.firstRecordWaitTime; try { next = queue.poll(waitTime.getSeconds(), TimeUnit.SECONDS); } catch (final InterruptedException e) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java similarity index 77% rename from airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java rename to airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java index 74c426f35029..4bcec783a70b 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtil.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtil.java @@ -10,13 +10,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class FirstRecordWaitTimeUtil { +public class RecordWaitTimeUtil { - private static final Logger LOGGER = LoggerFactory.getLogger(FirstRecordWaitTimeUtil.class); + private static final Logger LOGGER = LoggerFactory.getLogger(RecordWaitTimeUtil.class); public static final Duration MIN_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(2); public static final Duration MAX_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(20); public static final Duration DEFAULT_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(5); + public static final Duration DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME = Duration.ofMinutes(1); public static void checkFirstRecordWaitTime(final JsonNode config) { // we need to skip the check because in tests, we set initial_waiting_seconds @@ -59,6 +60,18 @@ public static Duration getFirstRecordWaitTime(final JsonNode config) { return firstRecordWaitTime; } + public static Duration getSubsequentRecordWaitTime(final JsonNode config) { + Duration subsequentRecordWaitTime = DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME; + final boolean isTest = config.has("is_test") && config.get("is_test").asBoolean(); + final Optional firstRecordWaitSeconds = getFirstRecordWaitSeconds(config); + if (isTest && firstRecordWaitSeconds.isPresent()) { + // In tests, reuse the initial_waiting_seconds property to speed things up. 
+ subsequentRecordWaitTime = Duration.ofSeconds(firstRecordWaitSeconds.get()); + } + LOGGER.info("Subsequent record waiting time: {} seconds", subsequentRecordWaitTime.getSeconds()); + return subsequentRecordWaitTime; + } + public static Optional getFirstRecordWaitSeconds(final JsonNode config) { final JsonNode replicationMethod = config.get("replication_method"); if (replicationMethod != null && replicationMethod.has("initial_waiting_seconds")) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java index 938fd11e903e..174c03893fa2 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/internals/postgres/PostgresDebeziumStateUtil.java @@ -124,48 +124,6 @@ public void commitLSNToPostgresDatabase(final JsonNode jdbcConfig, } } - public boolean maybeReplicationStreamIntervalHasRecords(final JsonNode jdbcConfig, - final String slotName, - final String publicationName, - final String plugin, - final long startOffset, - final long endOffset) { - try (final BaseConnection pgConnection = (BaseConnection) PostgresReplicationConnection.createConnection(jdbcConfig)) { - ChainedLogicalStreamBuilder streamBuilder = pgConnection - .getReplicationAPI() - .replicationStream() - .logical() - .withSlotName("\"" + slotName + "\"") - .withStartPosition(LogSequenceNumber.valueOf(startOffset)); - streamBuilder = addSlotOption(publicationName, plugin, pgConnection, streamBuilder); - - try (final PGReplicationStream stream = streamBuilder.start()) { - LogSequenceNumber current = stream.getLastReceiveLSN(); - final LogSequenceNumber end = LogSequenceNumber.valueOf(endOffset); - // Attempt to read from the stream. - // This will advance the stream past any bookkeeping entries, until: - // - either the end of the stream is reached, - // - or a meaningful entry is read. - // In the first case, we can update the current position and conclude that the stream contains - // nothing of - // interest to us between the starting position and the current position. - final var msg = stream.readPending(); - if (msg == null) { - current = stream.getLastReceiveLSN(); - } - if (current.compareTo(end) >= 0) { - // If we've reached or gone past the end of the interval which interests us, - // then there's nothing in it that we could possibly care about. - return false; - } - } - } catch (SQLException e) { - throw new RuntimeException(e); - } - // In all other cases, we can't draw any conclusions as to the contents of the stream interval. 
- return true; - } - private ChainedLogicalStreamBuilder addSlotOption(final String publicationName, final String plugin, final BaseConnection pgConnection, diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java index c1ef4f83de75..e386b100c647 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/DebeziumRecordIteratorTest.java @@ -36,6 +36,7 @@ public Long extractPositionFromHeartbeatOffset(final Map sourceOffset }, () -> false, mock(DebeziumShutdownProcedure.class), + Duration.ZERO, Duration.ZERO); final Long lsn = debeziumRecordIterator.getHeartbeatPosition(new ChangeEvent() { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java deleted file mode 100644 index 01c5d2ea47c5..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/FirstRecordWaitTimeUtilTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.integrations.debezium.internals; - -import static io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil.MAX_FIRST_RECORD_WAIT_TIME; -import static io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil.MIN_FIRST_RECORD_WAIT_TIME; -import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; - -import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.commons.json.Jsons; -import java.time.Duration; -import java.util.Collections; -import java.util.Map; -import java.util.Optional; -import org.junit.jupiter.api.Test; - -public class FirstRecordWaitTimeUtilTest { - - @Test - void testGetFirstRecordWaitTime() { - final JsonNode emptyConfig = Jsons.jsonNode(Collections.emptyMap()); - assertDoesNotThrow(() -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(emptyConfig)); - assertEquals(Optional.empty(), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(emptyConfig)); - assertEquals(FirstRecordWaitTimeUtil.DEFAULT_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(emptyConfig)); - - final JsonNode normalConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", 500))); - assertDoesNotThrow(() -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(normalConfig)); - assertEquals(Optional.of(500), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(normalConfig)); - assertEquals(Duration.ofSeconds(500), FirstRecordWaitTimeUtil.getFirstRecordWaitTime(normalConfig)); - - final int tooShortTimeout = (int) MIN_FIRST_RECORD_WAIT_TIME.getSeconds() - 1; - final JsonNode tooShortConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", tooShortTimeout))); - assertThrows(IllegalArgumentException.class, () -> 
FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(tooShortConfig)); - assertEquals(Optional.of(tooShortTimeout), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(tooShortConfig)); - assertEquals(MIN_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(tooShortConfig)); - - final int tooLongTimeout = (int) MAX_FIRST_RECORD_WAIT_TIME.getSeconds() + 1; - final JsonNode tooLongConfig = Jsons.jsonNode(Map.of("replication_method", - Map.of("method", "CDC", "initial_waiting_seconds", tooLongTimeout))); - assertThrows(IllegalArgumentException.class, () -> FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(tooLongConfig)); - assertEquals(Optional.of(tooLongTimeout), FirstRecordWaitTimeUtil.getFirstRecordWaitSeconds(tooLongConfig)); - assertEquals(MAX_FIRST_RECORD_WAIT_TIME, FirstRecordWaitTimeUtil.getFirstRecordWaitTime(tooLongConfig)); - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java index d504c6dd3dfa..280d0ac2709e 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/PostgresDebeziumStateUtilTest.java @@ -26,7 +26,6 @@ import java.util.OptionalLong; import java.util.Properties; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -146,10 +145,9 @@ public void emptyState() { Assertions.assertTrue(savedOffsetAfterReplicationSlotLSN); } - @ParameterizedTest - @Disabled - @ValueSource(strings = {"pgoutput", "wal2json"}) - public void LsnCommitTest(final String plugin) throws SQLException { + @Test + public void LsnCommitTest() throws SQLException { + final String plugin = "pgoutput"; final DockerImageName myImage = DockerImageName.parse("debezium/postgres:13-alpine").asCompatibleSubstituteFor("postgres"); final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); final String fullReplicationSlot = "debezium_slot" + "_" + dbName; @@ -200,45 +198,6 @@ public void LsnCommitTest(final String plugin) throws SQLException { Assertions.assertEquals(targetLsn, lsnAfterCommit.asLong()); Assertions.assertNotEquals(slotStateAtTheBeginning, slotStateAfterCommit); - // Now check that maybeReplicationStreamIntervalHasRecords behaves as expected. 
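The LsnCommitTest change above pins the plugin to pgoutput and keeps using a Debezium Postgres image declared as a compatible substitute for the stock postgres image. A small self-contained sketch of that Testcontainers pattern, for readers unfamiliar with asCompatibleSubstituteFor; it is illustrative only and not part of the patch.

import org.testcontainers.containers.PostgreSQLContainer;
import org.testcontainers.utility.DockerImageName;

public class DebeziumImageSketch {

  public static void main(final String[] args) {
    // Declare the Debezium build of Postgres as a drop-in substitute so that
    // PostgreSQLContainer accepts a non-stock image name without complaint.
    final DockerImageName image = DockerImageName
        .parse("debezium/postgres:13-alpine")
        .asCompatibleSubstituteFor("postgres");
    try (final PostgreSQLContainer<?> container = new PostgreSQLContainer<>(image)) {
      container.start();
      // Prints something like jdbc:postgresql://localhost:<mapped-port>/test
      System.out.println(container.getJdbcUrl());
    }
  }
}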
- - final long lsnBeforeBookkeepingStatements = PostgresUtils.getLsn(database).asLong(); - - database.execute("SELECT txid_current();"); - database.execute("CHECKPOINT"); - final long lsnAfterBookkeepingStatements = PostgresUtils.getLsn(database).asLong(); - Assertions.assertNotEquals(lsnBeforeBookkeepingStatements, lsnAfterBookkeepingStatements); - - Assertions.assertFalse(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnBeforeBookkeepingStatements, - lsnAfterBookkeepingStatements)); - - database.execute("INSERT INTO public.test_table VALUES (3, 'baz');"); - final long lsnAfterMeaningfulStatement = PostgresUtils.getLsn(database).asLong(); - Assertions.assertNotEquals(lsnBeforeBookkeepingStatements, lsnAfterMeaningfulStatement); - - Assertions.assertTrue(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnBeforeBookkeepingStatements, - lsnAfterMeaningfulStatement)); - Assertions.assertTrue(postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - Jsons.jsonNode(databaseConfig), - fullReplicationSlot, - publication, - plugin, - lsnAfterBookkeepingStatements, - lsnAfterMeaningfulStatement)); - - final var slotStateAtTheEnd = getReplicationSlot(database, fullReplicationSlot, plugin, dbName); - Assertions.assertEquals(slotStateAfterCommit, slotStateAtTheEnd); - container.stop(); } } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java new file mode 100644 index 000000000000..64701dd40668 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/debezium/internals/RecordWaitTimeUtilTest.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.cdk.integrations.debezium.internals; + +import static io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil.MAX_FIRST_RECORD_WAIT_TIME; +import static io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil.MIN_FIRST_RECORD_WAIT_TIME; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import java.time.Duration; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import org.junit.jupiter.api.Test; + +public class RecordWaitTimeUtilTest { + + @Test + void testGetFirstRecordWaitTime() { + final JsonNode emptyConfig = Jsons.jsonNode(Collections.emptyMap()); + assertDoesNotThrow(() -> RecordWaitTimeUtil.checkFirstRecordWaitTime(emptyConfig)); + assertEquals(Optional.empty(), RecordWaitTimeUtil.getFirstRecordWaitSeconds(emptyConfig)); + assertEquals(RecordWaitTimeUtil.DEFAULT_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(emptyConfig)); + + final JsonNode normalConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", 500))); + assertDoesNotThrow(() -> RecordWaitTimeUtil.checkFirstRecordWaitTime(normalConfig)); + assertEquals(Optional.of(500), RecordWaitTimeUtil.getFirstRecordWaitSeconds(normalConfig)); + assertEquals(Duration.ofSeconds(500), RecordWaitTimeUtil.getFirstRecordWaitTime(normalConfig)); + + final int tooShortTimeout = (int) MIN_FIRST_RECORD_WAIT_TIME.getSeconds() - 1; + final JsonNode tooShortConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", tooShortTimeout))); + assertThrows(IllegalArgumentException.class, () -> RecordWaitTimeUtil.checkFirstRecordWaitTime(tooShortConfig)); + assertEquals(Optional.of(tooShortTimeout), RecordWaitTimeUtil.getFirstRecordWaitSeconds(tooShortConfig)); + assertEquals(MIN_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(tooShortConfig)); + + final int tooLongTimeout = (int) MAX_FIRST_RECORD_WAIT_TIME.getSeconds() + 1; + final JsonNode tooLongConfig = Jsons.jsonNode(Map.of("replication_method", + Map.of("method", "CDC", "initial_waiting_seconds", tooLongTimeout))); + assertThrows(IllegalArgumentException.class, () -> RecordWaitTimeUtil.checkFirstRecordWaitTime(tooLongConfig)); + assertEquals(Optional.of(tooLongTimeout), RecordWaitTimeUtil.getFirstRecordWaitSeconds(tooLongConfig)); + assertEquals(MAX_FIRST_RECORD_WAIT_TIME, RecordWaitTimeUtil.getFirstRecordWaitTime(tooLongConfig)); + } + +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java index f7356a00bc7f..15bca65f35a1 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/test/java/io/airbyte/cdk/integrations/source/jdbc/DefaultJdbcSourceAcceptanceTest.java @@ -16,87 +16,62 @@ import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.util.HostPortResolver; -import 
io.airbyte.cdk.testutils.PostgreSQLContainerHelper; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.io.IOs; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType; import java.sql.JDBCType; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Stream; +import org.jooq.SQLDialect; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.MountableFile; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; /** * Runs the acceptance tests in the source-jdbc test module. We want this module to run these tests * itself as a sanity check. The trade off here is that this class is duplicated from the one used * in source-postgres. */ -@ExtendWith(SystemStubsExtension.class) -class DefaultJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +class DefaultJdbcSourceAcceptanceTest + extends JdbcSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - private static PostgreSQLContainer PSQL_DB; - - private JsonNode config; - private String dbName; + private static PostgreSQLContainer PSQL_CONTAINER; @BeforeAll static void init() { - PSQL_DB = new PostgreSQLContainer<>("postgres:13-alpine"); - PSQL_DB.start(); + PSQL_CONTAINER = new PostgreSQLContainer<>("postgres:13-alpine"); + PSQL_CONTAINER.start(); CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s BIT(3) NOT NULL);"; INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(B'101');"; } - @BeforeEach - public void setup() throws Exception { - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, PSQL_DB.getHost()) - .put(JdbcUtils.PORT_KEY, PSQL_DB.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, PSQL_DB.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, PSQL_DB.getPassword()) - .build()); - - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - - final String initScriptName = "init_" + dbName.concat(".sql"); - final String tmpFilePath = IOs.writeFileToRandomTmpDir(initScriptName, "CREATE DATABASE " + dbName + ";"); - PostgreSQLContainerHelper.runSqlScript(MountableFile.forHostPath(tmpFilePath), PSQL_DB); - - super.setup(); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder().build(); } @Override - public boolean supportsSchemas() { - return true; + protected PostgresTestSource source() { + final var source = new PostgresTestSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public AbstractJdbcSource getJdbcSource() { - return new PostgresTestSource(); + protected BareBonesTestDatabase createTestDatabase() { + return new BareBonesTestDatabase(PSQL_CONTAINER).initialized(); } @Override - 
public JsonNode getConfig() { - return config; + public boolean supportsSchemas() { + return true; } public JsonNode getConfigWithConnectionProperties(final PostgreSQLContainer psqlDb, final String dbName, final String additionalParameters) { @@ -111,11 +86,6 @@ public JsonNode getConfigWithConnectionProperties(final PostgreSQLContainer p .build()); } - @Override - public String getDriverClass() { - return PostgresTestSource.DRIVER_CLASS; - } - @Override protected boolean supportsPerStream() { return true; @@ -123,10 +93,10 @@ protected boolean supportsPerStream() { @AfterAll static void cleanUp() { - PSQL_DB.close(); + PSQL_CONTAINER.close(); } - private static class PostgresTestSource extends AbstractJdbcSource implements Source { + public static class PostgresTestSource extends AbstractJdbcSource implements Source { private static final Logger LOGGER = LoggerFactory.getLogger(PostgresTestSource.class); @@ -171,10 +141,63 @@ public static void main(final String[] args) throws Exception { } + static protected class BareBonesTestDatabase + extends TestDatabase, BareBonesTestDatabase, BareBonesTestDatabase.BareBonesConfigBuilder> { + + public BareBonesTestDatabase(PostgreSQLContainer container) { + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + final var sql = Stream.of( + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER %s PASSWORD '%s'", getUserName(), getPassword()), + String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", getDatabaseName(), getUserName()), + String.format("ALTER USER %s WITH SUPERUSER", getUserName())); + return Stream.of(Stream.concat( + Stream.of("psql", + "-d", getContainer().getDatabaseName(), + "-U", getContainer().getUsername(), + "-v", "ON_ERROR_STOP=1", + "-a"), + sql.flatMap(stmt -> Stream.of("-c", stmt)))); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.POSTGRES; + } + + @Override + public BareBonesConfigBuilder configBuilder() { + return new BareBonesConfigBuilder(this); + } + + static protected class BareBonesConfigBuilder extends TestDatabase.ConfigBuilder { + + private BareBonesConfigBuilder(BareBonesTestDatabase testDatabase) { + super(testDatabase); + } + + } + + } + @Test void testCustomParametersOverwriteDefaultParametersExpectException() { final String connectionPropertiesUrl = "ssl=false"; - final JsonNode config = getConfigWithConnectionProperties(PSQL_DB, dbName, connectionPropertiesUrl); + final JsonNode config = getConfigWithConnectionProperties(PSQL_CONTAINER, testdb.getDatabaseName(), connectionPropertiesUrl); final Map customParameters = JdbcUtils.parseJdbcParameters(config, JdbcUtils.CONNECTION_PROPERTIES_KEY, "&"); final Map defaultParameters = Map.of( "ssl", "true", diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java index c186f0084a72..c6bc26a7d14c 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/debezium/CdcSourceTest.java @@ -16,8 +16,8 @@ import 
com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.collect.Streams; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.integrations.base.Source; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; @@ -36,7 +36,6 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; @@ -48,65 +47,24 @@ import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public abstract class CdcSourceTest { +public abstract class CdcSourceTest> { - private static final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class); + static private final Logger LOGGER = LoggerFactory.getLogger(CdcSourceTest.class); - protected static final String MODELS_SCHEMA = "models_schema"; - protected static final String MODELS_STREAM_NAME = "models"; - protected static final Set STREAM_NAMES = Sets - .newHashSet(MODELS_STREAM_NAME); - protected static final String COL_ID = "id"; - protected static final String COL_MAKE_ID = "make_id"; - protected static final String COL_MODEL = "model"; - protected static final int INITIAL_WAITING_SECONDS = 5; + static protected final String MODELS_STREAM_NAME = "models"; + static protected final Set STREAM_NAMES = Set.of(MODELS_STREAM_NAME); + static protected final String COL_ID = "id"; + static protected final String COL_MAKE_ID = "make_id"; + static protected final String COL_MODEL = "model"; - protected final List MODEL_RECORDS_RANDOM = ImmutableList.of( - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 11000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Fiesta-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 12000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Focus-random")), - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 13000, COL_MAKE_ID + "_random", 1, COL_MODEL + "_random", - "Ranger-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 14000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "GLA-random")), - Jsons.jsonNode(ImmutableMap - .of(COL_ID + "_random", 15000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "A 220-random")), - Jsons - .jsonNode(ImmutableMap - .of(COL_ID + "_random", 16000, COL_MAKE_ID + "_random", 2, COL_MODEL + "_random", - "E 350-random"))); - - protected static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(List.of( - CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME, - MODELS_SCHEMA, - Field.of(COL_ID, JsonSchemaType.INTEGER), - Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), - Field.of(COL_MODEL, JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))))); - protected static final ConfiguredAirbyteCatalog CONFIGURED_CATALOG = CatalogHelpers - .toDefaultConfiguredCatalog(CATALOG); - - // set all streams to incremental. 
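The CATALOG / CONFIGURED_CATALOG constants this hunk removes (together with the static block that forced every stream to INCREMENTAL) become the getCatalog()/getConfiguredCatalog() overridables introduced further down. A condensed sketch of that construction, using only calls already visible in this file; the single-stream shape and literal schema name are illustrative.

  // Condensed sketch of the catalog construction this refactor moves into
  // getCatalog()/getConfiguredCatalog(): build one stream, derive the default
  // configured catalog, then force every stream to INCREMENTAL (the job the
  // removed static block used to do once, globally).
  static ConfiguredAirbyteCatalog incrementalCatalogSketch() {
    final AirbyteCatalog catalog = new AirbyteCatalog().withStreams(List.of(
        CatalogHelpers.createAirbyteStream(
            MODELS_STREAM_NAME,
            "models_schema",
            Field.of(COL_ID, JsonSchemaType.INTEGER),
            Field.of(COL_MODEL, JsonSchemaType.STRING))
            .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
            .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID)))));
    final ConfiguredAirbyteCatalog configured = CatalogHelpers.toDefaultConfiguredCatalog(catalog);
    configured.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL));
    return configured;
  }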
- static { - CONFIGURED_CATALOG.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); - } - - protected static final List MODEL_RECORDS = ImmutableList.of( + static protected final List MODEL_RECORDS = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 11, COL_MAKE_ID, 1, COL_MODEL, "Fiesta")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 12, COL_MAKE_ID, 1, COL_MODEL, "Focus")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 13, COL_MAKE_ID, 1, COL_MODEL, "Ranger")), @@ -114,87 +72,134 @@ public abstract class CdcSourceTest { Jsons.jsonNode(ImmutableMap.of(COL_ID, 15, COL_MAKE_ID, 2, COL_MODEL, "A 220")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 16, COL_MAKE_ID, 2, COL_MODEL, "E 350"))); - protected void setup() throws SQLException { - createAndPopulateTables(); - } + static protected final String RANDOM_TABLE_NAME = MODELS_STREAM_NAME + "_random"; - private void createAndPopulateTables() { - createAndPopulateActualTable(); - createAndPopulateRandomTable(); - } + static protected final List MODEL_RECORDS_RANDOM = MODEL_RECORDS.stream() + .map(r -> Jsons.jsonNode(ImmutableMap.of( + COL_ID + "_random", r.get(COL_ID).asInt() * 1000, + COL_MAKE_ID + "_random", r.get(COL_MAKE_ID), + COL_MODEL + "_random", r.get(COL_MODEL).asText() + "-random"))) + .toList(); - protected void executeQuery(final String query) { - try { - getDatabase().query( - ctx -> ctx - .execute(query)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } - } + protected T testdb; - public String columnClause(final Map columnsWithDataType, final Optional primaryKey) { - final StringBuilder columnClause = new StringBuilder(); - int i = 0; - for (final Map.Entry column : columnsWithDataType.entrySet()) { - columnClause.append(column.getKey()); - columnClause.append(" "); - columnClause.append(column.getValue()); - if (i < (columnsWithDataType.size() - 1)) { - columnClause.append(","); - columnClause.append(" "); - } - i++; - } - primaryKey.ifPresent(s -> columnClause.append(", PRIMARY KEY (").append(s).append(")")); + protected String createTableSqlFmt() { + return "CREATE TABLE %s.%s(%s);"; + } - return columnClause.toString(); + protected String createSchemaSqlFmt() { + return "CREATE SCHEMA %s;"; } - public void createTable(final String schemaName, final String tableName, final String columnClause) { - executeQuery(createTableQuery(schemaName, tableName, columnClause)); + protected String modelsSchema() { + return "models_schema"; } - public String createTableQuery(final String schemaName, final String tableName, final String columnClause) { - return String.format("CREATE TABLE %s.%s(%s);", schemaName, tableName, columnClause); + /** + * The schema of a random table which is used as a new table in snapshot test + */ + protected String randomSchema() { + return "models_schema_random"; } - public void createSchema(final String schemaName) { - executeQuery(createSchemaQuery(schemaName)); + protected AirbyteCatalog getCatalog() { + return new AirbyteCatalog().withStreams(List.of( + CatalogHelpers.createAirbyteStream( + MODELS_STREAM_NAME, + modelsSchema(), + Field.of(COL_ID, JsonSchemaType.INTEGER), + Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), + Field.of(COL_MODEL, JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))))); } - public String createSchemaQuery(final String schemaName) { - return "CREATE DATABASE " + schemaName + ";"; + protected ConfiguredAirbyteCatalog getConfiguredCatalog() 
{ + final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog()); + configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); + return configuredCatalog; } - private void createAndPopulateActualTable() { - createSchema(MODELS_SCHEMA); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME, - columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); + protected abstract T createTestDatabase(); + + protected abstract S source(); + + protected abstract JsonNode config(); + + protected abstract CdcTargetPosition cdcLatestTargetPosition(); + + protected abstract CdcTargetPosition extractPosition(final JsonNode record); + + protected abstract void assertNullCdcMetaData(final JsonNode data); + + protected abstract void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull); + + protected abstract void removeCDCColumns(final ObjectNode data); + + protected abstract void addCdcMetadataColumns(final AirbyteStream stream); + + protected abstract void addCdcDefaultCursorField(final AirbyteStream stream); + + protected abstract void assertExpectedStateMessages(final List stateMessages); + + @BeforeEach + protected void setup() { + testdb = createTestDatabase(); + + // create and populate actual table + final var actualColumns = ImmutableMap.of( + COL_ID, "INTEGER", + COL_MAKE_ID, "INTEGER", + COL_MODEL, "VARCHAR(200)"); + testdb + .with(createSchemaSqlFmt(), modelsSchema()) + .with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME, columnClause(actualColumns, Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS) { writeModelRecord(recordJson); } - } - /** - * This database and table is not part of Airbyte sync. It is being created just to make sure the - * databases not being synced by Airbyte are not causing issues with our debezium logic - */ - private void createAndPopulateRandomTable() { - if (!randomTableSchema().equals(MODELS_SCHEMA)) { - createSchema(randomTableSchema()); + // Create and populate random table. + // This table is not part of Airbyte sync. It is being created just to make sure the schemas not + // being synced by Airbyte are not causing issues with our debezium logic. 
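The new @BeforeEach setup around this hunk leans on a fluent TestDatabase.with(sqlFormat, args...) call that, judging from its usage here, formats a SQL template, executes it, and returns the database so calls can be chained. A toy stand-in showing just that mechanic; FluentDbSketch is not a real Airbyte class and only illustrates the chaining pattern.

import java.util.ArrayList;
import java.util.List;

public class FluentDbSketch {

  final List<String> executed = new ArrayList<>();

  // Assumed shape of TestDatabase.with: format, execute, return this for chaining.
  FluentDbSketch with(final String sqlFmt, final Object... args) {
    final String sql = String.format(sqlFmt, args);
    executed.add(sql); // the real implementation would run this against the container
    return this;
  }

  public static void main(final String[] args) {
    final FluentDbSketch db = new FluentDbSketch()
        .with("CREATE SCHEMA %s;", "models_schema")
        .with("CREATE TABLE %s.%s(%s);", "models_schema", "models",
            "id INTEGER, model VARCHAR(200), PRIMARY KEY (id)");
    db.executed.forEach(System.out::println);
  }
}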
+ final var randomColumns = ImmutableMap.of( + COL_ID + "_random", "INTEGER", + COL_MAKE_ID + "_random", "INTEGER", + COL_MODEL + "_random", "VARCHAR(200)"); + if (!randomSchema().equals(modelsSchema())) { + testdb.with(createSchemaSqlFmt(), randomSchema()); } - createTable(randomTableSchema(), MODELS_STREAM_NAME + "_random", - columnClause(ImmutableMap.of(COL_ID + "_random", "INTEGER", COL_MAKE_ID + "_random", "INTEGER", COL_MODEL + "_random", "VARCHAR(200)"), - Optional.of(COL_ID + "_random"))); + testdb.with(createTableSqlFmt(), randomSchema(), RANDOM_TABLE_NAME, columnClause(randomColumns, Optional.of(COL_ID + "_random"))); for (final JsonNode recordJson : MODEL_RECORDS_RANDOM) { - writeRecords(recordJson, randomTableSchema(), MODELS_STREAM_NAME + "_random", + writeRecords(recordJson, randomSchema(), RANDOM_TABLE_NAME, COL_ID + "_random", COL_MAKE_ID + "_random", COL_MODEL + "_random"); } } + @AfterEach + protected void tearDown() { + testdb.close(); + } + + protected String columnClause(final Map columnsWithDataType, final Optional primaryKey) { + final StringBuilder columnClause = new StringBuilder(); + int i = 0; + for (final Map.Entry column : columnsWithDataType.entrySet()) { + columnClause.append(column.getKey()); + columnClause.append(" "); + columnClause.append(column.getValue()); + if (i < (columnsWithDataType.size() - 1)) { + columnClause.append(","); + columnClause.append(" "); + } + i++; + } + primaryKey.ifPresent(s -> columnClause.append(", PRIMARY KEY (").append(s).append(")")); + + return columnClause.toString(); + } + protected void writeModelRecord(final JsonNode recordJson) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL); + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL); } protected void writeRecords( @@ -204,14 +209,13 @@ protected void writeRecords( final String idCol, final String makeIdCol, final String modelCol) { - executeQuery( - String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", dbName, streamName, - idCol, makeIdCol, modelCol, - recordJson.get(idCol).asInt(), recordJson.get(makeIdCol).asInt(), - recordJson.get(modelCol).asText())); + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", dbName, streamName, + idCol, makeIdCol, modelCol, + recordJson.get(idCol).asInt(), recordJson.get(makeIdCol).asInt(), + recordJson.get(modelCol).asText()); } - protected static Set removeDuplicates(final Set messages) { + static protected Set removeDuplicates(final Set messages) { final Set existingDataRecordsWithoutUpdated = new HashSet<>(); final Set output = new HashSet<>(); @@ -272,7 +276,7 @@ protected void assertExpectedRecords(final Set expectedRecords, final private void assertExpectedRecords(final Set expectedRecords, final Set actualRecords, final Set cdcStreams) { - assertExpectedRecords(expectedRecords, actualRecords, cdcStreams, STREAM_NAMES, MODELS_SCHEMA); + assertExpectedRecords(expectedRecords, actualRecords, cdcStreams, STREAM_NAMES, modelsSchema()); } protected void assertExpectedRecords(final Set expectedRecords, @@ -309,7 +313,7 @@ protected void assertExpectedRecords(final Set expectedRecords, @DisplayName("On the first sync, produce returns records that exist in the database.") void testExistingData() throws Exception { final CdcTargetPosition targetPosition = cdcLatestTargetPosition(); - final AutoCloseableIterator read = getSource().read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = 
source().read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); final Set recordMessages = extractRecordMessages(actualRecords); @@ -332,19 +336,17 @@ protected void compareTargetPositionFromTheRecordsWithTargetPostionGeneratedBefo @Test @DisplayName("When a record is deleted, produces a deletion record.") void testDelete() throws Exception { - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessages1 = extractStateMessages(actualRecords1); assertExpectedStateMessages(stateMessages1); - executeQuery(String - .format("DELETE FROM %s.%s WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, - 11)); + testdb.with("DELETE FROM %s.%s WHERE %s = %s", modelsSchema(), MODELS_STREAM_NAME, COL_ID, 11); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List recordMessages2 = new ArrayList<>( extractRecordMessages(actualRecords2)); @@ -363,19 +365,18 @@ protected void assertExpectedStateMessagesFromIncrementalSync(final List read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessages1 = extractStateMessages(actualRecords1); assertExpectedStateMessages(stateMessages1); - executeQuery(String - .format("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", MODELS_SCHEMA, MODELS_STREAM_NAME, - COL_MODEL, updatedModel, COL_ID, 11)); + testdb.with("UPDATE %s.%s SET %s = '%s' WHERE %s = %s", modelsSchema(), MODELS_STREAM_NAME, + COL_MODEL, updatedModel, COL_ID, 11); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List recordMessages2 = new ArrayList<>( extractRecordMessages(actualRecords2)); @@ -402,8 +403,8 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -422,8 +423,8 @@ protected void testRecordsProducedDuringAndAfterSync() throws Exception { } final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator 
secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -457,7 +458,7 @@ protected void assertExpectedStateMessagesForRecordsProducedDuringAndAfterSync(f @Test @DisplayName("When both incremental CDC and full refresh are configured for different streams in a sync, the data is replicated as expected.") void testCdcAndFullRefreshInSameSync() throws Exception { - final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -467,18 +468,17 @@ void testCdcAndFullRefreshInSameSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", - columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); + final var columns = ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"); + testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2", columnClause(columns, Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, - COL_MAKE_ID, COL_MODEL); + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -491,8 +491,8 @@ void testCdcAndFullRefreshInSameSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -505,15 +505,15 @@ void testCdcAndFullRefreshInSameSync() throws Exception { recordMessages1, Collections.singleton(MODELS_STREAM_NAME), names, - MODELS_SCHEMA); + modelsSchema()); final JsonNode puntoRecord = Jsons .jsonNode(ImmutableMap.of(COL_ID, 100, COL_MAKE_ID, 3, COL_MODEL, "Punto")); writeModelRecord(puntoRecord); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(stateMessages1.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, state); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -525,17 +525,16 @@ void testCdcAndFullRefreshInSameSync() throws Exception { recordMessages2, Collections.singleton(MODELS_STREAM_NAME), names, - MODELS_SCHEMA); + modelsSchema()); } @Test @DisplayName("When no records exist, no records are returned.") void testNoData() 
throws Exception { - executeQuery(String.format("DELETE FROM %s.%s", MODELS_SCHEMA, MODELS_STREAM_NAME)); + testdb.with("DELETE FROM %s.%s", modelsSchema(), MODELS_STREAM_NAME); - final AutoCloseableIterator read = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = source().read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); final Set recordMessages = extractRecordMessages(actualRecords); @@ -551,14 +550,14 @@ protected void assertExpectedStateMessagesForNoData(final List read1 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read1 = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final List stateMessagesFromFirstSync = extractStateMessages(actualRecords1); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessagesFromFirstSync.get(stateMessagesFromFirstSync.size() - 1))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator read2 = source() + .read(config(), getConfiguredCatalog(), state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -570,14 +569,14 @@ void testNoDataOnSecondSync() throws Exception { @Test void testCheck() throws Exception { - final AirbyteConnectionStatus status = getSource().check(getConfig()); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED); } @Test void testDiscover() throws Exception { final AirbyteCatalog expectedCatalog = expectedCatalogForDiscover(); - final AirbyteCatalog actualCatalog = getSource().discover(getConfig()); + final AirbyteCatalog actualCatalog = source().discover(config()); assertEquals( expectedCatalog.getStreams().stream().sorted(Comparator.comparing(AirbyteStream::getName)) @@ -588,8 +587,8 @@ void testDiscover() throws Exception { @Test public void newTableSnapshotTest() throws Exception { - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final Set recordsFromFirstBatch = extractRecordMessages( @@ -605,7 +604,7 @@ public void newTableSnapshotTest() throws Exception { .map(AirbyteStreamState::getStreamDescriptor) .collect(Collectors.toSet()); assertEquals(1, streamsInStateAfterFirstSyncCompletion.size()); - assertTrue(streamsInStateAfterFirstSyncCompletion.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))); + assertTrue(streamsInStateAfterFirstSyncCompletion.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))); assertNotNull(stateMessageEmittedAfterFirstSyncCompletion.getData()); assertEquals((MODEL_RECORDS.size()), recordsFromFirstBatch.size()); @@ -616,8 +615,8 @@ public void newTableSnapshotTest() throws Exception { final ConfiguredAirbyteCatalog newTables = CatalogHelpers .toDefaultConfiguredCatalog(new AirbyteCatalog().withStreams(List.of( CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME + "_random", - randomTableSchema(), + RANDOM_TABLE_NAME, + randomSchema(), Field.of(COL_ID + 
"_random", JsonSchemaType.NUMBER), Field.of(COL_MAKE_ID + "_random", JsonSchemaType.NUMBER), Field.of(COL_MODEL + "_random", JsonSchemaType.STRING)) @@ -626,7 +625,7 @@ public void newTableSnapshotTest() throws Exception { newTables.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); final List combinedStreams = new ArrayList<>(); - combinedStreams.addAll(CONFIGURED_CATALOG.getStreams()); + combinedStreams.addAll(getConfiguredCatalog().getStreams()); combinedStreams.addAll(newTables.getStreams()); final ConfiguredAirbyteCatalog updatedCatalog = new ConfiguredAirbyteCatalog().withStreams(combinedStreams); @@ -644,8 +643,8 @@ public void newTableSnapshotTest() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), updatedCatalog, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), updatedCatalog, state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -654,10 +653,10 @@ public void newTableSnapshotTest() throws Exception { final Map> recordsStreamWise = extractRecordMessagesStreamWise(dataFromSecondBatch); assertTrue(recordsStreamWise.containsKey(MODELS_STREAM_NAME)); - assertTrue(recordsStreamWise.containsKey(MODELS_STREAM_NAME + "_random")); + assertTrue(recordsStreamWise.containsKey(RANDOM_TABLE_NAME)); final Set recordsForModelsStreamFromSecondBatch = recordsStreamWise.get(MODELS_STREAM_NAME); - final Set recordsForModelsRandomStreamFromSecondBatch = recordsStreamWise.get(MODELS_STREAM_NAME + "_random"); + final Set recordsForModelsRandomStreamFromSecondBatch = recordsStreamWise.get(RANDOM_TABLE_NAME); assertEquals((MODEL_RECORDS_RANDOM.size()), recordsForModelsRandomStreamFromSecondBatch.size()); assertEquals(20, recordsForModelsStreamFromSecondBatch.size()); @@ -665,8 +664,8 @@ public void newTableSnapshotTest() throws Exception { recordsForModelsRandomStreamFromSecondBatch.stream().map(AirbyteRecordMessage::getStream).collect( Collectors.toSet()), Sets - .newHashSet(MODELS_STREAM_NAME + "_random"), - randomTableSchema()); + .newHashSet(RANDOM_TABLE_NAME), + randomSchema()); assertExpectedRecords(recordsWritten, recordsForModelsStreamFromSecondBatch); /* @@ -686,14 +685,14 @@ public void newTableSnapshotTest() throws Exception { .jsonNode(ImmutableMap .of(COL_ID + "_random", 11000 + recordsCreated, COL_MAKE_ID + "_random", 1 + recordsCreated, COL_MODEL + "_random", "Fiesta-random" + recordsCreated)); - writeRecords(record2, randomTableSchema(), MODELS_STREAM_NAME + "_random", + writeRecords(record2, randomSchema(), RANDOM_TABLE_NAME, COL_ID + "_random", COL_MAKE_ID + "_random", COL_MODEL + "_random"); recordsWrittenInRandomTable.add(record2); } final JsonNode state2 = stateAfterSecondBatch.get(stateAfterSecondBatch.size() - 1).getData(); - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(getConfig(), updatedCatalog, state2); + final AutoCloseableIterator thirdBatchIterator = source() + .read(config(), updatedCatalog, state2); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -710,16 +709,17 @@ public void newTableSnapshotTest() throws Exception { .collect(Collectors.toSet()); assertTrue( streamsInSyncCompletionStateAfterThirdSync.contains( - new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))); - assertTrue(streamsInSyncCompletionStateAfterThirdSync.contains(new 
StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))); + new StreamDescriptor().withName(RANDOM_TABLE_NAME).withNamespace(randomSchema()))); + assertTrue( + streamsInSyncCompletionStateAfterThirdSync.contains(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))); assertNotNull(stateMessageEmittedAfterThirdSyncCompletion.getData()); final Map> recordsStreamWiseFromThirdBatch = extractRecordMessagesStreamWise(dataFromThirdBatch); assertTrue(recordsStreamWiseFromThirdBatch.containsKey(MODELS_STREAM_NAME)); - assertTrue(recordsStreamWiseFromThirdBatch.containsKey(MODELS_STREAM_NAME + "_random")); + assertTrue(recordsStreamWiseFromThirdBatch.containsKey(RANDOM_TABLE_NAME)); final Set recordsForModelsStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(MODELS_STREAM_NAME); - final Set recordsForModelsRandomStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(MODELS_STREAM_NAME + "_random"); + final Set recordsForModelsRandomStreamFromThirdBatch = recordsStreamWiseFromThirdBatch.get(RANDOM_TABLE_NAME); assertEquals(20, recordsForModelsStreamFromThirdBatch.size()); assertEquals(20, recordsForModelsRandomStreamFromThirdBatch.size()); @@ -728,8 +728,8 @@ public void newTableSnapshotTest() throws Exception { recordsForModelsRandomStreamFromThirdBatch.stream().map(AirbyteRecordMessage::getStream).collect( Collectors.toSet()), Sets - .newHashSet(MODELS_STREAM_NAME + "_random"), - randomTableSchema()); + .newHashSet(RANDOM_TABLE_NAME), + randomSchema()); } protected void assertStateMessagesForNewTableSnapshotTest(final List stateMessages, @@ -745,8 +745,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List streams = expectedCatalog.getStreams(); // stream with PK @@ -779,7 +779,7 @@ protected AirbyteCatalog expectedCatalogForDiscover() { final AirbyteStream streamWithoutPK = CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)); @@ -789,8 +789,8 @@ protected AirbyteCatalog expectedCatalogForDiscover() { addCdcMetadataColumns(streamWithoutPK); final AirbyteStream randomStream = CatalogHelpers.createAirbyteStream( - MODELS_STREAM_NAME + "_random", - randomTableSchema(), + RANDOM_TABLE_NAME, + randomSchema(), Field.of(COL_ID + "_random", JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID + "_random", JsonSchemaType.INTEGER), Field.of(COL_MODEL + "_random", JsonSchemaType.STRING)) @@ -807,31 +807,4 @@ protected AirbyteCatalog expectedCatalogForDiscover() { return expectedCatalog; } - /** - * The schema of a random table which is used as a new table in snapshot test - */ - protected abstract String randomTableSchema(); - - protected abstract CdcTargetPosition cdcLatestTargetPosition(); - - protected abstract CdcTargetPosition extractPosition(final JsonNode record); - - protected abstract void assertNullCdcMetaData(final JsonNode data); - - protected abstract void assertCdcMetaData(final JsonNode data, final boolean deletedAtNull); - - protected abstract void removeCDCColumns(final ObjectNode data); - - protected abstract void addCdcMetadataColumns(final AirbyteStream stream); - - protected abstract void addCdcDefaultCursorField(final AirbyteStream stream); - - protected abstract Source getSource(); - - protected abstract JsonNode getConfig(); - - protected abstract Database getDatabase(); - - protected abstract void assertExpectedStateMessages(final List 
stateMessages); - } diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java index bb613be8ac02..fea6f5709024 100644 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/integrations/source/jdbc/test/JdbcSourceAcceptanceTest.java @@ -4,7 +4,6 @@ package io.airbyte.cdk.integrations.source.jdbc.test; -import static io.airbyte.cdk.db.jdbc.JdbcUtils.getDefaultSourceOperations; import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier; import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -17,20 +16,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcSourceOperations; +import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; -import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; import io.airbyte.cdk.integrations.base.Source; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils; import io.airbyte.cdk.integrations.source.relationaldb.models.DbState; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; +import io.airbyte.cdk.testutils.TestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -60,134 +54,97 @@ import java.util.Map; import java.util.Optional; import java.util.Set; -import java.util.function.Function; import java.util.stream.Collectors; -import javax.sql.DataSource; import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Tests that should be run on all Sources that extend the AbstractJdbcSource. */ -// How leverage these tests: -// 1. Extend this class in the test module of the Source. -// 2. From the class that extends this one, you MUST call super.setup() in a @BeforeEach method. -// Otherwise you'll see many NPE issues. Your before each should also handle providing a fresh -// database between each test. -// 3. From the class that extends this one, implement a @AfterEach that cleans out the database -// between each test. -// 4. Then implement the abstract methods documented below. 
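The "how to leverage these tests" checklist removed below is superseded by the parameterized base class and its abstract hooks (config(), source(), createTestDatabase(), supportsSchemas()). A hypothetical minimal subclass wiring them up; MySource and MyTestDatabase are placeholder names rather than types from this patch, and the testConfigBuilder()/initialized() calls mirror what DefaultJdbcSourceAcceptanceTest does earlier in this diff.

class MyJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest<MySource, MyTestDatabase> {

  @Override
  protected JsonNode config() {
    // Derive the connector config from the per-test database instance.
    return testdb.testConfigBuilder().build();
  }

  @Override
  protected MySource source() {
    return new MySource();
  }

  @Override
  protected MyTestDatabase createTestDatabase() {
    // initialized() is assumed to run the database's bootstrap SQL, as in the
    // BareBonesTestDatabase example earlier in this patch.
    return new MyTestDatabase().initialized();
  }

  @Override
  protected boolean supportsSchemas() {
    return true;
  }
}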
@SuppressFBWarnings( value = {"MS_SHOULD_BE_FINAL"}, - justification = "The static variables are updated in sub classes for convenience, and cannot be final.") -public abstract class JdbcSourceAcceptanceTest { - - // schema name must be randomized for each test run, - // otherwise parallel runs can interfere with each other - public static String SCHEMA_NAME = Strings.addRandomSuffix("jdbc_integration_test1", "_", 5).toLowerCase(); - public static String SCHEMA_NAME2 = Strings.addRandomSuffix("jdbc_integration_test2", "_", 5).toLowerCase(); - public static Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); - - public static String TABLE_NAME = "id_and_name"; - public static String TABLE_NAME_WITH_SPACES = "id and name"; - public static String TABLE_NAME_WITHOUT_PK = "id_and_name_without_pk"; - public static String TABLE_NAME_COMPOSITE_PK = "full_name_composite_pk"; - public static String TABLE_NAME_WITHOUT_CURSOR_TYPE = "table_without_cursor_type"; - public static String TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE = "table_with_null_cursor_type"; + justification = "The static variables are updated in subclasses for convenience, and cannot be final.") +abstract public class JdbcSourceAcceptanceTest> { + + static protected String SCHEMA_NAME = "jdbc_integration_test1"; + static protected String SCHEMA_NAME2 = "jdbc_integration_test2"; + static protected Set TEST_SCHEMAS = Set.of(SCHEMA_NAME, SCHEMA_NAME2); + + static protected String TABLE_NAME = "id_and_name"; + static protected String TABLE_NAME_WITH_SPACES = "id and name"; + static protected String TABLE_NAME_WITHOUT_PK = "id_and_name_without_pk"; + static protected String TABLE_NAME_COMPOSITE_PK = "full_name_composite_pk"; + static protected String TABLE_NAME_WITHOUT_CURSOR_TYPE = "table_without_cursor_type"; + static protected String TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE = "table_with_null_cursor_type"; // this table is used in testing incremental sync with concurrent insertions - public static String TABLE_NAME_AND_TIMESTAMP = "name_and_timestamp"; - - public static String COL_ID = "id"; - public static String COL_NAME = "name"; - public static String COL_UPDATED_AT = "updated_at"; - public static String COL_FIRST_NAME = "first_name"; - public static String COL_LAST_NAME = "last_name"; - public static String COL_LAST_NAME_WITH_SPACE = "last name"; - public static String COL_CURSOR = "cursor_field"; - public static String COL_TIMESTAMP = "timestamp"; - public static String COL_TIMESTAMP_TYPE = "TIMESTAMP"; - public static Number ID_VALUE_1 = 1; - public static Number ID_VALUE_2 = 2; - public static Number ID_VALUE_3 = 3; - public static Number ID_VALUE_4 = 4; - public static Number ID_VALUE_5 = 5; - - public static String DROP_SCHEMA_QUERY = "DROP SCHEMA IF EXISTS %s CASCADE"; - public static String COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String COLUMN_CLAUSE_WITHOUT_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String COLUMN_CLAUSE_WITH_COMPOSITE_PK = + static protected String TABLE_NAME_AND_TIMESTAMP = "name_and_timestamp"; + + static protected String COL_ID = "id"; + static protected String COL_NAME = "name"; + static protected String COL_UPDATED_AT = "updated_at"; + static protected String COL_FIRST_NAME = "first_name"; + static protected String COL_LAST_NAME = "last_name"; + static protected String COL_LAST_NAME_WITH_SPACE = "last name"; + static protected String COL_CURSOR = "cursor_field"; + static protected String 
COL_TIMESTAMP = "timestamp"; + static protected String COL_TIMESTAMP_TYPE = "TIMESTAMP"; + static protected Number ID_VALUE_1 = 1; + static protected Number ID_VALUE_2 = 2; + static protected Number ID_VALUE_3 = 3; + static protected Number ID_VALUE_4 = 4; + static protected Number ID_VALUE_5 = 5; + + static protected String DROP_SCHEMA_QUERY = "DROP SCHEMA IF EXISTS %s CASCADE"; + static protected String COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; + static protected String COLUMN_CLAUSE_WITHOUT_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; + static protected String COLUMN_CLAUSE_WITH_COMPOSITE_PK = "first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL"; - public static String CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s bit NOT NULL);"; - public static String INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(0);"; - public static String CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s VARCHAR(20));"; - public static String INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES('Hello world :)');"; - public static String INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY = "INSERT INTO %s (name, timestamp) VALUES ('%s', '%s')"; + static protected String CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s bit NOT NULL);"; + static protected String INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(0);"; + static protected String CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s VARCHAR(20));"; + static protected String INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES('Hello world :)');"; + static protected String INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY = "INSERT INTO %s (name, timestamp) VALUES ('%s', '%s')"; - public JsonNode config; - public DataSource dataSource; - public JdbcDatabase database; - public JdbcSourceOperations sourceOperations = getSourceOperations(); - public Source source; - public static String streamName; + protected T testdb; - /** - * These tests write records without specifying a namespace (schema name). They will be written into - * whatever the default schema is for the database. When they are discovered they will be namespaced - * by the schema name (e.g. .). Thus the source needs to tell the - * tests what that default schema name is. If the database does not support schemas, then database - * name should used instead. - * - * @return name that will be used to namespace the record. - */ - public abstract boolean supportsSchemas(); + protected String streamName() { + return TABLE_NAME; + } /** * A valid configuration to connect to a test database. * * @return config */ - public abstract JsonNode getConfig(); - - /** - * Full qualified class name of the JDBC driver for the database. - * - * @return driver - */ - public abstract String getDriverClass(); + abstract protected JsonNode config(); /** * An instance of the source that should be tests. * * @return abstract jdbc source */ - public abstract AbstractJdbcSource getJdbcSource(); + abstract protected S source(); /** - * In some cases the Source that is being tested may be an AbstractJdbcSource, but because it is - * decorated, Java cannot recognize it as such. In these cases, as a workaround a user can choose to - * override getJdbcSource and have it return null. 
Then they can override this method with the - * decorated source AND override getToDatabaseConfigFunction with the appropriate - * toDatabaseConfigFunction that is hidden behind the decorator. + * Creates a TestDatabase instance to be used in {@link #setup()}. * - * @return source + * @return TestDatabase instance to use for test case. */ - public Source getSource() { - return getJdbcSource(); - } + abstract protected T createTestDatabase(); /** - * See getSource() for when to override this method. + * These tests write records without specifying a namespace (schema name). They will be written into + * whatever the default schema is for the database. When they are discovered they will be namespaced + * by the schema name (e.g. .). Thus the source needs to tell the + * tests what that default schema name is. If the database does not support schemas, then database + * name should used instead. * - * @return a function that maps a source's config to a jdbc config. + * @return name that will be used to namespace the record. */ - public Function getToDatabaseConfigFunction() { - return getJdbcSource()::toDatabaseConfig; - } - - protected JdbcSourceOperations getSourceOperations() { - return getDefaultSourceOperations(); - } + abstract protected boolean supportsSchemas(); protected String createTableQuery(final String tableName, final String columnClause, final String primaryKeyClause) { return String.format("CREATE TABLE %s(%s %s %s)", @@ -211,100 +168,46 @@ protected String primaryKeyClause(final List columns) { return clause.toString(); } - protected String getJdbcParameterDelimiter() { - return "&"; - } - + @BeforeEach public void setup() throws Exception { - source = getSource(); - config = getConfig(); - final JsonNode jdbcConfig = getToDatabaseConfigFunction().apply(config); - - streamName = TABLE_NAME; - - dataSource = getDataSource(jdbcConfig); - - database = new StreamingJdbcDatabase(dataSource, - getDefaultSourceOperations(), - AdaptiveStreamingQueryConfig::new); - + testdb = createTestDatabase(); if (supportsSchemas()) { createSchemas(); } - - if (getDriverClass().toLowerCase().contains("oracle")) { - database.execute(connection -> connection.createStatement() - .execute("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD'")); + if (testdb.getDatabaseDriver().equals(DatabaseDriver.ORACLE)) { + testdb.with("ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD'"); } - - database.execute(connection -> { - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), - COLUMN_CLAUSE_WITHOUT_PK, "")); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, 
name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), - COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(List.of("first_name", "last_name")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('first' ,'picard', '2004-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('second', 'crusher', '2005-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at) VALUES ('third', 'vash', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - - }); - } - - protected void maybeSetShorterConnectionTimeout() { + testdb + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, primaryKeyClause(Collections.singletonList("id")))) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), COLUMN_CLAUSE_WITHOUT_PK, "")) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1, 'picard', '2004-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, + primaryKeyClause(List.of("first_name", "last_name")))) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('first', 'picard', '2004-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('second', 'crusher', '2005-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with("INSERT INTO %s(first_name, last_name, updated_at) VALUES ('third', 'vash', '2006-10-19')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)); + } + + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { // Optionally implement this to speed up test cases which will result in a connection timeout. } - protected DataSource getDataSource(final JsonNode jdbcConfig) { - return DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? 
jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, getJdbcParameterDelimiter())); - } - - public void tearDown() throws SQLException { - dropSchemas(); + @AfterEach + public void tearDown() { + testdb.close(); } @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final String resourceString = MoreResources.readResource("spec.json"); final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class); @@ -313,22 +216,23 @@ void testSpec() throws Exception { @Test void testCheckSuccess() throws Exception { - final AirbyteConnectionStatus actual = source.check(config); + final AirbyteConnectionStatus actual = source().check(config()); final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); assertEquals(expected, actual); } @Test void testCheckFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus actual = source.check(config); + final AirbyteConnectionStatus actual = source().check(config); assertEquals(Status.FAILED, actual.getStatus()); } @Test void testDiscover() throws Exception { - final AirbyteCatalog actual = filterOutOtherSchemas(source.discover(config)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); assertEquals(expected.getStreams().size(), actual.getStreams().size()); actual.getStreams().forEach(actualStream -> { @@ -343,13 +247,9 @@ void testDiscover() throws Exception { @Test protected void testDiscoverWithNonCursorFields() throws Exception { - database.execute(connection -> { - connection.createStatement() - .execute(String.format(CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE), COL_CURSOR)); - connection.createStatement().execute(String.format(INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY, - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE))); - }); - final AirbyteCatalog actual = filterOutOtherSchemas(source.discover(config)); + testdb.with(CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE), COL_CURSOR) + .with(INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITHOUT_CURSOR_TYPE)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteStream stream = actual.getStreams().stream().filter(s -> s.getName().equalsIgnoreCase(TABLE_NAME_WITHOUT_CURSOR_TYPE)).findFirst().orElse(null); assertNotNull(stream); @@ -360,14 +260,9 @@ protected void testDiscoverWithNonCursorFields() throws Exception { @Test protected void testDiscoverWithNullableCursorFields() throws Exception { - database.execute(connection -> { - connection.createStatement() - .execute(String.format(CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE), - COL_CURSOR)); - connection.createStatement().execute(String.format(INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, - getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE))); - }); - final AirbyteCatalog actual = 
filterOutOtherSchemas(source.discover(config)); + testdb.with(CREATE_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE), COL_CURSOR) + .with(INSERT_TABLE_WITH_NULLABLE_CURSOR_TYPE_QUERY, getFullyQualifiedTableName(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE)); + final AirbyteCatalog actual = filterOutOtherSchemas(source().discover(config())); final AirbyteStream stream = actual.getStreams().stream().filter(s -> s.getName().equalsIgnoreCase(TABLE_NAME_WITH_NULLABLE_CURSOR_TYPE)).findFirst().orElse(null); assertNotNull(stream); @@ -394,28 +289,22 @@ protected AirbyteCatalog filterOutOtherSchemas(final AirbyteCatalog catalog) { @Test void testDiscoverWithMultipleSchemas() throws Exception { // clickhouse and mysql do not have a concept of schemas, so this test does not make sense for them. - String driverClass = getDriverClass().toLowerCase(); - if (driverClass.contains("mysql") || driverClass.contains("clickhouse") || driverClass.contains("teradata")) { - return; + switch (testdb.getDatabaseDriver()) { + case MYSQL, CLICKHOUSE, TERADATA: + return; } // add table and data to a separate schema. - database.execute(connection -> { - connection.createStatement().execute( - String.format("CREATE TABLE %s(id VARCHAR(200) NOT NULL, name VARCHAR(200) NOT NULL)", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('1','picard')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES ('3', 'vash')", - RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME))); - }); + testdb.with("CREATE TABLE %s(id VARCHAR(200) NOT NULL, name VARCHAR(200) NOT NULL)", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('1','picard')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('2', 'crusher')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)) + .with("INSERT INTO %s(id, name) VALUES ('3', 'vash')", + RelationalDbQueryUtils.getFullyQualifiedTableName(SCHEMA_NAME2, TABLE_NAME)); - final AirbyteCatalog actual = source.discover(config); + final AirbyteCatalog actual = source().discover(config()); final AirbyteCatalog expected = getCatalog(getDefaultNamespace()); final List catalogStreams = new ArrayList<>(); @@ -438,7 +327,7 @@ void testDiscoverWithMultipleSchemas() throws Exception { void testReadSuccess() throws Exception { final List actualMessages = MoreIterators.toList( - source.read(config, getConfiguredCatalogWithOneStream(getDefaultNamespace()), null)); + source().read(config(), getConfiguredCatalogWithOneStream(getDefaultNamespace()), null)); setEmittedAtToNull(actualMessages); final List expectedMessages = getTestMessages(); @@ -449,9 +338,9 @@ void testReadSuccess() throws Exception { @Test void testReadOneColumn() throws Exception { final ConfiguredAirbyteCatalog catalog = CatalogHelpers - .createConfiguredAirbyteCatalog(streamName, getDefaultNamespace(), Field.of(COL_ID, JsonSchemaType.NUMBER)); + .createConfiguredAirbyteCatalog(streamName(), 
getDefaultNamespace(), Field.of(COL_ID, JsonSchemaType.NUMBER)); final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -481,23 +370,12 @@ void testReadMultipleTables() throws Exception { final List expectedMessages = new ArrayList<>(getTestMessages()); for (int i = 2; i < 10; i++) { - final int iFinal = i; - final String streamName2 = streamName + i; - database.execute(connection -> { - connection.createStatement() - .execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME + iFinal), - "id INTEGER, name VARCHAR(200)", "")); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (1,'picard')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (2, 'crusher')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - connection.createStatement() - .execute(String.format("INSERT INTO %s(id, name) VALUES (3, 'vash')", - getFullyQualifiedTableName(TABLE_NAME + iFinal))); - }); + final String streamName2 = streamName() + i; + final String tableName = getFullyQualifiedTableName(TABLE_NAME + i); + testdb.with(createTableQuery(tableName, "id INTEGER, name VARCHAR(200)", "")) + .with("INSERT INTO %s(id, name) VALUES (1,'picard')", tableName) + .with("INSERT INTO %s(id, name) VALUES (2, 'crusher')", tableName) + .with("INSERT INTO %s(id, name) VALUES (3, 'vash')", tableName); catalog.getStreams().add(CatalogHelpers.createConfiguredAirbyteStream( streamName2, getDefaultNamespace(), @@ -508,7 +386,7 @@ void testReadMultipleTables() throws Exception { } final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -541,7 +419,7 @@ void testTablesWithQuoting() throws Exception { getConfiguredCatalogWithOneStream(getDefaultNamespace()).getStreams().get(0), streamForTableWithSpaces)); final List actualMessages = MoreIterators - .toList(source.read(config, catalog, null)); + .toList(source().read(config(), catalog, null)); setEmittedAtToNull(actualMessages); @@ -577,7 +455,7 @@ void testReadFailure() { .withStreams(List.of(spiedAbStream)); doCallRealMethod().doThrow(new RuntimeException()).when(spiedAbStream).getStream(); - assertThrows(RuntimeException.class, () -> source.read(config, catalog, null)); + assertThrows(RuntimeException.class, () -> source().read(config(), catalog, null)); } @Test @@ -665,6 +543,7 @@ void testIncrementalCursorChanges() throws Exception { @Test void testReadOneTableIncrementallyTwice() throws Exception { + final var config = config(); final String namespace = getDefaultNamespace(); final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream(namespace); configuredCatalog.getStreams().forEach(airbyteStream -> { @@ -674,7 +553,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { }); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); + .toList(source().read(config, configuredCatalog, createEmptyState(streamName(), namespace))); final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() .filter(r -> r.getType() == Type.STATE).findFirst(); @@ -683,7 +562,7 @@ void testReadOneTableIncrementallyTwice() throws Exception { executeStatementReadIncrementallyTwice(); final 
List actualMessagesSecondSync = MoreIterators - .toList(source.read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); + .toList(source().read(config, configuredCatalog, extractState(stateAfterFirstSyncOptional.get()))); assertEquals(2, (int) actualMessagesSecondSync.stream().filter(r -> r.getType() == Type.RECORD).count()); @@ -696,33 +575,28 @@ void testReadOneTableIncrementallyTwice() throws Exception { assertTrue(actualMessagesSecondSync.containsAll(expectedMessages)); } - protected void executeStatementReadIncrementallyTwice() throws SQLException { - database.execute(connection -> { - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (4,'riker', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at) VALUES (5, 'data', '2006-10-19')", - getFullyQualifiedTableName(TABLE_NAME))); - }); + protected void executeStatementReadIncrementallyTwice() { + testdb + .with("INSERT INTO %s(id, name, updated_at) VALUES (4, 'riker', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (5, 'data', '2006-10-19')", getFullyQualifiedTableName(TABLE_NAME)); } protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_5, COL_NAME, "data", COL_UPDATED_AT, "2006-10-19"))))); final DbStreamState state = new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("5") @@ -734,20 +608,12 @@ protected List getExpectedAirbyteMessagesSecondSync(final String @Test void testReadMultipleTablesIncrementally() throws Exception { final String tableName2 = TABLE_NAME + 2; - final String streamName2 = streamName + 2; - database.execute(ctx -> { - ctx.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(tableName2), "id INTEGER, name VARCHAR(200)", "")); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (1,'picard')", - getFullyQualifiedTableName(tableName2))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (2, 'crusher')", - getFullyQualifiedTableName(tableName2))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name) VALUES (3, 'vash')", - getFullyQualifiedTableName(tableName2))); - }); + final String streamName2 = streamName() + 2; + final String fqTableName2 = getFullyQualifiedTableName(tableName2); + testdb.with(createTableQuery(fqTableName2, "id INTEGER, name VARCHAR(200)", "")) + .with("INSERT INTO %s(id, name) VALUES (1,'picard')", fqTableName2) + .with("INSERT INTO %s(id, name) VALUES (2, 'crusher')", fqTableName2) + .with("INSERT INTO %s(id, name) VALUES (3, 'vash')", fqTableName2); 
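// --- Illustrative sketch, not part of this patch: what a concrete test class looks like
// after this refactor. The old config/dataSource/database fields are gone; a subclass now
// wires the abstract hooks createTestDatabase(), config(), source() and supportsSchemas().
// All "FakeDb" names are hypothetical placeholders, and the generic parameters are
// approximated here because the patch text elides them.
public class FakeDbJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest<FakeDbSource, FakeDbTestDatabase> {

  @Override
  protected FakeDbTestDatabase createTestDatabase() {
    // Returns a fresh, isolated database; the base class then seeds the common fixture
    // tables in setup() via the fluent with(...) calls shown above. The shared container
    // comes from a ContainerFactory (added further down in this patch).
    return new FakeDbTestDatabase(new FakeDbContainerFactory().shared("fakedb:dev"))
        .initialized();
  }

  @Override
  protected JsonNode config() {
    // testConfigBuilder() pre-populates host, port, credentials and database name.
    return testdb.testConfigBuilder().withoutSsl().build();
  }

  @Override
  protected FakeDbSource source() {
    return new FakeDbSource();
  }

  @Override
  protected boolean supportsSchemas() {
    return false;
  }

}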
final String namespace = getDefaultNamespace(); final ConfiguredAirbyteCatalog configuredCatalog = getConfiguredCatalogWithOneStream( @@ -764,7 +630,7 @@ void testReadMultipleTablesIncrementally() throws Exception { }); final List actualMessagesFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, createEmptyState(streamName, namespace))); + .toList(source().read(config(), configuredCatalog, createEmptyState(streamName(), namespace))); // get last state message. final Optional stateAfterFirstSyncOptional = actualMessagesFirstSync.stream() @@ -779,7 +645,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Represents the state after the first stream has been updated final List expectedStateStreams1 = List.of( new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("3") @@ -792,7 +658,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Represents the state after both streams have been updated final List expectedStateStreams2 = List.of( new DbStreamState() - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(List.of(COL_ID)) .withCursor("3") @@ -843,17 +709,14 @@ protected void incrementalCursorCheck( // See https://github.com/airbytehq/airbyte/issues/14732 for rationale and details. @Test public void testIncrementalWithConcurrentInsertion() throws Exception { - final String driverName = getDriverClass().toLowerCase(); final String namespace = getDefaultNamespace(); final String fullyQualifiedTableName = getFullyQualifiedTableName(TABLE_NAME_AND_TIMESTAMP); final String columnDefinition = String.format("name VARCHAR(200) NOT NULL, %s %s NOT NULL", COL_TIMESTAMP, COL_TIMESTAMP_TYPE); // 1st sync - database.execute(ctx -> { - ctx.createStatement().execute(createTableQuery(fullyQualifiedTableName, columnDefinition, "")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "a", "2021-01-01 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "b", "2021-01-01 00:00:00")); - }); + testdb.with(createTableQuery(fullyQualifiedTableName, columnDefinition, "")) + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "a", "2021-01-01 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "b", "2021-01-01 00:00:00"); final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( new AirbyteCatalog().withStreams(List.of( @@ -870,7 +733,7 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { }); final List firstSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createEmptyState(TABLE_NAME_AND_TIMESTAMP, namespace))); + source().read(config(), configuredCatalog, createEmptyState(TABLE_NAME_AND_TIMESTAMP, namespace))); // cursor after 1st sync: 2021-01-01 00:00:00, count 2 final Optional firstSyncStateOptional = firstSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -886,19 +749,17 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { .map(r -> r.getRecord().getData().get(COL_NAME).asText()) .toList(); // teradata doesn't make insertion order guarantee when equal ordering value - if (driverName.contains("teradata")) { + if (testdb.getDatabaseDriver().equals(DatabaseDriver.TERADATA)) { 
assertThat(List.of("a", "b"), Matchers.containsInAnyOrder(firstSyncNames.toArray())); } else { assertEquals(List.of("a", "b"), firstSyncNames); } // 2nd sync - database.execute(ctx -> { - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "c", "2021-01-02 00:00:00")); - }); + testdb.with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "c", "2021-01-02 00:00:00"); final List secondSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, firstSyncState))); + source().read(config(), configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, firstSyncState))); // cursor after 2nd sync: 2021-01-02 00:00:00, count 1 final Optional secondSyncStateOptional = secondSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -916,14 +777,12 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { assertEquals(List.of("c"), secondSyncNames); // 3rd sync has records with duplicated cursors - database.execute(ctx -> { - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "d", "2021-01-02 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "e", "2021-01-02 00:00:00")); - ctx.createStatement().execute(String.format(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "f", "2021-01-03 00:00:00")); - }); + testdb.with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "d", "2021-01-02 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "e", "2021-01-02 00:00:00") + .with(INSERT_TABLE_NAME_AND_TIMESTAMP_QUERY, fullyQualifiedTableName, "f", "2021-01-03 00:00:00"); final List thirdSyncActualMessages = MoreIterators.toList( - source.read(config, configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, secondSyncState))); + source().read(config(), configuredCatalog, createState(TABLE_NAME_AND_TIMESTAMP, namespace, secondSyncState))); // Cursor after 3rd sync is: 2021-01-03 00:00:00, count 1. 
final Optional thirdSyncStateOptional = thirdSyncActualMessages.stream().filter(r -> r.getType() == Type.STATE).findFirst(); @@ -942,12 +801,11 @@ public void testIncrementalWithConcurrentInsertion() throws Exception { .toList(); // teradata doesn't make insertion order guarantee when equal ordering value - if (driverName.contains("teradata")) { + if (testdb.getDatabaseDriver().equals(DatabaseDriver.TERADATA)) { assertThat(List.of("c", "d", "e", "f"), Matchers.containsInAnyOrder(thirdSyncExpectedNames.toArray())); } else { assertEquals(List.of("c", "d", "e", "f"), thirdSyncExpectedNames); } - } protected JsonNode getStateData(final AirbyteMessage airbyteMessage, final String streamName) { @@ -989,7 +847,7 @@ protected void incrementalCursorCheck( final DbStreamState dbStreamState = buildStreamState(airbyteStream, initialCursorField, initialCursorValue); final List actualMessages = MoreIterators - .toList(source.read(config, configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); + .toList(source().read(config(), configuredCatalog, Jsons.jsonNode(createState(List.of(dbStreamState))))); setEmittedAtToNull(actualMessages); @@ -1019,7 +877,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalogWithOneStream(final Strin final ConfiguredAirbyteCatalog catalog = CatalogHelpers.toDefaultConfiguredCatalog(getCatalog(defaultNamespace)); // Filter to only keep the main stream name as configured stream catalog.withStreams( - catalog.getStreams().stream().filter(s -> s.getStream().getName().equals(streamName)) + catalog.getStreams().stream().filter(s -> s.getStream().getName().equals(streamName())) .collect(Collectors.toList())); return catalog; } @@ -1056,20 +914,20 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { protected List getTestMessages() { return List.of( new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_1, COL_NAME, "picard", COL_UPDATED_AT, "2004-10-19")))), new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_2, COL_NAME, "crusher", COL_UPDATED_AT, "2005-10-19")))), new AirbyteMessage().withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(getDefaultNamespace()) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(getDefaultNamespace()) .withData(Jsons.jsonNode(Map .of(COL_ID, ID_VALUE_3, COL_NAME, "vash", @@ -1108,7 +966,7 @@ protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { final String tableNameWithSpaces = TABLE_NAME_WITH_SPACES + "2"; final String streamName2 = tableNameWithSpaces; - database.execute(connection -> { + try (final var connection = testdb.getDataSource().getConnection()) { final String identifierQuoteString = connection.getMetaData().getIdentifierQuoteString(); connection.createStatement() .execute( @@ -1132,7 +990,7 @@ protected ConfiguredAirbyteStream createTableWithSpaces() throws SQLException { getFullyQualifiedTableName( enquoteIdentifier(tableNameWithSpaces, identifierQuoteString)), enquoteIdentifier(COL_LAST_NAME_WITH_SPACE, 
identifierQuoteString))); - }); + } return CatalogHelpers.createConfiguredAirbyteStream( streamName2, @@ -1145,32 +1003,27 @@ public String getFullyQualifiedTableName(final String tableName) { return RelationalDbQueryUtils.getFullyQualifiedTableName(getDefaultSchemaName(), tableName); } - public void createSchemas() throws SQLException { + protected void createSchemas() { if (supportsSchemas()) { for (final String schemaName : TEST_SCHEMAS) { - final String createSchemaQuery = String.format("CREATE SCHEMA %s;", schemaName); - database.execute(connection -> connection.createStatement().execute(createSchemaQuery)); + testdb.with("CREATE SCHEMA %s;", schemaName); } } } - public void dropSchemas() throws SQLException { + protected void dropSchemas() { if (supportsSchemas()) { for (final String schemaName : TEST_SCHEMAS) { - final String dropSchemaQuery = String - .format(DROP_SCHEMA_QUERY, schemaName); - database.execute(connection -> connection.createStatement().execute(dropSchemaQuery)); + testdb.with(DROP_SCHEMA_QUERY, schemaName); } } } private JsonNode convertIdBasedOnDatabase(final int idValue) { - final var driverClass = getDriverClass().toLowerCase(); - if (driverClass.contains("oracle") || driverClass.contains("snowflake")) { - return Jsons.jsonNode(BigDecimal.valueOf(idValue)); - } else { - return Jsons.jsonNode(idValue); - } + return switch (testdb.getDatabaseDriver()) { + case ORACLE, SNOWFLAKE -> Jsons.jsonNode(BigDecimal.valueOf(idValue)); + default -> Jsons.jsonNode(idValue); + }; } private String getDefaultSchemaName() { @@ -1178,13 +1031,11 @@ private String getDefaultSchemaName() { } protected String getDefaultNamespace() { - // mysql does not support schemas. it namespaces using database names instead. - if (getDriverClass().toLowerCase().contains("mysql") || getDriverClass().toLowerCase().contains("clickhouse") || - getDriverClass().toLowerCase().contains("teradata")) { - return config.get(JdbcUtils.DATABASE_KEY).asText(); - } else { - return SCHEMA_NAME; - } + return switch (testdb.getDatabaseDriver()) { + // mysql does not support schemas, it namespaces using database names instead. + case MYSQL, CLICKHOUSE, TERADATA -> testdb.getDatabaseName(); + default -> SCHEMA_NAME; + }; } protected static void setEmittedAtToNull(final Iterable messages) { diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java new file mode 100644 index 000000000000..4735716dc05e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/ContainerFactory.java @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.cdk.testutils; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.JdbcDatabaseContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.utility.DockerImageName; + +/** + * ContainerFactory is the companion interface to {@link TestDatabase} for providing it with + * suitable testcontainer instances. 
+ */ +public interface ContainerFactory> { + + /** + * Creates a new, unshared testcontainer instance. This usually wraps the default constructor for + * the testcontainer type. + */ + C createNewContainer(DockerImageName imageName); + + /** + * Returns the class object of the testcontainer. + */ + Class getContainerClass(); + + /** + * Returns a shared instance of the testcontainer. + */ + default C shared(String imageName, String... methods) { + final String mapKey = Stream.concat( + Stream.of(imageName, this.getClass().getCanonicalName()), + Stream.of(methods)) + .collect(Collectors.joining("+")); + return Singleton.getOrCreate(mapKey, this); + } + + /** + * This class is exclusively used by {@link #shared(String, String...)}. It wraps a specific shared + * testcontainer instance, which is created exactly once. + */ + class Singleton> { + + static private final Logger LOGGER = LoggerFactory.getLogger(Singleton.class); + static private final ConcurrentHashMap> LAZY = new ConcurrentHashMap<>(); + + @SuppressWarnings("unchecked") + static private > C getOrCreate(String mapKey, ContainerFactory factory) { + final Singleton singleton = LAZY.computeIfAbsent(mapKey, Singleton::new); + return ((Singleton) singleton).getOrCreate(factory); + } + + final private String imageName; + final private List methodNames; + + private C sharedContainer; + private RuntimeException containerCreationError; + + private Singleton(String imageNamePlusMethods) { + final String[] parts = imageNamePlusMethods.split("\\+"); + this.imageName = parts[0]; + this.methodNames = Arrays.stream(parts).skip(2).toList(); + } + + private synchronized C getOrCreate(ContainerFactory factory) { + if (sharedContainer == null && containerCreationError == null) { + try { + create(imageName, factory, methodNames); + } catch (RuntimeException e) { + sharedContainer = null; + containerCreationError = e; + } + } + if (containerCreationError != null) { + throw new RuntimeException( + "Error during container creation for imageName=" + imageName + + ", factory=" + factory.getClass().getName() + + ", methods=" + methodNames, + containerCreationError); + } + return sharedContainer; + } + + private void create(String imageName, ContainerFactory factory, List methodNames) { + LOGGER.info("Creating new shared container based on {} with {}.", imageName, methodNames); + try { + final var parsed = DockerImageName.parse(imageName); + final var methods = new ArrayList(); + for (String methodName : methodNames) { + methods.add(factory.getClass().getMethod(methodName, factory.getContainerClass())); + } + sharedContainer = factory.createNewContainer(parsed); + sharedContainer.withLogConsumer(new Slf4jLogConsumer(LOGGER)); + for (Method method : methods) { + LOGGER.info("Calling {} in {} on new shared container based on {}.", + method.getName(), factory.getClass().getName(), imageName); + method.invoke(factory, sharedContainer); + } + sharedContainer.start(); + } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new RuntimeException(e); + } + } + + } + +} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java deleted file mode 100644 index b52752f38b64..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/PostgresTestDatabase.java +++ /dev/null @@ -1,304 +0,0 @@ -/* - * 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.cdk.testutils; - -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.PostgresUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.string.Strings; -import java.io.IOException; -import java.io.UncheckedIOException; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.Network; -import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.DockerImageName; -import org.testcontainers.utility.MountableFile; - -/** - * {@link PostgresTestDatabase} is a convenience object which allows for efficient use of - * {@link PostgreSQLContainer} instances in tests. Each test container is shared throughout the - * whole JVM. Isolation is performed by creating a new database and a new user for each - * {@link PostgresTestDatabase} instance. These are dropped when the instance is closed. - */ -public class PostgresTestDatabase implements AutoCloseable { - - static private final Logger LOGGER = LoggerFactory.getLogger(PostgresTestDatabase.class); - - /** - * Create a new {@link PostgresTestDatabase} instance. - * - * @param imageName base image to use for the underlying {@link PostgreSQLContainer}. - * @param methods {@link ContainerFactory} methods that need to be called. - * @return a new {@link PostgresTestDatabase} instance which may reuse a shared - * {@link PostgreSQLContainer}. - */ - static public PostgresTestDatabase make(String imageName, String... 
methods) { - final String imageNamePlusMethods = Stream.concat( - Stream.of(imageName), - Stream.of(methods)) - .collect(Collectors.joining("+")); - final ContainerFactory factory = ContainerFactory.LAZY.computeIfAbsent(imageNamePlusMethods, ContainerFactory::new); - return new PostgresTestDatabase(factory.getOrCreateSharedContainer()); - } - - private PostgresTestDatabase(PostgreSQLContainer sharedContainer) { - this.container = sharedContainer; - this.suffix = Strings.addRandomSuffix("", "_", 10); - this.dbName = "db" + suffix; - this.userName = "test_user" + suffix; - this.password = "test_password" + suffix; - execSQL( - String.format("CREATE DATABASE %s", dbName), - String.format("CREATE USER %s PASSWORD '%s'", userName, password), - String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", dbName, userName), - String.format("ALTER USER %s WITH SUPERUSER", userName)); - - this.jdbcUrl = String.format( - DatabaseDriver.POSTGRESQL.getUrlFormatString(), - sharedContainer.getHost(), - sharedContainer.getFirstMappedPort(), - dbName); - this.dslContext = DSLContextFactory.create( - userName, - password, - DatabaseDriver.POSTGRESQL.getDriverClassName(), - jdbcUrl, - SQLDialect.POSTGRES); - this.database = new Database(dslContext); - } - - public final PostgreSQLContainer container; - public final String suffix, dbName, userName, password, jdbcUrl; - public final DSLContext dslContext; - public final Database database; - - /** - * Convenience method for building identifiers which are unique to this instance. - */ - public String withSuffix(String str) { - return str + suffix; - } - - /** - * Convenience method for initializing a config builder for use in integration tests. - */ - public ImmutableMap.Builder makeConfigBuilder() { - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, userName) - .put(JdbcUtils.PASSWORD_KEY, password); - } - - /** - * @return the {@link PostgresUtils.Certificate} for this instance; requires - * {@link ContainerFactory#withCert} call. - */ - public PostgresUtils.Certificate getCertificate() { - final String caCert, clientKey, clientCert; - try { - caCert = container.execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); - clientKey = container.execInContainer("su", "-c", "cat client.key").getStdout().trim(); - clientCert = container.execInContainer("su", "-c", "cat client.crt").getStdout().trim(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - return new PostgresUtils.Certificate(caCert, clientCert, clientKey); - } - - private void execSQL(String... stmts) { - final List cmd = Stream.concat( - Stream.of("psql", "-a", "-d", container.getDatabaseName(), "-U", container.getUsername()), - Stream.of(stmts).flatMap(stmt -> Stream.of("-c", stmt))) - .toList(); - try { - LOGGER.debug("executing {}", Strings.join(cmd, " ")); - final var exec = container.execInContainer(cmd.toArray(new String[0])); - LOGGER.debug("exit code: {}\nstdout:\n{}\nstderr:\n{}", exec.getExitCode(), exec.getStdout(), exec.getStderr()); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - - /** - * Drop the database owned by this instance. 
- */ - public void dropDatabase() { - execSQL(String.format("DROP DATABASE %s", dbName)); - } - - /** - * Close resources held by this instance. This deliberately avoids dropping the database, which is - * really expensive in Postgres. This is because a DROP DATABASE in Postgres triggers a CHECKPOINT. - * Call {@link #dropDatabase} to explicitly drop the database. - */ - @Override - public void close() { - dslContext.close(); - execSQL(String.format("DROP USER %s", userName)); - } - - static private class ContainerFactory { - - static private final Logger LOGGER = LoggerFactory.getLogger(ContainerFactory.class); - static private final ConcurrentHashMap LAZY = new ConcurrentHashMap<>(); - - final private String imageName; - final private List methods; - private PostgreSQLContainer sharedContainer; - private RuntimeException containerCreationError; - - private ContainerFactory(String imageNamePlusMethods) { - final String[] parts = imageNamePlusMethods.split("\\+"); - this.imageName = parts[0]; - this.methods = Arrays.stream(parts).skip(1).map(methodName -> { - try { - return ContainerFactory.class.getMethod(methodName); - } catch (NoSuchMethodException e) { - throw new RuntimeException(e); - } - }).toList(); - } - - private synchronized PostgreSQLContainer getOrCreateSharedContainer() { - if (sharedContainer == null) { - if (containerCreationError != null) { - throw new RuntimeException( - "Error during container creation for imageName=" + imageName + ", methods=" + methods.stream().map(Method::getName).toList(), - containerCreationError); - } - LOGGER.info("Creating new shared container based on {} with {}.", imageName, methods.stream().map(Method::getName).toList()); - try { - final var parsed = DockerImageName.parse(imageName).asCompatibleSubstituteFor("postgres"); - sharedContainer = new PostgreSQLContainer<>(parsed); - for (Method method : methods) { - LOGGER.info("Calling {} on new shared container based on {}.", method.getName(), - imageName); - method.invoke(this); - } - sharedContainer.start(); - } catch (IllegalAccessException | InvocationTargetException e) { - containerCreationError = new RuntimeException(e); - this.sharedContainer = null; - throw containerCreationError; - } catch (RuntimeException e) { - this.sharedContainer = null; - containerCreationError = e; - throw e; - } - } - return sharedContainer; - } - - /** - * Apply the postgresql.conf file that we've packaged as a resource. - */ - public void withConf() { - sharedContainer - .withCopyFileToContainer( - MountableFile.forClasspathResource("postgresql.conf"), - "/etc/postgresql/postgresql.conf") - .withCommand("postgres -c config_file=/etc/postgresql/postgresql.conf"); - } - - /** - * Create a new network and bind it to the container. - */ - public void withNetwork() { - sharedContainer.withNetwork(Network.newNetwork()); - } - - /** - * Configure postgres with wal_level=logical. - */ - public void withWalLevelLogical() { - sharedContainer.withCommand("postgres -c wal_level=logical"); - } - - /** - * Generate SSL certificates and tell postgres to enable SSL and use them. 
- */ - public void withCert() { - sharedContainer.start(); - String[] commands = { - "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\"", - "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\"", - "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\"", - "openssl ecparam -name prime256v1 -genkey -noout -out ca.key", - "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\"", - "openssl ecparam -name prime256v1 -genkey -noout -out server.key", - "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\"", - "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256", - "cp server.key /etc/ssl/private/", - "cp server.crt /etc/ssl/private/", - "cp ca.crt /etc/ssl/private/", - "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*", - "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt", - "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf", - "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> /var/lib/postgresql/data/postgresql.conf", - "mkdir root/.postgresql", - "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf", - "openssl ecparam -name prime256v1 -genkey -noout -out client.key", - "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\"", - "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256", - "cp client.crt ~/.postgresql/postgresql.crt", - "cp client.key ~/.postgresql/postgresql.key", - "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key", - "cp ca.crt root/.postgresql/ca.crt", - "chown postgres:postgres ~/.postgresql/ca.crt", - "psql -U test -c \"SELECT pg_reload_conf();\"", - }; - for (String cmd : commands) { - try { - sharedContainer.execInContainer("su", "-c", cmd); - } catch (IOException e) { - throw new UncheckedIOException(e); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - } - - /** - * Tell postgres to enable SSL. - */ - public void withSSL() { - sharedContainer.withCommand("postgres " + - "-c ssl=on " + - "-c ssl_cert_file=/var/lib/postgresql/server.crt " + - "-c ssl_key_file=/var/lib/postgresql/server.key"); - } - - /** - * Configure postgres with client_encoding=sql_ascii. - */ - public void withASCII() { - sharedContainer.withCommand("postgres -c client_encoding=sql_ascii"); - } - - } - -} diff --git a/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java new file mode 100644 index 000000000000..6a5d80104718 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/db-sources/src/testFixtures/java/io/airbyte/cdk/testutils/TestDatabase.java @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.cdk.testutils; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.ContextQueryFunction; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.string.Strings; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.sql.SQLException; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Stream; +import javax.sql.DataSource; +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.JdbcDatabaseContainer; + +/** + * TestDatabase provides a convenient pattern for interacting with databases when testing SQL + * database sources. The basic idea is to share the same database testcontainer instance for all + * tests and to use SQL constructs such as DATABASE and USER to isolate each test case's state. + * + * @param the type of the backing testcontainer. + * @param itself + * @param the type of the object returned by {@link #configBuilder()} + */ +abstract public class TestDatabase, T extends TestDatabase, B extends TestDatabase.ConfigBuilder> + implements AutoCloseable { + + static private final Logger LOGGER = LoggerFactory.getLogger(TestDatabase.class); + + final private C container; + final private String suffix; + final private ArrayList cleanupSQL = new ArrayList<>(); + final private Map connectionProperties = new HashMap<>(); + + private DataSource dataSource; + private DSLContext dslContext; + + protected TestDatabase(C container) { + this.container = container; + this.suffix = Strings.addRandomSuffix("", "_", 10); + } + + @SuppressWarnings("unchecked") + protected T self() { + return (T) this; + } + + /** + * Adds a key-value pair to the JDBC URL's query parameters. + */ + public T withConnectionProperty(String key, String value) { + if (isInitialized()) { + throw new RuntimeException("TestDatabase instance is already initialized"); + } + connectionProperties.put(key, value); + return self(); + } + + /** + * Enqueues a SQL statement to be executed when this object is closed. + */ + public T onClose(String fmtSql, Object... fmtArgs) { + cleanupSQL.add(String.format(fmtSql, fmtArgs)); + return self(); + } + + /** + * Executes a SQL statement after calling String.format on the arguments. + */ + public T with(String fmtSql, Object... fmtArgs) { + execSQL(Stream.of(String.format(fmtSql, fmtArgs))); + return self(); + } + + /** + * Executes SQL statements as root to provide the necessary isolation for the lifetime of this + * object. This typically entails at least a CREATE DATABASE and a CREATE USER. Also Initializes the + * {@link DataSource} and {@link DSLContext} owned by this object. 
+ */ + final public T initialized() { + inContainerBootstrapCmd().forEach(this::execInContainer); + this.dataSource = DataSourceFactory.create( + getUserName(), + getPassword(), + getDatabaseDriver().getDriverClassName(), + getJdbcUrl(), + connectionProperties); + this.dslContext = DSLContextFactory.create(dataSource, getSqlDialect()); + return self(); + } + + final public boolean isInitialized() { + return dslContext != null; + } + + abstract protected Stream> inContainerBootstrapCmd(); + + abstract protected Stream inContainerUndoBootstrapCmd(); + + abstract public DatabaseDriver getDatabaseDriver(); + + abstract public SQLDialect getSqlDialect(); + + final public C getContainer() { + return container; + } + + public String withNamespace(String name) { + return name + suffix; + } + + public String getDatabaseName() { + return withNamespace("db"); + } + + public String getUserName() { + return withNamespace("user"); + } + + public String getPassword() { + return "password"; + } + + public DataSource getDataSource() { + if (!isInitialized()) { + throw new RuntimeException("TestDatabase instance is not yet initialized"); + } + return dataSource; + } + + final public DSLContext getDslContext() { + if (!isInitialized()) { + throw new RuntimeException("TestDatabase instance is not yet initialized"); + } + return dslContext; + } + + public String getJdbcUrl() { + return String.format( + getDatabaseDriver().getUrlFormatString(), + getContainer().getHost(), + getContainer().getFirstMappedPort(), + getDatabaseName()); + } + + public Database getDatabase() { + return new Database(getDslContext()); + } + + protected void execSQL(Stream sql) { + try { + getDatabase().query(ctx -> { + sql.forEach(ctx::execute); + return null; + }); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + protected void execInContainer(Stream cmds) { + final List cmd = cmds.toList(); + if (cmd.isEmpty()) { + return; + } + try { + LOGGER.debug("executing {}", Strings.join(cmd, " ")); + final var exec = getContainer().execInContainer(cmd.toArray(new String[0])); + if (exec.getExitCode() == 0) { + LOGGER.debug("execution success\nstdout:\n{}\nstderr:\n{}", exec.getStdout(), exec.getStderr()); + } else { + LOGGER.error("execution failure, code {}\nstdout:\n{}\nstderr:\n{}", exec.getExitCode(), exec.getStdout(), exec.getStderr()); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + + public X query(final ContextQueryFunction transform) throws SQLException { + return getDatabase().query(transform); + } + + public X transaction(final ContextQueryFunction transform) throws SQLException { + return getDatabase().transaction(transform); + } + + /** + * Returns a builder for the connector config object. 
+ */ + public B configBuilder() { + return new ConfigBuilder(self()).self(); + } + + public B testConfigBuilder() { + return configBuilder() + .withHostAndPort() + .withCredentials() + .withDatabase(); + } + + public B integrationTestConfigBuilder() { + return configBuilder() + .withResolvedHostAndPort() + .withCredentials() + .withDatabase(); + } + + @Override + public void close() { + execSQL(this.cleanupSQL.stream()); + dslContext.close(); + execInContainer(inContainerUndoBootstrapCmd()); + } + + static public class ConfigBuilder, B extends ConfigBuilder> { + + static public final Duration DEFAULT_CDC_REPLICATION_INITIAL_WAIT = Duration.ofSeconds(5); + + protected final ImmutableMap.Builder builder = ImmutableMap.builder(); + protected final T testDatabase; + + protected ConfigBuilder(T testDatabase) { + this.testDatabase = testDatabase; + } + + public JsonNode build() { + return Jsons.jsonNode(builder.build()); + } + + @SuppressWarnings("unchecked") + final protected B self() { + return (B) this; + } + + public B with(Object key, Object value) { + builder.put(key, value); + return self(); + } + + public B withDatabase() { + return this + .with(JdbcUtils.DATABASE_KEY, testDatabase.getDatabaseName()); + } + + public B withCredentials() { + return this + .with(JdbcUtils.USERNAME_KEY, testDatabase.getUserName()) + .with(JdbcUtils.PASSWORD_KEY, testDatabase.getPassword()); + } + + public B withResolvedHostAndPort() { + return this + .with(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(testDatabase.getContainer())) + .with(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(testDatabase.getContainer())); + } + + public B withHostAndPort() { + return this + .with(JdbcUtils.HOST_KEY, testDatabase.getContainer().getHost()) + .with(JdbcUtils.PORT_KEY, testDatabase.getContainer().getFirstMappedPort()); + } + + public B withoutSsl() { + return with(JdbcUtils.SSL_KEY, false); + } + + public B withSsl(Map sslMode) { + return with(JdbcUtils.SSL_KEY, true).with(JdbcUtils.SSL_MODE_KEY, sslMode); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle b/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle index f2031037e47e..c27161e3af74 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/build.gradle @@ -4,13 +4,11 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() - configurations.all { resolutionStrategy { force libs.jooq @@ -26,6 +24,7 @@ dependencies { implementation project(':airbyte-integrations:connectors:source-mssql') implementation libs.jooq + testImplementation testFixtures(project(':airbyte-integrations:connectors:source-mssql')) testImplementation 'org.apache.commons:commons-lang3:3.11' testImplementation libs.testcontainers.mssqlserver testImplementation 'org.hamcrest:hamcrest-all:1.3' diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties b/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml 
b/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml index e91c1a8ccebb..91f70101f0e4 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/metadata.yaml @@ -11,7 +11,7 @@ data: connectorSubtype: database connectorType: source definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 - dockerImageTag: 3.0.0 + dockerImageTag: 3.0.1 dockerRepository: airbyte/source-mssql-strict-encrypt githubIssueLabel: source-mssql icon: mssql.svg diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java index 8c727973ef1d..8687b6c81822 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlSourceStrictEncrypt.java @@ -19,7 +19,7 @@ public class MssqlSourceStrictEncrypt extends SpecModifyingSource implements Sou private static final Logger LOGGER = LoggerFactory.getLogger(MssqlSourceStrictEncrypt.class); public MssqlSourceStrictEncrypt() { - super(MssqlSource.sshWrappedSource()); + super(MssqlSource.sshWrappedSource(new MssqlSource())); } @Override diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java index 69ec87ddd4c6..c584e76113cd 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptSourceAcceptanceTest.java @@ -5,94 +5,48 @@ package io.airbyte.integrations.source.mssql_strict_encrypt; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.mssql.MsSQLContainerFactory; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConnectorSpecification; -import 
java.sql.SQLException; import java.util.HashMap; import java.util.Map; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlStrictEncryptSourceAcceptanceTest extends SourceAcceptanceTest { protected static final String SCHEMA_NAME = "dbo"; protected static final String STREAM_NAME = "id_and_name"; - protected static MSSQLServerContainer db; - protected JsonNode config; - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } + private MsSQLTestDatabase testdb; @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - try (final DSLContext dslContext = DSLContextFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - db.getHost(), - db.getFirstMappedPort()), - null)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - "(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), " + - "(3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1,'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } @Override - protected void tearDown(final TestDestinationEnv testEnv) throws Exception {} + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } @Override protected String getImageName() { @@ -106,7 +60,9 @@ protected ConnectorSpecification getSpec() 
throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java index 4f17ea3e7b32..2aac6a760c84 100644 --- a/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mssql_strict_encrypt/MssqlStrictEncryptJdbcSourceAcceptanceTest.java @@ -8,127 +8,67 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; -import io.airbyte.integrations.source.mssql.MssqlSource; +import io.airbyte.integrations.source.mssql.MsSQLContainerFactory; +import io.airbyte.integrations.source.mssql.MsSQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.JDBCType; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.function.Function; -import javax.sql.DataSource; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.MSSQLServerContainer; -public class MssqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +public class MssqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - private static MSSQLServerContainer dbContainer; - private static DataSource dataSource; - private JsonNode config; - - @BeforeAll - static void init() { + static { // In mssql, timestamp is generated automatically, so we need to use // the datetime type instead so that we can set the value manually. 
COL_TIMESTAMP_TYPE = "DATETIME"; - - if (dbContainer == null) { - dbContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - dbContainer.start(); - } - } - - @BeforeEach - public void setup() throws Exception { - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.USERNAME_KEY, dbContainer.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, dbContainer.getPassword()) - .build()); - - dataSource = DataSourceFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - dbContainer.getHost(), - dbContainer.getFirstMappedPort())); - - try { - database = new DefaultJdbcDatabase(dataSource); - - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - database.execute(ctx -> ctx.createStatement().execute(String.format("CREATE DATABASE %s;", dbName))); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - - super.setup(); - } finally { - DataSourceFactory.close(dataSource); - } - } - - @AfterAll - public static void cleanUp() throws Exception { - dbContainer.close(); } @Override - public boolean supportsSchemas() { - return true; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1"); } - @Override - public JsonNode getConfig() { - return config; + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } @Override - public Function getToDatabaseConfigFunction() { - return new MssqlSource()::toDatabaseConfig; - } - - @Override - public String getDriverClass() { - return MssqlSource.DRIVER_CLASS; + protected MssqlSourceStrictEncrypt source() { + return new MssqlSourceStrictEncrypt(); } @Override - public AbstractJdbcSource getJdbcSource() { - return new MssqlSource(); + protected MsSQLTestDatabase createTestDatabase() { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + final var testdb = new MsSQLTestDatabase(container); + return testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized(); } @Override - public Source getSource() { - return new MssqlSourceStrictEncrypt(); + public boolean supportsSchemas() { + return true; } @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = SshHelpers.injectSshIntoSpec(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class)); diff --git a/airbyte-integrations/connectors/source-mssql/build.gradle b/airbyte-integrations/connectors/source-mssql/build.gradle index e31c0c4c7ead..d2c14de9a601 100644 --- a/airbyte-integrations/connectors/source-mssql/build.gradle +++ 
b/airbyte-integrations/connectors/source-mssql/build.gradle @@ -4,7 +4,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.4.1' + cdkVersionRequired = '0.5.0' features = ['db-sources'] useLocalCdk = false } @@ -15,7 +15,7 @@ configurations.all { } } -airbyteJavaConnector.addCdkDependencies() + application { mainClass = 'io.airbyte.integrations.source.mssql.MssqlSource' @@ -25,12 +25,14 @@ application { dependencies { implementation libs.postgresql - implementation libs.debezium.sqlserver implementation 'com.microsoft.sqlserver:mssql-jdbc:10.2.1.jre8' implementation 'org.codehaus.plexus:plexus-utils:3.4.2' testImplementation 'org.apache.commons:commons-lang3:3.11' - testImplementation libs.testcontainers.mssqlserver testImplementation 'org.hamcrest:hamcrest-all:1.3' + testImplementation 'org.awaitility:awaitility:4.2.0' + + testImplementation libs.testcontainers.mssqlserver + testFixturesImplementation libs.testcontainers.mssqlserver } diff --git a/airbyte-integrations/connectors/source-mssql/gradle.properties b/airbyte-integrations/connectors/source-mssql/gradle.properties new file mode 100644 index 000000000000..8ef098d20b92 --- /dev/null +++ b/airbyte-integrations/connectors/source-mssql/gradle.properties @@ -0,0 +1 @@ +testExecutionConcurrency=-1 \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-mssql/metadata.yaml b/airbyte-integrations/connectors/source-mssql/metadata.yaml index 781e6b7cf1e7..dbb3a32e9a52 100644 --- a/airbyte-integrations/connectors/source-mssql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mssql/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: source definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 - dockerImageTag: 3.0.0 + dockerImageTag: 3.0.1 dockerRepository: airbyte/source-mssql documentationUrl: https://docs.airbyte.com/integrations/sources/mssql githubIssueLabel: source-mssql diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java index 26175fb66b13..2e27ebc2b948 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlCdcHelper.java @@ -13,6 +13,7 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.SyncMode; +import java.time.Duration; import java.util.Properties; import java.util.stream.Collectors; import org.codehaus.plexus.util.StringUtils; @@ -30,6 +31,11 @@ public class MssqlCdcHelper { private static final String CDC_SNAPSHOT_ISOLATION_FIELD = "snapshot_isolation"; private static final String CDC_DATA_TO_SYNC_FIELD = "data_to_sync"; + private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L); + + // Test execution latency is lower when heartbeats are more frequent. 
+ private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L); + public enum ReplicationMethod { STANDARD, CDC @@ -160,6 +166,14 @@ static Properties getDebeziumProperties(final JdbcDatabase database, final Confi props.setProperty("schema.include.list", getSchema(catalog)); props.setProperty("database.names", config.get(JdbcUtils.DATABASE_KEY).asText()); + final Duration heartbeatInterval = + (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean()) + ? HEARTBEAT_INTERVAL_IN_TESTS + : HEARTBEAT_INTERVAL; + props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis())); + // TODO: enable heartbeats in MS SQL Server. + props.setProperty("heartbeat.interval.ms", "0"); + if (config.has("ssl_method")) { final JsonNode sslConfig = config.get("ssl_method"); final String sslMethod = sslConfig.get("ssl_method").asText(); diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index 61a2c4957ce1..5ce64b942485 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -30,7 +30,7 @@ import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource; import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler; import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager; -import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil; +import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil; import io.airbyte.cdk.integrations.debezium.internals.mssql.MssqlCdcTargetPosition; import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.relationaldb.TableInfo; @@ -88,8 +88,8 @@ SELECT CAST(IIF(EXISTS(SELECT TOP 1 1 FROM "%s"."%s" WHERE "%s" IS NULL), 1, 0) public static final String CDC_DEFAULT_CURSOR = "_ab_cdc_cursor"; private List schemas; - public static Source sshWrappedSource() { - return new SshWrappedSource(new MssqlSource(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + public static Source sshWrappedSource(MssqlSource source) { + return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); } public MssqlSource() { @@ -451,11 +451,16 @@ public List> getIncrementalIterators( final JsonNode sourceConfig = database.getSourceConfig(); if (MssqlCdcHelper.isCdc(sourceConfig) && isAnyStreamIncrementalSyncMode(catalog)) { LOGGER.info("using CDC: {}", true); - final Duration firstRecordWaitTime = FirstRecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); - final AirbyteDebeziumHandler handler = - new AirbyteDebeziumHandler<>(sourceConfig, - MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText()), true, firstRecordWaitTime, - OptionalInt.empty()); + final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig); + final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(sourceConfig); + final var targetPosition = MssqlCdcTargetPosition.getTargetPosition(database, sourceConfig.get(JdbcUtils.DATABASE_KEY).asText()); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>( + sourceConfig, + 
targetPosition, + true, + firstRecordWaitTime, + subsequentRecordWaitTime, + OptionalInt.empty()); final MssqlCdcConnectorMetadataInjector mssqlCdcConnectorMetadataInjector = MssqlCdcConnectorMetadataInjector.getInstance(emittedAt); @@ -565,7 +570,7 @@ private void readSsl(final JsonNode sslMethod, final List additionalPara } public static void main(final String[] args) throws Exception { - final Source source = MssqlSource.sshWrappedSource(); + final Source source = MssqlSource.sshWrappedSource(new MssqlSource()); LOGGER.info("starting source: {}", MssqlSource.class); new IntegrationRunner(source).run(args); LOGGER.info("completed source: {}", MssqlSource.class); diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java index cb6457935a88..914f294ed515 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractMssqlSourceDatatypeTest.java @@ -4,18 +4,14 @@ package io.airbyte.integrations.source.mssql; -import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.protocol.models.JsonSchemaType; -import org.jooq.DSLContext; -import org.testcontainers.containers.MSSQLServerContainer; public abstract class AbstractMssqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { - protected static MSSQLServerContainer container; - protected JsonNode config; - protected DSLContext dslContext; + protected MsSQLTestDatabase testdb; @Override protected String getNameSpace() { @@ -28,14 +24,11 @@ protected String getImageName() { } @Override - protected JsonNode getConfig() { - return config; + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); } - protected static final String DB_NAME = "comprehensive"; - - protected static final String CREATE_TABLE_SQL = - "USE " + DB_NAME + "\nCREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; + protected static final String CREATE_TABLE_SQL = "CREATE TABLE %1$s(%2$s INTEGER PRIMARY KEY, %3$s %4$s)"; @Override protected void initTests() { diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java index a1e355617284..3c5073d32c59 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/AbstractSshMssqlSourceAcceptanceTest.java @@ -5,12 +5,7 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import 
io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; @@ -25,91 +20,50 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.HashMap; -import java.util.Objects; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.testcontainers.containers.JdbcDatabaseContainer; -import org.testcontainers.containers.MSSQLServerContainer; -import org.testcontainers.containers.Network; public abstract class AbstractSshMssqlSourceAcceptanceTest extends SourceAcceptanceTest { private static final String STREAM_NAME = "dbo.id_and_name"; private static final String STREAM_NAME2 = "dbo.starships"; - private static final Network network = Network.newNetwork(); - private static JsonNode config; - private String dbName; - private MSSQLServerContainer db; - private final SshBastionContainer bastion = new SshBastionContainer(); public abstract SshTunnel.TunnelMethod getTunnelMethod(); - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - startTestContainers(); - config = bastion.getTunnelConfig(getTunnelMethod(), getMSSQLDbConfigBuilder(db), false); - populateDatabaseTestData(); - } - - public ImmutableMap.Builder getMSSQLDbConfigBuilder(final JdbcDatabaseContainer db) { - dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(db.getContainerInfo().getNetworkSettings() - .getNetworks() - .get(((Network.NetworkImpl) network).getName()) - .getIpAddress())) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .put(JdbcUtils.PORT_KEY, db.getExposedPorts().get(0)) - .put(JdbcUtils.DATABASE_KEY, dbName); - } - - private Database getDatabaseFromConfig(final JsonNode config) { - final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - db.getHost(), - db.getFirstMappedPort()), - null); - return new Database(dslContext); - } - - private void startTestContainers() { - bastion.initAndStartBastion(network); - initAndStartJdbcContainer(); - } + protected MsSQLTestDatabase testdb; + protected SshBastionContainer bastion; - private void initAndStartJdbcContainer() { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2017-latest") - .withNetwork(network) - .acceptLicense(); - db.start(); + @Override + protected JsonNode getConfig() { + try { + return testdb.integrationTestConfigBuilder() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false)) + .build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } - private void populateDatabaseTestData() throws Exception { - SshTunnel.sshWrap( - getConfig(), - JdbcUtils.HOST_LIST_KEY, - JdbcUtils.PORT_LIST_KEY, - mangledConfig -> { - getDatabaseFromConfig(mangledConfig).query(ctx -> { - 
ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("ALTER DATABASE %s SET AUTO_CLOSE OFF WITH NO_WAIT;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - }); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2017-latest", "withNetwork"); + testdb = testdb + .with("ALTER DATABASE %s SET AUTO_CLOSE OFF WITH NO_WAIT;", testdb.getDatabaseName()) + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - bastion.stopAndCloseContainers(db); + bastion.close(); + testdb.close(); } @Override @@ -122,11 +76,6 @@ protected ConnectorSpecification getSpec() throws Exception { return SshHelpers.getSpecAndInjectSsh(); } - @Override - protected JsonNode getConfig() { - return config; - } - @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java index c36789482e36..141d4163f209 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceAcceptanceTest.java @@ -5,18 +5,10 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -26,32 +18,15 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.util.List; -import java.util.Map; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class CdcMssqlSourceAcceptanceTest extends SourceAcceptanceTest { private static 
final String SCHEMA_NAME = "dbo"; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - private static final String TEST_USER_PASSWORD = "testerjester[1]"; private static final String CDC_ROLE_NAME = "cdc_selector"; - public static MSSQLServerContainer container; - private String dbName; - private String testUserName; - private JsonNode config; - private Database database; - private DSLContext dslContext; - @AfterAll - public static void closeContainer() { - if (container != null) { - container.close(); - container.stop(); - } - } + private MsSQLTestDatabase testdb; @Override protected String getImageName() { @@ -65,7 +40,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override @@ -103,123 +81,40 @@ protected JsonNode getState() { } @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws InterruptedException { - if (container == null) { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - } - - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - testUserName = Strings.addRandomSuffix("test", "_", 5).toLowerCase(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", 5, - "snapshot_isolation", "Snapshot")); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, testUserName) - .put(JdbcUtils.PASSWORD_KEY, TEST_USER_PASSWORD) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); - - dslContext = DSLContextFactory.create(DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - container.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - database = new Database(dslContext); - - executeQuery("CREATE DATABASE " + dbName + ";"); - executeQuery("ALTER DATABASE " + dbName + "\n\tSET ALLOW_SNAPSHOT_ISOLATION ON"); - executeQuery("USE " + dbName + "\n" + "EXEC sys.sp_cdc_enable_db"); - - setupTestUser(); - revokeAllPermissions(); - createAndPopulateTables(); - grantCorrectPermissions(); - } - - private void setupTestUser() { - executeQuery("USE " + dbName); - executeQuery("CREATE LOGIN " + testUserName + " WITH PASSWORD = '" + TEST_USER_PASSWORD + "';"); - executeQuery("CREATE USER " + testUserName + " FOR LOGIN " + testUserName + ";"); - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL FROM " + testUserName + " CASCADE;"); - executeQuery("EXEC sp_msforeachtable \"REVOKE ALL ON '?' 
TO " + testUserName + ";\""); - } - - private void createAndPopulateTables() throws InterruptedException { - executeQuery(String.format("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", - SCHEMA_NAME, STREAM_NAME)); - executeQuery(String.format("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", - SCHEMA_NAME, STREAM_NAME)); - executeQuery(String.format("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", - SCHEMA_NAME, STREAM_NAME2)); - executeQuery(String.format("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", - SCHEMA_NAME, STREAM_NAME2)); - - // sometimes seeing an error that we can't enable cdc on a table while sql server agent is still - // spinning up - // solving with a simple while retry loop - boolean failingToStart = true; - int retryNum = 0; - final int maxRetries = 10; - while (failingToStart) { - try { - // enabling CDC on each table - final String[] tables = {STREAM_NAME, STREAM_NAME2}; - for (final String table : tables) { - executeQuery(String.format( - "EXEC sys.sp_cdc_enable_table\n" - + "\t@source_schema = N'%s',\n" - + "\t@source_name = N'%s', \n" - + "\t@role_name = N'%s',\n" - + "\t@supports_net_changes = 0", - SCHEMA_NAME, table, CDC_ROLE_NAME)); - } - failingToStart = false; - } catch (final Exception e) { - if (retryNum >= maxRetries) { - throw e; - } else { - retryNum++; - Thread.sleep(10000); // 10 seconds - } - } - } - } - - private void grantCorrectPermissions() { - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testUserName)); - executeQuery(String.format("USE %s;\n" + "GRANT SELECT ON SCHEMA :: [%s] TO %s", dbName, "cdc", testUserName)); - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName)); - } - - private void executeQuery(final String query) { - try { - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected void setupEnvironment(final TestDestinationEnv environment) { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest", "withAgent"); + final var enableCdcSqlFmt = """ + EXEC sys.sp_cdc_enable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@role_name = N'%s', + \t@supports_net_changes = 0"""; + testdb + .withSnapshotIsolation() + .withCdc() + .withWaitUntilAgentRunning() + // create tables + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME) + .with("CREATE TABLE %s.%s(id INTEGER PRIMARY KEY, name VARCHAR(200));", SCHEMA_NAME, STREAM_NAME2) + // populate tables + .with("INSERT INTO %s.%s (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", SCHEMA_NAME, STREAM_NAME) + .with("INSERT INTO %s.%s (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');", SCHEMA_NAME, STREAM_NAME2) + // enable cdc on tables for designated role + .with(enableCdcSqlFmt, SCHEMA_NAME, STREAM_NAME, CDC_ROLE_NAME) + .with(enableCdcSqlFmt, SCHEMA_NAME, STREAM_NAME2, CDC_ROLE_NAME) + .withWaitUntilMaxLsnAvailable() + // revoke user permissions + .with("REVOKE ALL FROM %s CASCADE;", testdb.getUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' 
TO %s;\"", testdb.getUserName()) + // grant user permissions + .with("EXEC sp_addrolemember N'%s', N'%s';", "db_datareader", testdb.getUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testdb.getUserName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testdb.getUserName()); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); + testdb.close(); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java index 67a7cafa9798..43393443805f 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceDatatypeTest.java @@ -5,73 +5,25 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.testcontainers.containers.MSSQLServerContainer; public class CdcMssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override - protected Database setupDatabase() throws Exception { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", 5, - "snapshot_isolation", "Snapshot")); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, DB_NAME) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); - - dslContext = DSLContextFactory.create(DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - container.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - final Database database = new Database(dslContext); - - executeQuery("CREATE DATABASE " + DB_NAME + ";"); - executeQuery("ALTER DATABASE " + DB_NAME + "\n\tSET ALLOW_SNAPSHOT_ISOLATION ON"); - executeQuery("USE " + DB_NAME + "\n" + "EXEC sys.sp_cdc_enable_db"); - - return database; - } - - private void executeQuery(final String query) 
{ - try { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest", "withAgent") + .withSnapshotIsolation() + .withCdc(); + return testdb.getDatabase(); } @Override @@ -81,39 +33,39 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc } private void enableCdcOnAllTables() { - executeQuery("USE " + DB_NAME + "\n" - + "DECLARE @TableName VARCHAR(100)\n" - + "DECLARE @TableSchema VARCHAR(100)\n" - + "DECLARE CDC_Cursor CURSOR FOR\n" - + " SELECT * FROM ( \n" - + " SELECT Name,SCHEMA_NAME(schema_id) AS TableSchema\n" - + " FROM sys.objects\n" - + " WHERE type = 'u'\n" - + " AND is_ms_shipped <> 1\n" - + " ) CDC\n" - + "OPEN CDC_Cursor\n" - + "FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema\n" - + "WHILE @@FETCH_STATUS = 0\n" - + " BEGIN\n" - + " DECLARE @SQL NVARCHAR(1000)\n" - + " DECLARE @CDC_Status TINYINT\n" - + " SET @CDC_Status=(SELECT COUNT(*)\n" - + " FROM cdc.change_tables\n" - + " WHERE Source_object_id = OBJECT_ID(@TableSchema+'.'+@TableName))\n" - + " --IF CDC is not enabled on Table, Enable CDC\n" - + " IF @CDC_Status <> 1\n" - + " BEGIN\n" - + " SET @SQL='EXEC sys.sp_cdc_enable_table\n" - + " @source_schema = '''+@TableSchema+''',\n" - + " @source_name = ''' + @TableName\n" - + " + ''',\n" - + " @role_name = null;'\n" - + " EXEC sp_executesql @SQL\n" - + " END\n" - + " FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema\n" - + "END\n" - + "CLOSE CDC_Cursor\n" - + "DEALLOCATE CDC_Cursor"); + testdb.with(""" + DECLARE @TableName VARCHAR(100) + DECLARE @TableSchema VARCHAR(100) + DECLARE CDC_Cursor CURSOR FOR + SELECT * FROM ( + SELECT Name,SCHEMA_NAME(schema_id) AS TableSchema + FROM sys.objects + WHERE type = 'u' + AND is_ms_shipped <> 1 + ) CDC + OPEN CDC_Cursor + FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema + WHILE @@FETCH_STATUS = 0 + BEGIN + DECLARE @SQL NVARCHAR(1000) + DECLARE @CDC_Status TINYINT + SET @CDC_Status=(SELECT COUNT(*) + FROM cdc.change_tables + WHERE Source_object_id = OBJECT_ID(@TableSchema+'.'+@TableName)) + --IF CDC is not enabled on Table, Enable CDC + IF @CDC_Status <> 1 + BEGIN + SET @SQL='EXEC sys.sp_cdc_enable_table + @source_schema = '''+@TableSchema+''', + @source_name = ''' + @TableName + + ''', + @role_name = null;' + EXEC sp_executesql @SQL + END + FETCH NEXT FROM CDC_Cursor INTO @TableName,@TableSchema + END + CLOSE CDC_Cursor + DEALLOCATE CDC_Cursor"""); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java index 90f0095602d6..526ea54602ab 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceAcceptanceTest.java @@ -5,19 +5,10 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import 
io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -25,61 +16,28 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import java.sql.SQLException; import java.util.HashMap; -import java.util.Map; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlSourceAcceptanceTest extends SourceAcceptanceTest { protected static final String SCHEMA_NAME = "dbo"; protected static final String STREAM_NAME = "id_and_name"; - protected static MSSQLServerContainer db; - protected JsonNode config; - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } + protected MsSQLTestDatabase testdb; @Override protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - "(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } @Override - protected void tearDown(final TestDestinationEnv testEnv) throws Exception {} + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); + } @Override protected String getImageName() { @@ -93,7 +51,9 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return 
testdb.integrationTestConfigBuilder() + .withoutSsl() + .build(); } @Override @@ -111,19 +71,4 @@ protected JsonNode getState() { return Jsons.jsonNode(new HashMap<>()); } - private static DSLContext getDslContext(final JsonNode config) { - return DSLContextFactory.create(DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - db.getHost(), - db.getFirstMappedPort()), - Map.of("encrypt", "false")), null); - } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); - } - } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java index be6d6d9167a0..93abd3355758 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/MssqlSourceDatatypeTest.java @@ -5,70 +5,21 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.jooq.DSLContext; -import org.testcontainers.containers.MSSQLServerContainer; public class MssqlSourceDatatypeTest extends AbstractMssqlSourceDatatypeTest { @Override - protected Database setupDatabase() throws Exception { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest") - .acceptLicense(); - container.start(); - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .build()); - - dslContext = getDslContext(configWithoutDbName); - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", DB_NAME)); - ctx.fetch(String.format("USE %s;", DB_NAME)); - return null; - }); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, DB_NAME); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); - - return database; - } - - private static DSLContext getDslContext(final JsonNode config) { - return DSLContextFactory.create(DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")), null); 
- } - - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + protected Database setupDatabase() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + return testdb.getDatabase(); } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.stop(); - container.close(); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java index 6db7a7a48bff..397b36494870 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-integration/java/io/airbyte/integrations/source/mssql/SslEnabledMssqlSourceAcceptanceTest.java @@ -5,80 +5,32 @@ package io.airbyte.integrations.source.mssql; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import java.sql.SQLException; import java.util.Map; -import org.apache.commons.lang3.RandomStringUtils; -import org.jooq.DSLContext; -import org.junit.jupiter.api.AfterAll; -import org.testcontainers.containers.MSSQLServerContainer; public class SslEnabledMssqlSourceAcceptanceTest extends MssqlSourceAcceptanceTest { - @AfterAll - public static void closeContainer() { - if (db != null) { - db.close(); - db.stop(); - } - } - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws SQLException { - if (db == null) { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04").acceptLicense(); - db.start(); - } - - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(db)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(db)) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .build()); - final String dbName = "db_" + RandomStringUtils.randomAlphabetic(10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES " + - "(1,'picard', '2124-03-04T01:01:01Z'), " + - "(2, 'crusher', '2124-03-04T01:01:01Z'), " + - "(3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) 
config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "encrypted_trust_server_certificate"))); - } - - private DSLContext getDslContext(final JsonNode baseConfig) { - return DSLContextFactory.create( - baseConfig.get(JdbcUtils.USERNAME_KEY).asText(), - baseConfig.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;encrypt=true;trustServerCertificate=true;", - db.getHost(), - db.getFirstMappedPort()), - null); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSsl(Map.of("ssl_method", "encrypted_trust_server_certificate")) + .build(); } - private static Database getDatabase(final DSLContext dslContext) { - return new Database(dslContext); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MsSQLContainerFactory().shared("mcr.microsoft.com/mssql/server:2022-RTM-CU2-ubuntu-20.04"); + testdb = new MsSQLTestDatabase(container); + testdb = testdb + .withConnectionProperty("encrypt", "true") + .withConnectionProperty("trustServerCertificate", "true") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES " + + "(1, 'picard', '2124-03-04T01:01:01Z'), " + + "(2, 'crusher', '2124-03-04T01:01:01Z'), " + + "(3, 'vash', '2124-03-04T01:01:01Z');"); } } diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java index 29f1e91fc7a5..73ccccee6c35 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java @@ -9,9 +9,9 @@ import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_DEFAULT_CURSOR; import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_EVENT_SERIAL_NO; import static io.airbyte.integrations.source.mssql.MssqlSource.CDC_LSN; -import static io.airbyte.integrations.source.mssql.MssqlSource.DRIVER_CLASS; import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_CDC_OFFSET; import static io.airbyte.integrations.source.mssql.MssqlSource.MSSQL_DB_HISTORY; +import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -24,218 +24,157 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.debezium.CdcSourceTest; import 
io.airbyte.cdk.integrations.debezium.internals.mssql.MssqlCdcTargetPosition; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.AirbyteStream; import io.airbyte.protocol.models.v0.SyncMode; import io.debezium.connector.sqlserver.Lsn; -import java.sql.SQLException; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Optional; import javax.sql.DataSource; -import org.jooq.DSLContext; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.testcontainers.containers.MSSQLServerContainer; +import org.testcontainers.utility.DockerImageName; -public class CdcMssqlSourceTest extends CdcSourceTest { - - private static final String CDC_ROLE_NAME = "cdc_selector"; - private static final String TEST_USER_PASSWORD = "testerjester[1]"; - public static MSSQLServerContainer container; - - private String testUserName; - private String dbName; - private String dbNamewithDot; - private Database database; - private JdbcDatabase testJdbcDatabase; - private MssqlSource source; - private JsonNode config; - private DSLContext dslContext; - private DataSource dataSource; - private DataSource testDataSource; - - @BeforeEach - public void setup() throws SQLException { - init(); - setupTestUser(); - revokeAllPermissions(); - super.setup(); - grantCorrectPermissions(); - } - - @BeforeAll - public static void createContainer() { - if (container == null) { - container = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - container.addEnv("MSSQL_AGENT_ENABLED", "True"); // need this running for cdc to work - container.start(); - } - } +public class CdcMssqlSourceTest extends CdcSourceTest { - @AfterAll - public static void closeContainer() { - if (container != null) { - container.close(); - container.stop(); - } - } - - private void init() { - dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - testUserName = Strings.addRandomSuffix("test", "_", 5).toLowerCase(); - dbNamewithDot = Strings.addRandomSuffix("db", ".", 10).toLowerCase(); - source = new MssqlSource(); - - final JsonNode replicationConfig = Jsons.jsonNode(Map.of( - "method", "CDC", - "data_to_sync", "Existing and New", - "initial_waiting_seconds", INITIAL_WAITING_SECONDS, - "snapshot_isolation", "Snapshot")); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) - .put(JdbcUtils.USERNAME_KEY, testUserName) - .put(JdbcUtils.PASSWORD_KEY, TEST_USER_PASSWORD) - .put("replication_method", replicationConfig) - .put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))) - .build()); + static private final String CDC_ROLE_NAME = "cdc_selector"; - dataSource = DataSourceFactory.create( - container.getUsername(), - container.getPassword(), - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%d", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")); - - testDataSource = 
DataSourceFactory.create( - testUserName, - TEST_USER_PASSWORD, - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%d", - container.getHost(), - container.getFirstMappedPort()), - Map.of("encrypt", "false")); + static private final String TEST_USER_NAME_PREFIX = "cdc_test_user"; - dslContext = DSLContextFactory.create(dataSource, null); + // Deliberately do not share this test container, as we're going to mutate the global SQL Server + // state. + static private final MSSQLServerContainer UNSHARED_CONTAINER = new MsSQLContainerFactory() + .createNewContainer(DockerImageName.parse("mcr.microsoft.com/mssql/server:2022-latest")); - database = new Database(dslContext); - - testJdbcDatabase = new DefaultJdbcDatabase(testDataSource); - - executeQuery("CREATE DATABASE " + dbName + ";"); - executeQuery("CREATE DATABASE [" + dbNamewithDot + "];"); - switchSnapshotIsolation(true, dbName); - } + private DataSource testDataSource; - private void switchSnapshotIsolation(final Boolean on, final String db) { - final String onOrOff = on ? "ON" : "OFF"; - executeQuery("ALTER DATABASE " + db + "\n\tSET ALLOW_SNAPSHOT_ISOLATION " + onOrOff); + @BeforeAll + static public void beforeAll() { + new MsSQLContainerFactory().withAgent(UNSHARED_CONTAINER); + UNSHARED_CONTAINER.start(); } - private void setupTestUser() { - executeQuery("USE " + dbName); - executeQuery("CREATE LOGIN " + testUserName + " WITH PASSWORD = '" + TEST_USER_PASSWORD + "';"); - executeQuery("CREATE USER " + testUserName + " FOR LOGIN " + testUserName + ";"); + @AfterAll + static void afterAll() { + UNSHARED_CONTAINER.close(); } - private void revokeAllPermissions() { - executeQuery("REVOKE ALL FROM " + testUserName + " CASCADE;"); - executeQuery("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO " + testUserName + ";\""); + private String testUserName() { + return testdb.withNamespace(TEST_USER_NAME_PREFIX); } - private void alterPermissionsOnSchema(final Boolean grant, final String schema) { - final String grantOrRemove = grant ? 
"GRANT" : "REVOKE"; - executeQuery(String.format("USE %s;\n" + "%s SELECT ON SCHEMA :: [%s] TO %s", dbName, grantOrRemove, schema, testUserName)); + @Override + protected MsSQLTestDatabase createTestDatabase() { + final var testdb = new MsSQLTestDatabase(UNSHARED_CONTAINER); + return testdb + .withConnectionProperty("encrypt", "false") + .withConnectionProperty("databaseName", testdb.getDatabaseName()) + .initialized() + .withSnapshotIsolation() + .withCdc() + .withWaitUntilAgentRunning(); } - private void grantCorrectPermissions() { - alterPermissionsOnSchema(true, MODELS_SCHEMA); - alterPermissionsOnSchema(true, MODELS_SCHEMA + "_random"); - alterPermissionsOnSchema(true, "cdc"); - executeQuery(String.format("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName)); + @Override + protected MssqlSource source() { + final var source = new MssqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public String createSchemaQuery(final String schemaName) { - return "CREATE SCHEMA " + schemaName; + protected JsonNode config() { + return testdb.configBuilder() + .withHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, testUserName()) + .with(JdbcUtils.PASSWORD_KEY, testdb.getPassword()) + .withSchemas(modelsSchema(), randomSchema()) + .withCdcReplication() + .withoutSsl() + .build(); } - // TODO : Delete this Override when MSSQL supports individual table snapshot @Override - public void newTableSnapshotTest() { - // Do nothing + @BeforeEach + protected void setup() { + super.setup(); + + // Enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access. + final var enableCdcSqlFmt = """ + EXEC sys.sp_cdc_enable_table + \t@source_schema = N'%s', + \t@source_name = N'%s', + \t@role_name = N'%s', + \t@supports_net_changes = 0"""; + testdb + .with(enableCdcSqlFmt, modelsSchema(), MODELS_STREAM_NAME, CDC_ROLE_NAME) + .with(enableCdcSqlFmt, randomSchema(), RANDOM_TABLE_NAME, CDC_ROLE_NAME); + + // Create a test user to be used by the source, with proper permissions. + testdb + .with("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", testUserName(), testdb.getPassword(), testdb.getDatabaseName()) + .with("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", testUserName(), testUserName()) + .with("REVOKE ALL FROM %s CASCADE;", testUserName()) + .with("EXEC sp_msforeachtable \"REVOKE ALL ON '?' TO %s;\"", testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", modelsSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [%s] TO %s", randomSchema(), testUserName()) + .with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()) + .with("USE [master]") + .with("GRANT VIEW SERVER STATE TO %s", testUserName()) + .with("USE [%s]", testdb.getDatabaseName()) + .with("EXEC sp_addrolemember N'%s', N'%s';", CDC_ROLE_NAME, testUserName()); + + testDataSource = DataSourceFactory.create( + testUserName(), + testdb.getPassword(), + testdb.getDatabaseDriver().getDriverClassName(), + testdb.getJdbcUrl(), + Map.of("encrypt", "false")); } @Override - protected String randomTableSchema() { - return MODELS_SCHEMA + "_random"; + @AfterEach + protected void tearDown() { + try { + DataSourceFactory.close(testDataSource); + } catch (Exception e) { + throw new RuntimeException(e); + } + super.tearDown(); + } - private void switchCdcOnDatabase(final Boolean enable, final String db) { - final String storedProc = enable ? 
"sys.sp_cdc_enable_db" : "sys.sp_cdc_disable_db"; - executeQuery("USE [" + db + "]\n" + "EXEC " + storedProc); + private JdbcDatabase testDatabase() { + return new DefaultJdbcDatabase(testDataSource); } + // TODO : Delete this Override when MSSQL supports individual table snapshot @Override - public void createTable(final String schemaName, final String tableName, final String columnClause) { - switchCdcOnDatabase(true, dbName); - super.createTable(schemaName, tableName, columnClause); - - // sometimes seeing an error that we can't enable cdc on a table while sql server agent is still - // spinning up - // solving with a simple while retry loop - boolean failingToStart = true; - int retryNum = 0; - final int maxRetries = 10; - while (failingToStart) { - try { - executeQuery(String.format( - "EXEC sys.sp_cdc_enable_table\n" - + "\t@source_schema = N'%s',\n" - + "\t@source_name = N'%s', \n" - + "\t@role_name = N'%s',\n" - + "\t@supports_net_changes = 0", - schemaName, tableName, CDC_ROLE_NAME)); // enables cdc on MODELS_SCHEMA.MODELS_STREAM_NAME, giving CDC_ROLE_NAME select access - failingToStart = false; - } catch (final Exception e) { - if (retryNum >= maxRetries) { - throw e; - } else { - retryNum++; - try { - Thread.sleep(10000); // 10 seconds - } catch (final InterruptedException ex) { - throw new RuntimeException(ex); - } - } - } - } + public void newTableSnapshotTest() { + // Do nothing } @Override - public String columnClause(final Map columnsWithDataType, final Optional primaryKey) { + protected String columnClause(final Map columnsWithDataType, final Optional primaryKey) { final StringBuilder columnClause = new StringBuilder(); int i = 0; for (final Map.Entry column : columnsWithDataType.entrySet()) { @@ -254,59 +193,42 @@ public String columnClause(final Map columnsWithDataType, final return columnClause.toString(); } - @AfterEach - public void tearDown() { - try { - dslContext.close(); - DataSourceFactory.close(dataSource); - DataSourceFactory.close(testDataSource); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Test void testAssertCdcEnabledInDb() { // since we enable cdc in setup, assert that we successfully pass this first - assertDoesNotThrow(() -> source.assertCdcEnabledInDb(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertCdcEnabledInDb(config(), testDatabase())); // then disable cdc and assert the check fails - switchCdcOnDatabase(false, dbName); - assertThrows(RuntimeException.class, () -> source.assertCdcEnabledInDb(config, testJdbcDatabase)); + testdb.withoutCdc(); + assertThrows(RuntimeException.class, () -> source().assertCdcEnabledInDb(config(), testDatabase())); } @Test void testAssertCdcSchemaQueryable() { // correct access granted by setup so assert check passes - assertDoesNotThrow(() -> source.assertCdcSchemaQueryable(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertCdcSchemaQueryable(config(), testDatabase())); // now revoke perms and assert that check fails - alterPermissionsOnSchema(false, "cdc"); - assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class, () -> source.assertCdcSchemaQueryable(config, testJdbcDatabase)); - } - - private void switchSqlServerAgentAndWait(final Boolean start) throws InterruptedException { - final String startOrStop = start ? 
"START" : "STOP"; - executeQuery(String.format("EXEC xp_servicecontrol N'%s',N'SQLServerAGENT';", startOrStop)); - Thread.sleep(15 * 1000); // 15 seconds to wait for change of agent state + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + assertThrows(com.microsoft.sqlserver.jdbc.SQLServerException.class, + () -> source().assertCdcSchemaQueryable(config(), testDatabase())); } @Test - void testAssertSqlServerAgentRunning() throws InterruptedException { - executeQuery(String.format("USE master;\n" + "GRANT VIEW SERVER STATE TO %s", testUserName)); + void testAssertSqlServerAgentRunning() { + testdb.withAgentStopped().withWaitUntilAgentStopped(); // assert expected failure if sql server agent stopped - switchSqlServerAgentAndWait(false); - assertThrows(RuntimeException.class, () -> source.assertSqlServerAgentRunning(testJdbcDatabase)); + assertThrows(RuntimeException.class, () -> source().assertSqlServerAgentRunning(testDatabase())); // assert success if sql server agent running - switchSqlServerAgentAndWait(true); - assertDoesNotThrow(() -> source.assertSqlServerAgentRunning(testJdbcDatabase)); + testdb.withAgentStarted().withWaitUntilAgentRunning(); + assertDoesNotThrow(() -> source().assertSqlServerAgentRunning(testDatabase())); } @Test void testAssertSnapshotIsolationAllowed() { // snapshot isolation enabled by setup so assert check passes - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config(), testDatabase())); // now disable snapshot isolation and assert that check fails - switchSnapshotIsolation(false, dbName); - assertThrows(RuntimeException.class, () -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + testdb.withoutSnapshotIsolation(); + assertThrows(RuntimeException.class, () -> source().assertSnapshotIsolationAllowed(config(), testDatabase())); } @Test @@ -317,10 +239,11 @@ void testAssertSnapshotIsolationDisabled() { // set snapshot_isolation level to "Read Committed" to disable snapshot .put("snapshot_isolation", "Read Committed") .build()); + final var config = config(); Jsons.replaceNestedValue(config, List.of("replication_method"), replicationConfig); - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); - switchSnapshotIsolation(false, dbName); - assertDoesNotThrow(() -> source.assertSnapshotIsolationAllowed(config, testJdbcDatabase)); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config, testDatabase())); + testdb.withoutSnapshotIsolation(); + assertDoesNotThrow(() -> source().assertSnapshotIsolationAllowed(config, testDatabase())); } // Ensure the CDC check operations are included when CDC is enabled @@ -328,47 +251,52 @@ void testAssertSnapshotIsolationDisabled() { @Test void testCdcCheckOperations() throws Exception { // assertCdcEnabledInDb - switchCdcOnDatabase(false, dbName); - AirbyteConnectionStatus status = getSource().check(getConfig()); + testdb.withoutCdc(); + AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - switchCdcOnDatabase(true, dbName); + testdb.withCdc(); // assertCdcSchemaQueryable - alterPermissionsOnSchema(false, "cdc"); - status = getSource().check(getConfig()); + testdb.with("REVOKE SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - 
alterPermissionsOnSchema(true, "cdc"); + testdb.with("GRANT SELECT ON SCHEMA :: [cdc] TO %s", testUserName()); + // assertSqlServerAgentRunning - executeQuery(String.format("USE master;\n" + "GRANT VIEW SERVER STATE TO %s", testUserName)); - switchSqlServerAgentAndWait(false); - status = getSource().check(getConfig()); + + testdb.withAgentStopped().withWaitUntilAgentStopped(); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - switchSqlServerAgentAndWait(true); + testdb.withAgentStarted().withWaitUntilAgentRunning(); // assertSnapshotIsolationAllowed - switchSnapshotIsolation(false, dbName); - status = getSource().check(getConfig()); + testdb.withoutSnapshotIsolation(); + status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); } @Test void testCdcCheckOperationsWithDot() throws Exception { - // assertCdcEnabledInDb and validate escape with special character - switchCdcOnDatabase(true, dbNamewithDot); - final AirbyteConnectionStatus status = getSource().check(getConfig()); + final String dbNameWithDot = testdb.getDatabaseName().replace("_", "."); + testdb.with("CREATE DATABASE [%s];", dbNameWithDot) + .with("USE [%s]", dbNameWithDot) + .with("EXEC sys.sp_cdc_enable_db;"); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.SUCCEEDED); } // todo: check LSN returned is actually the max LSN // todo: check we fail as expected under certain conditions @Test - void testGetTargetPosition() throws InterruptedException { - Thread.sleep(10 * 1000); // Sleeping because sometimes the db is not yet completely ready and the lsn is not found + void testGetTargetPosition() { // check that getTargetPosition returns higher Lsn after inserting new row - final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testJdbcDatabase, dbName).targetLsn; - executeQuery(String.format("USE %s; INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", - dbName, MODELS_SCHEMA, MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car")); - Thread.sleep(15 * 1000); // 15 seconds to wait for Agent capture job to log cdc change - final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testJdbcDatabase, dbName).targetLsn; - assertTrue(secondLsn.compareTo(firstLsn) > 0); + testdb.withWaitUntilMaxLsnAvailable(); + final Lsn firstLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + testdb.with("INSERT INTO %s.%s (%s, %s, %s) VALUES (%s, %s, '%s');", + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, 910019, 1, "another car"); + // Wait for Agent capture job to log CDC change. 
+ await().atMost(Duration.ofSeconds(45)).until(() -> { + final Lsn secondLsn = MssqlCdcTargetPosition.getTargetPosition(testDatabase(), testdb.getDatabaseName()).targetLsn; + return secondLsn.compareTo(firstLsn) > 0; + }); } @Override @@ -382,24 +310,12 @@ protected void removeCDCColumns(final ObjectNode data) { @Override protected MssqlCdcTargetPosition cdcLatestTargetPosition() { - try { - // Sleeping because sometimes the db is not yet completely ready and the lsn is not found - Thread.sleep(5000); - } catch (final InterruptedException e) { - throw new RuntimeException(e); - } + testdb.withWaitUntilMaxLsnAvailable(); final JdbcDatabase jdbcDatabase = new StreamingJdbcDatabase( - DataSourceFactory.create(config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DRIVER_CLASS, - String.format("jdbc:sqlserver://%s:%s;databaseName=%s;", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asInt(), - dbName), - Map.of("encrypt", "false")), + testDataSource, new MssqlSourceOperations(), AdaptiveStreamingQueryConfig::new); - return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, dbName); + return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, testdb.getDatabaseName()); } @Override @@ -451,21 +367,6 @@ protected void addCdcDefaultCursorField(final AirbyteStream stream) { } } - @Override - protected Source getSource() { - return new MssqlSource(); - } - - @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected Database getDatabase() { - return database; - } - @Override protected void assertExpectedStateMessages(final List stateMessages) { assertEquals(1, stateMessages.size()); diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java index 9c7dc3259757..37bd7ff3c770 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlJdbcSourceAcceptanceTest.java @@ -9,173 +9,117 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; import io.airbyte.protocol.models.v0.CatalogHelpers; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.JDBCType; import java.util.Collections; import java.util.List; -import java.util.Map; -import javax.sql.DataSource; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import 
org.junit.jupiter.api.Test; -import org.testcontainers.containers.MSSQLServerContainer; -public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +public class MssqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "password_3435!"; - private static MSSQLServerContainer dbContainer; - private JsonNode config; - @BeforeAll - static void init() { + static { // In mssql, timestamp is generated automatically, so we need to use // the datetime type instead so that we can set the value manually. COL_TIMESTAMP_TYPE = "DATETIME2"; - - dbContainer = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - dbContainer.start(); } @Override - protected DataSource getDataSource(final JsonNode jdbcConfig) { - final Map connectionProperties = JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, - getJdbcParameterDelimiter()); - connectionProperties.put("encrypt", "false"); - return DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - connectionProperties); - } - - @BeforeEach - public void setup() throws Exception { - final JsonNode configWithoutDbName = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.USERNAME_KEY, dbContainer.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, dbContainer.getPassword()) - .build()); - - final DataSource dataSource = DataSourceFactory.create( - configWithoutDbName.get(JdbcUtils.USERNAME_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MSSQLSERVER.getDriverClassName(), - String.format("jdbc:sqlserver://%s:%d;", - configWithoutDbName.get(JdbcUtils.HOST_KEY).asText(), - configWithoutDbName.get(JdbcUtils.PORT_KEY).asInt()), - Map.of("encrypt", "false")); - - try { - final JdbcDatabase database = new DefaultJdbcDatabase(dataSource); - - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - database.execute(ctx -> ctx.createStatement().execute(String.format("CREATE DATABASE %s;", dbName))); - - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); - - super.setup(); - } finally { - DataSourceFactory.close(dataSource); - } - } - - @AfterAll - public static void cleanUp() throws Exception { - dbContainer.close(); + protected JsonNode config() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); } @Override - public boolean supportsSchemas() { - return true; + protected MssqlSource source() { + return new MssqlSource(); } @Override - public JsonNode getConfig() { - return Jsons.clone(config); + protected MsSQLTestDatabase createTestDatabase() { + return MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest"); } @Override - public AbstractJdbcSource getJdbcSource() { - return new MssqlSource(); + public boolean supportsSchemas() { + return true; } @Override - public String getDriverClass() { - return MssqlSource.DRIVER_CLASS; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) 
config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "loginTimeout=1"); } @Test void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 18456;"), status.getMessage()); } @Test public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - database.execute(ctx -> ctx.createStatement() - .execute(String.format("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION))); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + testdb.with("CREATE LOGIN %s WITH PASSWORD = '%s'; ", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, 
USERNAME_WITHOUT_PERMISSION); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;")); + assertTrue(status.getMessage().contains("State code: S0001; Error code: 4060;"), status.getMessage()); } @Override diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java index 53637fae11fd..c14c3cad4d61 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/MssqlSourceTest.java @@ -9,17 +9,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -29,67 +20,44 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.Collections; import java.util.List; -import java.util.Map; -import org.jooq.DSLContext; import org.junit.jupiter.api.*; -import org.testcontainers.containers.MSSQLServerContainer; class MssqlSourceTest { - private static final String DB_NAME = "dbo"; private static final String STREAM_NAME = "id_and_name"; private static final AirbyteCatalog CATALOG = new AirbyteCatalog().withStreams(Lists.newArrayList(CatalogHelpers.createAirbyteStream( STREAM_NAME, - DB_NAME, + "dbo", Field.of("id", JsonSchemaType.INTEGER), Field.of("name", JsonSchemaType.STRING), Field.of("born", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of("id"))))); - private JsonNode configWithoutDbName; - private JsonNode config; - - private static MSSQLServerContainer db; - - @BeforeAll - static void init() { - db = new MSSQLServerContainer<>("mcr.microsoft.com/mssql/server:2019-latest").acceptLicense(); - db.start(); - } + private MsSQLTestDatabase testdb; // how to interact with the mssql test container manaully. // 1. exec into mssql container (not the test container container) // 2. 
/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P "A_Str0ng_Required_Password" @BeforeEach - void setup() throws SQLException { - configWithoutDbName = getConfig(db); - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); - - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("CREATE DATABASE %s;", dbName)); - ctx.fetch(String.format("USE %s;", dbName)); - ctx.fetch("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', '2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); - return null; - }); - } + void setup() { + testdb = MsSQLTestDatabase.in("mcr.microsoft.com/mssql/server:2022-latest") + .with("CREATE TABLE id_and_name(id INTEGER NOT NULL, name VARCHAR(200), born DATETIMEOFFSET(7));") + .with("INSERT INTO id_and_name (id, name, born) VALUES (1,'picard', '2124-03-04T01:01:01Z'), (2, 'crusher', " + + "'2124-03-04T01:01:01Z'), (3, 'vash', '2124-03-04T01:01:01Z');"); + } - config = Jsons.clone(configWithoutDbName); - ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, dbName); - ((ObjectNode) config).put("ssl_method", Jsons.jsonNode(Map.of("ssl_method", "unencrypted"))); + @AfterEach + void cleanUp() { + testdb.close(); } - @AfterAll - static void cleanUp() { - db.stop(); - db.close(); + private JsonNode getConfig() { + return testdb.testConfigBuilder() + .withoutSsl() + .build(); } // if a column in mssql is used as a primary key and in a separate index the discover query returns @@ -97,82 +65,43 @@ static void cleanUp() { // this tests that this de-duplication is successful. 
@Test void testDiscoverWithPk() throws Exception { - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("USE %s;", config.get(JdbcUtils.DATABASE_KEY))); - ctx.execute("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);"); - ctx.execute("CREATE INDEX i1 ON id_and_name (id);"); - return null; - }); - } - - final AirbyteCatalog actual = new MssqlSource().discover(config); + testdb + .with("ALTER TABLE id_and_name ADD CONSTRAINT i3pk PRIMARY KEY CLUSTERED (id);") + .with("CREATE INDEX i1 ON id_and_name (id);"); + final AirbyteCatalog actual = new MssqlSource().discover(getConfig()); assertEquals(CATALOG, actual); } @Test @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") public void testTableWithNullCursorValueShouldThrowException() throws Exception { - try (final DSLContext dslContext = getDslContext(configWithoutDbName)) { - final Database database = getDatabase(dslContext); - database.query(ctx -> { - ctx.fetch(String.format("USE %s;", config.get(JdbcUtils.DATABASE_KEY))); - ctx.execute("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL"); - ctx.execute("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)"); - return null; - }); - - ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode( - SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - STREAM_NAME, - DB_NAME, - Field.of("id", JsonSchemaType.INTEGER), - Field.of("name", JsonSchemaType.STRING), - Field.of("born", JsonSchemaType.STRING)) - .withSupportedSyncModes( - Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams( - Collections.singletonList(configuredAirbyteStream)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet( - new MssqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. 
{tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}");
-    }
-  }
-
-  private JsonNode getConfig(final MSSQLServerContainer db) {
-    return Jsons.jsonNode(ImmutableMap.builder()
-        .put(JdbcUtils.HOST_KEY, db.getHost())
-        .put(JdbcUtils.PORT_KEY, db.getFirstMappedPort())
-        .put(JdbcUtils.USERNAME_KEY, db.getUsername())
-        .put(JdbcUtils.PASSWORD_KEY, db.getPassword())
-        .build());
-  }
-
-  private static DSLContext getDslContext(final JsonNode config) {
-    return DSLContextFactory.create(DataSourceFactory.create(
-        config.get(JdbcUtils.USERNAME_KEY).asText(),
-        config.get(JdbcUtils.PASSWORD_KEY).asText(),
-        DatabaseDriver.MSSQLSERVER.getDriverClassName(),
-        String.format("jdbc:sqlserver://%s:%d;",
-            config.get(JdbcUtils.HOST_KEY).asText(),
-            config.get(JdbcUtils.PORT_KEY).asInt()),
-        Map.of("encrypt", "false")), null);
-  }
-
-  public static Database getDatabase(final DSLContext dslContext) {
-    // todo (cgardens) - rework this abstraction so that we do not have to pass a null into the
-    // constructor. at least explicitly handle it, even if the impl doesn't change.
-    return new Database(dslContext);
+    testdb
+        .with("ALTER TABLE id_and_name ALTER COLUMN id INTEGER NULL")
+        .with("INSERT INTO id_and_name(id) VALUES (7), (8), (NULL)");
+
+    ConfiguredAirbyteStream configuredAirbyteStream = new ConfiguredAirbyteStream().withSyncMode(
+        SyncMode.INCREMENTAL)
+        .withCursorField(Lists.newArrayList("id"))
+        .withDestinationSyncMode(DestinationSyncMode.APPEND)
+        .withSyncMode(SyncMode.INCREMENTAL)
+        .withStream(CatalogHelpers.createAirbyteStream(
+            STREAM_NAME,
+            testdb.getDatabaseName(),
+            Field.of("id", JsonSchemaType.INTEGER),
+            Field.of("name", JsonSchemaType.STRING),
+            Field.of("born", JsonSchemaType.STRING))
+            .withSupportedSyncModes(
+                Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
+            .withSourceDefinedPrimaryKey(List.of(List.of("id")))));
+
+    final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(
+        Collections.singletonList(configuredAirbyteStream));
+
+    final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(
+        new MssqlSource().read(getConfig(), catalog, null)));
+    assertThat(throwable).isInstanceOf(ConfigErrorException.class)
+        .hasMessageContaining(
+            "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='dbo.id_and_name', cursorColumnName='id', cursorSqlType=INTEGER, cause=Cursor column contains NULL value}");
   }
 
 }
diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java
new file mode 100644
index 000000000000..1a44218cc5c5
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLContainerFactory.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql;
+
+import io.airbyte.cdk.testutils.ContainerFactory;
+import org.testcontainers.containers.MSSQLServerContainer;
+import org.testcontainers.containers.Network;
+import org.testcontainers.utility.DockerImageName;
+
+public class MsSQLContainerFactory implements ContainerFactory<MSSQLServerContainer<?>> {
+
+  @Override
+  public MSSQLServerContainer<?> createNewContainer(DockerImageName imageName) {
+    return new MSSQLServerContainer<>(imageName.asCompatibleSubstituteFor("mcr.microsoft.com/mssql/server")).acceptLicense();
+  }
+
+  @Override
+  public Class<?> getContainerClass() {
+    return MSSQLServerContainer.class;
+  }
+
+  /**
+   * Create a new network and bind it to the container.
+   */
+  public void withNetwork(MSSQLServerContainer<?> container) {
+    container.withNetwork(Network.newNetwork());
+  }
+
+  public void withAgent(MSSQLServerContainer<?> container) {
+    container.addEnv("MSSQL_AGENT_ENABLED", "True");
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java
new file mode 100644
index 000000000000..3060ea513adc
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mssql/src/testFixtures/java/io/airbyte/integrations/source/mssql/MsSQLTestDatabase.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.mssql;
+
+import io.airbyte.cdk.db.factory.DatabaseDriver;
+import io.airbyte.cdk.db.jdbc.JdbcUtils;
+import io.airbyte.cdk.testutils.TestDatabase;
+import io.debezium.connector.sqlserver.Lsn;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.jooq.SQLDialect;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.MSSQLServerContainer;
+
+public class MsSQLTestDatabase extends TestDatabase<MSSQLServerContainer<?>, MsSQLTestDatabase, MsSQLTestDatabase.MsSQLConfigBuilder> {
+
+  static private final Logger LOGGER = LoggerFactory.getLogger(MsSQLTestDatabase.class);
+
+  static public final int MAX_RETRIES = 60;
+
+  static public MsSQLTestDatabase in(String imageName, String... methods) {
+    final var container = new MsSQLContainerFactory().shared(imageName, methods);
+    final var testdb = new MsSQLTestDatabase(container);
+    return testdb
+        .withConnectionProperty("encrypt", "false")
+        .withConnectionProperty("databaseName", testdb.getDatabaseName())
+        .initialized();
+  }
+
+  public MsSQLTestDatabase(MSSQLServerContainer<?> container) {
+    super(container);
+  }
+
+  public MsSQLTestDatabase withSnapshotIsolation() {
+    return with("ALTER DATABASE %s SET ALLOW_SNAPSHOT_ISOLATION ON;", getDatabaseName());
+  }
+
+  public MsSQLTestDatabase withoutSnapshotIsolation() {
+    return with("ALTER DATABASE %s SET ALLOW_SNAPSHOT_ISOLATION OFF;", getDatabaseName());
+  }
+
+  public MsSQLTestDatabase withCdc() {
+    return with("EXEC sys.sp_cdc_enable_db;");
+  }
+
+  public MsSQLTestDatabase withoutCdc() {
+    return with("EXEC sys.sp_cdc_disable_db;");
+  }
+
+  public MsSQLTestDatabase withAgentStarted() {
+    return with("EXEC master.dbo.xp_servicecontrol N'START', N'SQLServerAGENT';");
+  }
+
+  public MsSQLTestDatabase withAgentStopped() {
+    return with("EXEC master.dbo.xp_servicecontrol N'STOP', N'SQLServerAGENT';");
+  }
+
+  public MsSQLTestDatabase withWaitUntilAgentRunning() {
+    waitForAgentState(true);
+    return self();
+  }
+
+  public MsSQLTestDatabase withWaitUntilAgentStopped() {
+    waitForAgentState(false);
+    return self();
+  }
+
+  private void waitForAgentState(final boolean running) {
+    final String expectedValue = running ? "Running." : "Stopped.";
+    LOGGER.debug("Waiting for SQLServerAgent state to change to '{}'.", expectedValue);
+    for (int i = 0; i < MAX_RETRIES; i++) {
+      try {
+        final var r = query(ctx -> ctx.fetch("EXEC master.dbo.xp_servicecontrol 'QueryState', N'SQLServerAGENT';").get(0));
+        if (expectedValue.equalsIgnoreCase(r.getValue(0).toString())) {
+          LOGGER.debug("SQLServerAgent state is '{}', as expected.", expectedValue);
+          return;
+        }
+        LOGGER.debug("Retrying, SQLServerAgent state {} does not match expected '{}'.", r, expectedValue);
+      } catch (SQLException e) {
+        LOGGER.debug("Retrying agent state query after catching exception {}.", e.getMessage());
+      }
+      try {
+        Thread.sleep(1_000); // Wait one second between retries.
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    throw new RuntimeException("Exhausted retry attempts while polling for agent state");
+  }
+
+  public MsSQLTestDatabase withWaitUntilMaxLsnAvailable() {
+    LOGGER.debug("Waiting for max LSN to become available for database {}.", getDatabaseName());
+    for (int i = 0; i < MAX_RETRIES; i++) {
+      try {
+        final var maxLSN = query(ctx -> ctx.fetch("SELECT sys.fn_cdc_get_max_lsn();").get(0).get(0, byte[].class));
+        if (maxLSN != null) {
+          LOGGER.debug("Max LSN available for database {}: {}", getDatabaseName(), Lsn.valueOf(maxLSN));
+          return self();
+        }
+        LOGGER.debug("Retrying, max LSN still not available for database {}.", getDatabaseName());
+      } catch (SQLException e) {
+        LOGGER.warn("Retrying max LSN query after catching exception {}", e.getMessage());
+      }
+      try {
+        Thread.sleep(1_000); // Wait one second between retries.
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    throw new RuntimeException("Exhausted retry attempts while polling for max LSN availability");
+  }
+
+  @Override
+  public String getPassword() {
+    return "S00p3rS33kr3tP4ssw0rd!";
+  }
+
+  @Override
+  public String getJdbcUrl() {
+    return String.format("jdbc:sqlserver://%s:%d", getContainer().getHost(), getContainer().getFirstMappedPort());
+  }
+
+  @Override
+  protected Stream<Stream<String>> inContainerBootstrapCmd() {
+    return Stream.of(
+        mssqlCmd(Stream.of(String.format("CREATE DATABASE %s", getDatabaseName()))),
+        mssqlCmd(Stream.of(
+            String.format("USE %s", getDatabaseName()),
+            String.format("CREATE LOGIN %s WITH PASSWORD = '%s', DEFAULT_DATABASE = %s", getUserName(), getPassword(), getDatabaseName()),
+            String.format("ALTER SERVER ROLE [sysadmin] ADD MEMBER %s", getUserName()),
+            String.format("CREATE USER %s FOR LOGIN %s WITH DEFAULT_SCHEMA = [dbo]", getUserName(), getUserName()),
+            String.format("ALTER ROLE [db_owner] ADD MEMBER %s", getUserName())))));
+  }
+
+  /**
+   * Don't drop anything when closing the test database. Instead, if cleanup is required, call
+   * {@link #dropDatabaseAndUser()} explicitly. Implicit cleanups may result in deadlocks and so
+   * aren't really worth it.
+   */
+  @Override
+  protected Stream<String> inContainerUndoBootstrapCmd() {
+    return Stream.empty();
+  }
+
+  public void dropDatabaseAndUser() {
+    execInContainer(mssqlCmd(Stream.of(
+        String.format("USE master"),
+        String.format("ALTER DATABASE %s SET single_user WITH ROLLBACK IMMEDIATE", getDatabaseName()),
+        String.format("DROP DATABASE %s", getDatabaseName()))));
+  }
+
+  public Stream<String> mssqlCmd(Stream<String> sql) {
+    return Stream.of("/opt/mssql-tools/bin/sqlcmd",
+        "-U", getContainer().getUsername(),
+        "-P", getContainer().getPassword(),
+        "-Q", sql.collect(Collectors.joining("; ")),
+        "-b", "-e");
+  }
+
+  @Override
+  public DatabaseDriver getDatabaseDriver() {
+    return DatabaseDriver.MSSQLSERVER;
+  }
+
+  @Override
+  public SQLDialect getSqlDialect() {
+    return SQLDialect.DEFAULT;
+  }
+
+  @Override
+  public MsSQLConfigBuilder configBuilder() {
+    return new MsSQLConfigBuilder(this);
+  }
+
+  static public class MsSQLConfigBuilder extends ConfigBuilder<MsSQLTestDatabase, MsSQLConfigBuilder> {
+
+    protected MsSQLConfigBuilder(MsSQLTestDatabase testDatabase) {
+      super(testDatabase);
+    }
+
+    public MsSQLConfigBuilder withCdcReplication() {
+      return with("replication_method", Map.of(
+          "method", "CDC",
+          "data_to_sync", "Existing and New",
+          "initial_waiting_seconds", DEFAULT_CDC_REPLICATION_INITIAL_WAIT.getSeconds(),
+          "snapshot_isolation", "Snapshot"));
+    }
+
+    public MsSQLConfigBuilder withSchemas(String... schemas) {
+      return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas));
+    }
+
+    @Override
+    public MsSQLConfigBuilder withoutSsl() {
+      return withSsl(Map.of("ssl_method", "unencrypted"));
+    }
+
+    @Override
+    public MsSQLConfigBuilder withSsl(Map<Object, Object> sslMode) {
+      return with("ssl_method", sslMode);
+    }
+
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle b/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle
index a94da305be1e..61323c304a86 100644
--- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle
+++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/build.gradle
@@ -4,12 +4,12 @@ plugins {
 }
 
 airbyteJavaConnector {
-    cdkVersionRequired = '0.4.1'
+    cdkVersionRequired = '0.5.0'
     features = ['db-sources']
     useLocalCdk = false
 }
 
-airbyteJavaConnector.addCdkDependencies()
+
 
 configurations.all {
     resolutionStrategy {
@@ -26,6 +26,7 @@ dependencies {
     implementation project(':airbyte-integrations:connectors:source-mysql')
     implementation libs.jooq
 
+    testImplementation testFixtures(project(':airbyte-integrations:connectors:source-mysql'))
     testImplementation libs.junit.jupiter.system.stubs
     testImplementation 'org.hamcrest:hamcrest-all:1.3'
    testImplementation libs.testcontainers.mysql
diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties b/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties
new file mode 100644
index 000000000000..8ef098d20b92
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/gradle.properties
@@ -0,0 +1 @@
+testExecutionConcurrency=-1
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml
index e7aa7fb15b86..7101b9be25a2 100644
--- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml
+++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/metadata.yaml
@@ -11,7 +11,7 @@ data:
   connectorSubtype: database
   connectorType: source
   definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
-  dockerImageTag: 3.1.7
+  dockerImageTag: 3.1.8
   dockerRepository: airbyte/source-mysql-strict-encrypt
   githubIssueLabel: source-mysql
   icon: mysql.svg
diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java
index aff5350f30d7..05583c81ddaa 100644
--- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java
+++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/main/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSource.java
@@ -41,7 +41,11 @@ public class MySqlStrictEncryptSource extends SpecModifyingSource implements Sou "
  • Verify Identity - Always connect with SSL. Verify both CA and Hostname.
  • Read more in the docs."; MySqlStrictEncryptSource() { - super(MySqlSource.sshWrappedSource()); + this(new MySqlSource()); + } + + MySqlStrictEncryptSource(MySqlSource source) { + super(MySqlSource.sshWrappedSource(source)); } @Override diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java deleted file mode 100644 index d69c5a0ff714..000000000000 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.source.mysql_strict_encrypt; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.MySqlUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.ssh.SshHelpers; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; - -public abstract class AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { - - protected static MySqlUtils.Certificate certs; - protected static final String PASSWORD = "Passw0rd"; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - addTestData(container); - certs = MySqlUtils.getCertificate(container, true); - - final var sslMode = getSslConfig(); - final var innerContainerAddress = SshHelpers.getInnerContainerAddress(container); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, innerContainerAddress.left) - .put(JdbcUtils.PORT_KEY, innerContainerAddress.right) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - } - - public abstract ImmutableMap getSslConfig(); - - private void addTestData(final MySQLContainer container) throws Exception { - final var outerContainerAddress = SshHelpers.getOuterContainerAddress(container); - try (final DSLContext dslContext = DSLContextFactory.create( - container.getUsername(), - container.getPassword(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - outerContainerAddress.left, - outerContainerAddress.right, - 
container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } - } - -} diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java index 5673d28039c1..c0efd449d2c0 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest.java @@ -4,17 +4,29 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; +import java.util.stream.Stream; -public class MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest extends AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest { +public class MySqlSslCaCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; + + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); + } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCaCertificate()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java index 332d95266fcc..6df92b5e507d 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest.java @@ -4,19 +4,31 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; +import 
com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; +import java.util.stream.Stream; -public class MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest extends AbstractMySqlSslCertificateStrictEncryptSourceAcceptanceTest { +public class MySqlSslFullCertificateStrictEncryptSourceAcceptanceTest extends MySqlStrictEncryptSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; + + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); + } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java index 90ae08a90fa0..35fafab62790 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSourceAcceptanceTest.java @@ -4,22 +4,19 @@ package io.airbyte.integrations.source.mysql_strict_encrypt; -import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -29,72 +26,40 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import 
io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) +import java.util.stream.Stream; + public class MySqlStrictEncryptSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - public EnvironmentVariables environmentVariables; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "public.starships"; - protected MySQLContainer container; - protected JsonNode config; + protected MySQLTestDatabase testdb; @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); + protected void setupEnvironment(final TestDestinationEnv environment) { + final var container = new MySQLContainerFactory().shared("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)); + testdb = new MySQLTestDatabase(container) + .withConnectionProperty("useSSL", "true") + .withConnectionProperty("requireSSL", "true") + .initialized() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + } - var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "required") - .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s?%s", - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText(), - String.join("&", SSL_PARAMETERS)), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); + } + + @Override + protected FeatureFlags featureFlags() { + return 
FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Override @@ -109,7 +74,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSsl(ImmutableMap.of(JdbcUtils.MODE_KEY, "required")) + .withStandardReplication() + .build(); } @Override @@ -120,7 +88,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), @@ -129,7 +97,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME2), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME2), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java index 7f42cdf5040a..a0381a0dc7c9 100644 --- a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptJdbcSourceAcceptanceTest.java @@ -8,43 +8,33 @@ * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
 */
-import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS;
 import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY;
 import static java.util.stream.Collectors.toList;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
-import io.airbyte.cdk.db.Database;
-import io.airbyte.cdk.db.MySqlUtils;
-import io.airbyte.cdk.db.factory.DSLContextFactory;
-import io.airbyte.cdk.db.factory.DatabaseDriver;
 import io.airbyte.cdk.db.jdbc.JdbcUtils;
-import io.airbyte.cdk.integrations.base.Source;
-import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
 import io.airbyte.cdk.integrations.base.ssh.SshHelpers;
-import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
 import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest;
 import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState;
 import io.airbyte.commons.features.EnvVariableFeatureFlags;
+import io.airbyte.commons.features.FeatureFlagsWrapper;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.resources.MoreResources;
-import io.airbyte.commons.string.Strings;
 import io.airbyte.commons.util.MoreIterators;
+import io.airbyte.integrations.source.mysql.MySQLContainerFactory;
+import io.airbyte.integrations.source.mysql.MySQLTestDatabase;
 import io.airbyte.integrations.source.mysql.MySqlSource;
 import io.airbyte.integrations.source.mysql.internal.models.CursorBasedStatus;
 import io.airbyte.integrations.source.mysql.internal.models.InternalModels.StateType;
 import io.airbyte.protocol.models.Field;
 import io.airbyte.protocol.models.JsonSchemaType;
 import io.airbyte.protocol.models.v0.AirbyteCatalog;
-import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
-import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status;
 import io.airbyte.protocol.models.v0.AirbyteMessage;
 import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
 import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
@@ -59,128 +49,52 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode;
 import io.airbyte.protocol.models.v0.StreamDescriptor;
 import io.airbyte.protocol.models.v0.SyncMode;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
-import org.jooq.DSLContext;
-import org.jooq.SQLDialect;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.testcontainers.containers.MySQLContainer;
-import org.testcontainers.containers.Network;
-import uk.org.webcompere.systemstubs.environment.EnvironmentVariables;
-import uk.org.webcompere.systemstubs.jupiter.SystemStub;
-import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension;
-
-@ExtendWith(SystemStubsExtension.class) -class MySqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - - @SystemStub - private EnvironmentVariables environmentVariables; - - protected static final String TEST_USER = "test"; - protected static final String TEST_PASSWORD = "test"; - protected static MySQLContainer container; - private static final SshBastionContainer bastion = new SshBastionContainer(); - private static final Network network = Network.newNetwork(); - - protected Database database; - protected DSLContext dslContext; - - @BeforeAll - static void init() throws SQLException { - container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD); - container.start(); - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", container.getPassword()); - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - } - @BeforeEach - public void setup() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10)) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s?%s", - container.getHost(), - container.getFirstMappedPort(), - String.join("&", SSL_PARAMETERS)), - SQLDialect.MYSQL); - database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - return null; - }); +class MySqlStrictEncryptJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - super.setup(); - } - - @AfterEach - void tearDownMySql() throws Exception { - dslContext.close(); - super.tearDown(); - } - - @AfterAll - static void cleanUp() { - container.close(); - } - - // MySql does not support schemas in the way most dbs do. Instead we namespace by db name. @Override - public boolean supportsSchemas() { - return false; + protected JsonNode config() { + return testdb.testConfigBuilder().build(); } @Override - public MySqlSource getJdbcSource() { - return new MySqlSource(); + protected MySqlStrictEncryptSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return new MySqlStrictEncryptSource(source); } @Override - public Source getSource() { - return new MySqlStrictEncryptSource(); + protected MySQLTestDatabase createTestDatabase() { + final var container = new MySQLContainerFactory().shared("mysql:8.0"); + return new MySQLTestDatabase(container) + .withConnectionProperty("useSSL", "true") + .withConnectionProperty("requireSSL", "true") + .initialized(); } @Override - public String getDriverClass() { - return MySqlSource.DRIVER_CLASS; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1000"); } + // MySql does not support schemas in the way most dbs do. 
Instead we namespace by db name. @Override - public JsonNode getConfig() { - return Jsons.clone(config); + public boolean supportsSchemas() { + return false; } @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = SshHelpers.injectSshIntoSpec(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class)); assertEquals(expected, actual); @@ -216,176 +130,42 @@ protected AirbyteCatalog getCatalog(final String defaultNamespace) { List.of(List.of(COL_FIRST_NAME), List.of(COL_LAST_NAME))))); } - @Test - void testStrictSSLUnsecuredNoTunnel() throws Exception { - final String PASSWORD = "Passw0rd"; - final var certs = MySqlUtils.getCertificate(container, true); - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "preferred") - .build(); - - final var tunnelMode = ImmutableMap.builder() - .put("tunnel_method", "NO_TUNNEL") - .build(); - ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake") - .put(JdbcUtils.SSL_KEY, true) - .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode)); - ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode)); - - final AirbyteConnectionStatus actual = source.check(config); - assertEquals(Status.FAILED, actual.getStatus()); - assertTrue(actual.getMessage().contains("Unsecured connection not allowed")); - } - - @Test - void testStrictSSLSecuredNoTunnel() throws Exception { - final String PASSWORD = "Passw0rd"; - final var certs = MySqlUtils.getCertificate(container, true); - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) - .build(); - - final var tunnelMode = ImmutableMap.builder() - .put("tunnel_method", "NO_TUNNEL") - .build(); - ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake") - .put(JdbcUtils.SSL_KEY, true) - .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode)); - ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode)); - - final AirbyteConnectionStatus actual = source.check(config); - assertEquals(Status.FAILED, actual.getStatus()); - assertFalse(actual.getMessage().contains("Unsecured connection not allowed")); - } - - @Test - void testStrictSSLSecuredWithTunnel() throws Exception { - final String PASSWORD = "Passw0rd"; - final var certs = MySqlUtils.getCertificate(container, true); - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) - .build(); - - final var tunnelMode = ImmutableMap.builder() - .put("tunnel_method", "SSH_KEY_AUTH") - .build(); - ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake") - .put(JdbcUtils.SSL_KEY, true) - .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode)); - ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode)); - - final AirbyteConnectionStatus actual = source.check(config); - assertEquals(Status.FAILED, actual.getStatus()); - assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration.")); - } - - @Test - void testStrictSSLUnsecuredWithTunnel() throws 
Exception { - final String PASSWORD = "Passw0rd"; - final var certs = MySqlUtils.getCertificate(container, true); - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "preferred") - .build(); - - final var tunnelMode = ImmutableMap.builder() - .put("tunnel_method", "SSH_KEY_AUTH") - .build(); - ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake") - .put(JdbcUtils.SSL_KEY, true) - .putIfAbsent(JdbcUtils.SSL_MODE_KEY, Jsons.jsonNode(sslMode)); - ((ObjectNode) config).putIfAbsent("tunnel_method", Jsons.jsonNode(tunnelMode)); - - final AirbyteConnectionStatus actual = source.check(config); - assertEquals(Status.FAILED, actual.getStatus()); - assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration.")); - } - - @Test - void testCheckWithSSlModeDisabled() throws Exception { - try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0").withNetwork(network)) { - bastion.initAndStartBastion(network); - db.start(); - final JsonNode configWithSSLModeDisabled = bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(db.getContainerInfo() - .getNetworkSettings() - .getNetworks() - .entrySet().stream() - .findFirst() - .get().getValue().getIpAddress())) - .put(JdbcUtils.PORT_KEY, db.getExposedPorts().get(0)) - .put(JdbcUtils.DATABASE_KEY, db.getDatabaseName()) - .put(JdbcUtils.SCHEMAS_KEY, List.of("public")) - .put(JdbcUtils.USERNAME_KEY, db.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) - .put(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, "disable")), false); - - final AirbyteConnectionStatus actual = source.check(configWithSSLModeDisabled); - assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus()); - } finally { - bastion.stopAndClose(); - } - } - @Test void testReadMultipleTablesIncrementally() throws Exception { + final var config = config(); ((ObjectNode) config).put("sync_checkpoint_records", 1); final String namespace = getDefaultNamespace(); final String streamOneName = TABLE_NAME + "one"; // Create a fresh first table - database.query(connection -> { - connection.fetch(String.format("USE %s;", getDefaultNamespace())); - connection.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at VARCHAR(200) NOT NULL\n" - + ");", streamOneName)); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - return null; - }); + testdb.with(""" + CREATE TABLE %s ( + id int PRIMARY KEY, + name VARCHAR(200) NOT NULL, + updated_at VARCHAR(200) NOT NULL + );""", streamOneName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", + getFullyQualifiedTableName(streamOneName)); // Create a fresh second table final String streamTwoName = TABLE_NAME + "two"; final 
String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); // Insert records into second table - database.query(ctx -> { - ctx.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at DATE NOT NULL\n" - + ");", streamTwoName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (40,'Jean Luc','2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (41, 'Groot', '2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (42, 'Thanos','2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with(""" + CREATE TABLE %s ( + id int PRIMARY KEY, + name VARCHAR(200) NOT NULL, + updated_at DATE NOT NULL + );""", streamTwoName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (40,'Jean Luc','2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')", + streamTwoFullyQualifiedName); // Create records list that we expect to see in the state message final List streamTwoExpectedRecords = Arrays.asList( createRecord(streamTwoName, namespace, ImmutableMap.of( @@ -416,7 +196,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Perform initial sync final List messagesFromFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, null)); + .toList(source().read(config, configuredCatalog, null)); final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); @@ -483,7 +263,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // - stream two state being the Primary Key state before the final emitted state before the cursor // switch final List messagesFromSecondSyncWithMixedStates = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); @@ -510,21 +290,13 @@ void testReadMultipleTablesIncrementally() throws Exception { // Add some data to each table and perform a third read. 
// Expect to see all records be synced via cursorBased method and not primaryKey - - database.query(ctx -> { - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (4,'Hooper','2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (43, 'Iron Man', '2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("INSERT INTO %s(id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -615,13 +387,13 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -629,7 +401,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") @@ -641,7 +413,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { diff --git a/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java new file mode 100644 index 000000000000..66f6713dabec --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql-strict-encrypt/src/test/java/io/airbyte/integrations/source/mysql_strict_encrypt/MySqlStrictEncryptSslTest.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */
+
+package io.airbyte.integrations.source.mysql_strict_encrypt;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.cdk.db.jdbc.JdbcUtils;
+import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer;
+import io.airbyte.cdk.integrations.base.ssh.SshTunnel;
+import io.airbyte.integrations.source.mysql.MySQLContainerFactory;
+import io.airbyte.integrations.source.mysql.MySQLTestDatabase;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.parallel.Execution;
+import org.junit.jupiter.api.parallel.ExecutionMode;
+
+@Execution(ExecutionMode.CONCURRENT)
+public class MySqlStrictEncryptSslTest {
+
+  private MySQLTestDatabase createTestDatabase(String... containerFactoryMethods) {
+    final var container = new MySQLContainerFactory().shared("mysql:8.0", containerFactoryMethods);
+    return new MySQLTestDatabase(container)
+        .withConnectionProperty("useSSL", "true")
+        .withConnectionProperty("requireSSL", "true")
+        .initialized();
+  }
+
+  @Test
+  void testStrictSSLUnsecuredNoTunnel() throws Exception {
+    try (final var testdb = createTestDatabase()) {
+      final var config = testdb.configBuilder()
+          .withHostAndPort()
+          .withDatabase()
+          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
+          .with(JdbcUtils.PASSWORD_KEY, "fake")
+          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
+          .withSsl(ImmutableMap.builder()
              .put(JdbcUtils.MODE_KEY, "preferred")
              .build())
+          .build();
+      final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config);
+      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
+      assertTrue(actual.getMessage().contains("Unsecured connection not allowed"), actual.getMessage());
+    }
+  }
+
+  @Test
+  void testStrictSSLSecuredNoTunnel() throws Exception {
+    final String PASSWORD = "Passw0rd";
+    try (final var testdb = createTestDatabase("withRootAndServerCertificates", "withClientCertificate")) {
+      final var config = testdb.testConfigBuilder()
+          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "NO_TUNNEL").build())
+          .withSsl(ImmutableMap.builder()
              .put(JdbcUtils.MODE_KEY, "verify_ca")
              .put("ca_certificate", testdb.getCertificates().caCertificate())
              .put("client_certificate", testdb.getCertificates().clientCertificate())
              .put("client_key", testdb.getCertificates().clientKey())
              .put("client_key_password", PASSWORD)
              .build())
+          .build();
+      final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config);
+      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
+      assertTrue(actual.getMessage().contains("Failed to create keystore for Client certificate"), actual.getMessage());
+    }
+  }
+
+  @Test
+  void testStrictSSLSecuredWithTunnel() throws Exception {
+    final String PASSWORD = "Passw0rd";
+    try (final var testdb = createTestDatabase("withRootAndServerCertificates", "withClientCertificate")) {
+      final var config = testdb.configBuilder()
+          .withHostAndPort()
+          .withDatabase()
+          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
+          .with(JdbcUtils.PASSWORD_KEY, "fake")
+          .withSsl(ImmutableMap.builder()
              .put(JdbcUtils.MODE_KEY, "verify_ca")
              .put("ca_certificate", testdb.getCertificates().caCertificate())
              .put("client_certificate", testdb.getCertificates().clientCertificate())
              .put("client_key", testdb.getCertificates().clientKey())
              .put("client_key_password", PASSWORD)
              .build())
+          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
+          .build();
+      final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config);
+      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
+      assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
+    }
+  }
+
+  @Test
+  void testStrictSSLUnsecuredWithTunnel() throws Exception {
+    try (final var testdb = createTestDatabase()) {
+      final var config = testdb.configBuilder()
+          .withHostAndPort()
+          .withDatabase()
+          .with(JdbcUtils.USERNAME_KEY, testdb.getUserName())
+          .with(JdbcUtils.PASSWORD_KEY, "fake")
+          .withSsl(ImmutableMap.builder()
              .put(JdbcUtils.MODE_KEY, "preferred")
              .build())
+          .with("tunnel_method", ImmutableMap.builder().put("tunnel_method", "SSH_KEY_AUTH").build())
+          .build();
+      final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config);
+      assertEquals(AirbyteConnectionStatus.Status.FAILED, actual.getStatus());
+      assertTrue(actual.getMessage().contains("Could not connect with provided SSH configuration."), actual.getMessage());
    }
  }
+
+  @Test
+  void testCheckWithSslModeDisabled() throws Exception {
+    try (final var testdb = createTestDatabase("withNetwork")) {
+      try (final SshBastionContainer bastion = new SshBastionContainer()) {
+        bastion.initAndStartBastion(testdb.getContainer().getNetwork());
+        final var config = testdb.integrationTestConfigBuilder()
+            .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false))
+            .withoutSsl()
+            .build();
+        final AirbyteConnectionStatus actual = new MySqlStrictEncryptSource().check(config);
+        assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, actual.getStatus());
+      }
+    }
+  }
+
+}
diff --git a/airbyte-integrations/connectors/source-mysql/build.gradle b/airbyte-integrations/connectors/source-mysql/build.gradle
index 95c505b52167..f90f12b2c5d6 100644
--- a/airbyte-integrations/connectors/source-mysql/build.gradle
+++ b/airbyte-integrations/connectors/source-mysql/build.gradle
@@ -7,7 +7,7 @@ plugins {
 }
 airbyteJavaConnector {
-    cdkVersionRequired = '0.4.1'
+    cdkVersionRequired = '0.5.0'
     features = ['db-sources']
     useLocalCdk = false
 }
@@ -18,7 +18,7 @@ configurations.all {
   }
 }
-airbyteJavaConnector.addCdkDependencies()
+
 application {
     mainClass = 'io.airbyte.integrations.source.mysql.MySqlSource'
@@ -35,7 +35,7 @@ dependencies {
     testImplementation 'org.hamcrest:hamcrest-all:1.3'
     testImplementation libs.junit.jupiter.system.stubs
     testImplementation libs.testcontainers.mysql
-
+    testFixturesImplementation libs.testcontainers.mysql
     performanceTestJavaImplementation project(':airbyte-integrations:connectors:source-mysql')
 }
diff --git a/airbyte-integrations/connectors/source-mysql/gradle.properties b/airbyte-integrations/connectors/source-mysql/gradle.properties
new file mode 100644
index 000000000000..8ef098d20b92
--- /dev/null
+++ b/airbyte-integrations/connectors/source-mysql/gradle.properties
@@ -0,0 +1 @@
+testExecutionConcurrency=-1
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/source-mysql/metadata.yaml b/airbyte-integrations/connectors/source-mysql/metadata.yaml
index 58a82a25439c..8443347774a0 100644
--- a/airbyte-integrations/connectors/source-mysql/metadata.yaml
+++ b/airbyte-integrations/connectors/source-mysql/metadata.yaml
@@ -9,7 +9,7 @@ data:
   connectorSubtype: database
   connectorType: source
   definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
-  dockerImageTag: 3.1.7
+  dockerImageTag: 3.1.8
   dockerRepository: airbyte/source-mysql
   documentationUrl: https://docs.airbyte.com/integrations/sources/mysql
   githubIssueLabel: source-mysql
diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java
index a0be6d6305e2..15bc34eefdcf 100644
--- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java
+++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcProperties.java
@@ -26,7 +26,10 @@ public class MySqlCdcProperties {
   private static final Logger LOGGER = LoggerFactory.getLogger(MySqlCdcProperties.class);
-  private static final Duration HEARTBEAT_FREQUENCY = Duration.ofSeconds(10);
+  private static final Duration HEARTBEAT_INTERVAL = Duration.ofSeconds(10L);
+
+  // Test execution latency is lower when heartbeats are more frequent.
+  private static final Duration HEARTBEAT_INTERVAL_IN_TESTS = Duration.ofSeconds(1L);
   public static Properties getDebeziumProperties(final JdbcDatabase database) {
     final JsonNode sourceConfig = database.getSourceConfig();
@@ -61,7 +64,12 @@ private static Properties commonProperties(final JdbcDatabase database) {
     props.setProperty("converters", "boolean, datetime");
     props.setProperty("boolean.type", CustomMySQLTinyIntOneToBooleanConverter.class.getName());
     props.setProperty("datetime.type", MySQLDateTimeConverter.class.getName());
-    props.setProperty("heartbeat.interval.ms", Long.toString(HEARTBEAT_FREQUENCY.toMillis()));
+
+    final Duration heartbeatInterval =
+        (database.getSourceConfig().has("is_test") && database.getSourceConfig().get("is_test").asBoolean())
+            ? HEARTBEAT_INTERVAL_IN_TESTS
+            : HEARTBEAT_INTERVAL;
+    props.setProperty("heartbeat.interval.ms", Long.toString(heartbeatInterval.toMillis()));
     // For CDC mode, the user cannot provide timezone arguments as JDBC parameters - they are
     // specifically defined in the replication_method
diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java
index 6ff8a47884c2..b942d468cdc6 100644
--- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java
+++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSource.java
@@ -34,7 +34,7 @@ import io.airbyte.cdk.integrations.base.IntegrationRunner;
 import io.airbyte.cdk.integrations.base.Source;
 import io.airbyte.cdk.integrations.base.ssh.SshWrappedSource;
-import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil;
+import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
 import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource;
 import io.airbyte.cdk.integrations.source.jdbc.JdbcDataSourceUtils;
 import io.airbyte.cdk.integrations.source.jdbc.JdbcSSLConnectionUtils;
@@ -46,8 +46,6 @@ import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory;
 import io.airbyte.cdk.integrations.util.HostPortResolver;
 import io.airbyte.commons.exceptions.ConfigErrorException;
-import io.airbyte.commons.features.EnvVariableFeatureFlags;
-import io.airbyte.commons.features.FeatureFlags;
 import io.airbyte.commons.functional.CheckedConsumer;
 import io.airbyte.commons.json.Jsons;
 import io.airbyte.commons.map.MoreMaps;
@@ -116,15 +114,12 @@ public class MySqlSource extends AbstractJdbcSource implements Source
       "useSSL=true",
       "requireSSL=true");
-  private final FeatureFlags featureFlags;
-
-  public static Source sshWrappedSource() {
-    return new SshWrappedSource(new MySqlSource(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY);
+  public static Source sshWrappedSource(MySqlSource source) {
+    return new SshWrappedSource(source, JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY);
   }
   public MySqlSource() {
     super(DRIVER_CLASS, MySqlStreamingQueryConfig::new, new MySqlSourceOperations());
-    this.featureFlags = new EnvVariableFeatureFlags();
   }
   private static AirbyteStream overrideSyncModes(final AirbyteStream stream) {
@@ -182,7 +177,7 @@ public List> getCheckOperations(final J
     checkOperations.addAll(CdcConfigurationHelper.getCheckOperations());
     checkOperations.add(database -> {
-      FirstRecordWaitTimeUtil.checkFirstRecordWaitTime(config);
+      RecordWaitTimeUtil.checkFirstRecordWaitTime(config);
       CdcConfigurationHelper.checkServerTimeZoneConfig(config);
     });
   }
@@ -530,7 +525,7 @@ public static Map parseJdbcParameters(final String jdbcPropertie
   }
   public static void main(final String[] args) throws Exception {
-    final Source source = MySqlSource.sshWrappedSource();
+    final Source source = MySqlSource.sshWrappedSource(new MySqlSource());
     LOGGER.info("starting source: {}", MySqlSource.class);
     new IntegrationRunner(source).run(args);
     LOGGER.info("completed source: {}", MySqlSource.class);
diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java
index fd61596d5b28..8ca08abb0ffd 100644
--- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java
+++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialReadUtil.java
@@ -17,7 +17,7 @@ import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
 import io.airbyte.cdk.integrations.debezium.AirbyteDebeziumHandler;
 import io.airbyte.cdk.integrations.debezium.internals.DebeziumPropertiesManager;
-import io.airbyte.cdk.integrations.debezium.internals.FirstRecordWaitTimeUtil;
+import io.airbyte.cdk.integrations.debezium.internals.RecordWaitTimeUtil;
 import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcPosition;
 import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcTargetPosition;
 import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlDebeziumStateUtil;
@@ -84,7 +84,8 @@ public static List> getCdcReadIterators(fi
       final Instant emittedAt,
       final String quoteString) {
     final JsonNode sourceConfig = database.getSourceConfig();
-    final Duration firstRecordWaitTime = FirstRecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig);
+    final Duration firstRecordWaitTime = RecordWaitTimeUtil.getFirstRecordWaitTime(sourceConfig);
+    final Duration subsequentRecordWaitTime = RecordWaitTimeUtil.getSubsequentRecordWaitTime(sourceConfig);
     LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds());
     // Determine the streams that need to be loaded via primary key sync.
     final List> initialLoadIterator = new ArrayList<>();
@@ -149,8 +150,13 @@ public static List> getCdcReadIterators(fi
     }
     // Build the incremental CDC iterators.
- final AirbyteDebeziumHandler handler = - new AirbyteDebeziumHandler<>(sourceConfig, MySqlCdcTargetPosition.targetPosition(database), true, firstRecordWaitTime, OptionalInt.empty()); + final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>( + sourceConfig, + MySqlCdcTargetPosition.targetPosition(database), + true, + firstRecordWaitTime, + subsequentRecordWaitTime, + OptionalInt.empty()); final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators(catalog, new MySqlCdcSavedInfoFetcher(stateToBeUsed), diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java index 1ea69c06ccec..14004596d669 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSourceDatatypeTest.java @@ -4,11 +4,11 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import com.fasterxml.jackson.databind.JsonNode; import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; +import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import java.io.File; import java.io.IOException; @@ -21,31 +21,26 @@ import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.testcontainers.containers.MySQLContainer; public abstract class AbstractMySqlSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { protected static final Logger LOGGER = LoggerFactory.getLogger(AbstractMySqlSourceDatatypeTest.class); - protected MySQLContainer container; - protected JsonNode config; + protected MySQLTestDatabase testdb; @Override - protected JsonNode getConfig() { - return config; + protected String getNameSpace() { + return testdb.getDatabaseName(); } @Override - protected String getImageName() { - return "airbyte/source-mysql:dev"; + protected void tearDown(final TestDestinationEnv testEnv) { + testdb.close(); } @Override - protected abstract Database setupDatabase() throws Exception; - - @Override - protected String getNameSpace() { - return container.getDatabaseName(); + protected String getImageName() { + return "airbyte/source-mysql:dev"; } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java deleted file mode 100644 index bef3c97c250f..000000000000 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractMySqlSslCertificateSourceAcceptanceTest.java +++ 
/dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.integrations.io.airbyte.integration_tests.sources; - -import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.MySqlUtils; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import java.io.IOException; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; - -public abstract class AbstractMySqlSslCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { - - protected static MySqlUtils.Certificate certs; - protected static final String PASSWORD = "Passw0rd"; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - addTestData(container); - certs = getCertificates(); - - var sslMode = getSslConfig(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - } - - public abstract MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException; - - public abstract ImmutableMap getSslConfig(); - - private void addTestData(MySQLContainer container) throws Exception { - try (final DSLContext dslContext = DSLContextFactory.create( - container.getUsername(), - container.getPassword(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } - } - -} diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java index c5db1157f5c2..a5d57eeb336c 100644 --- 
a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshMySqlSourceAcceptanceTest.java @@ -10,6 +10,8 @@ import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.Field; @@ -28,19 +30,22 @@ public abstract class AbstractSshMySqlSourceAcceptanceTest extends SourceAccepta private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - protected static JsonNode config; + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + private JsonNode config; public abstract Path getConfigFilePath(); @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { + protected void setupEnvironment(final TestDestinationEnv environment) { config = Jsons.deserialize(IOs.readFile(getConfigFilePath())); } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - - } + protected void tearDown(final TestDestinationEnv testEnv) {} @Override protected String getImageName() { diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java index 9d6ad4b2163d..357ccc336ace 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCMySqlDatatypeAccuracyTest.java @@ -4,98 +4,25 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; public class CDCMySqlDatatypeAccuracyTest extends MySqlDatatypeAccuracyTest { - private DSLContext dslContext; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - super.tearDown(testEnv); + protected JsonNode getConfig() { + return 
testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .with("snapshot_mode", "initial_only") + .build(); } @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("snapshot_mode", "initial_only") - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. - // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java index 1dc469ed3b60..54f2ea9c1ca1 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcBinlogsMySqlSourceDatatypeTest.java @@ -4,47 +4,42 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static 
io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import java.util.List; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) -public class CdcBinlogsMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; +public class CdcBinlogsMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - private DSLContext dslContext; private JsonNode stateAfterFirstSync; @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .build(); + } + + @Override + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } @Override @@ -57,11 +52,10 @@ protected List runRead(final ConfiguredAirbyteCatalog configured @Override protected void postSetup() throws Exception { - final Database database = setupDatabase(); - initTests(); + final var database = testdb.getDatabase(); for (final TestDataHolder test : testDataHolders) { database.query(ctx -> { - ctx.fetch(test.getCreateSqlQuery()); + ctx.execute("TRUNCATE TABLE " + test.getNameWithTestPrefix() + ";"); return null; }); } @@ -84,75 +78,6 @@ protected void postSetup() throws Exception { } } - @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, 
HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. - // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Override public boolean testCatalog() { return true; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java index dad29d00d111..230f34ca13fe 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotMySqlSourceDatatypeTest.java @@ -4,108 +4,32 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import 
org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; -@ExtendWith(SystemStubsExtension.class) public class CdcInitialSnapshotMySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - private DSLContext dslContext; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - dslContext.close(); - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override - protected Database setupDatabase() throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("snapshot_mode", "initial_only") - .put("is_test", true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL); - final Database database = new Database(dslContext); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - revokeAllPermissions(); - grantCorrectPermissions(); - - return database; - } - - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withCdcReplication() + .with("snapshot_mode", "initial_only") + .build(); } - private void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); - } - - private void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + @Override + protected Database setupDatabase() { + testdb = MySQLTestDatabase.in("mysql:8.0").withoutStrictMode().withCdcPermissions(); + return testdb.getDatabase(); } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java index 5bd26ef502ec..1db8613696c5 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSourceAcceptanceTest.java @@ -4,26 +4,21 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; import static io.airbyte.protocol.models.v0.SyncMode.INCREMENTAL; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import 
io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -37,25 +32,20 @@ import io.airbyte.protocol.models.v0.SyncMode; import java.util.List; import java.util.stream.Collectors; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class CdcMySqlSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - protected EnvironmentVariables environmentVariables; - protected static final String STREAM_NAME = "id_and_name"; protected static final String STREAM_NAME2 = "starships"; - protected MySQLContainer container; - protected JsonNode config; + + protected MySQLTestDatabase testdb; + + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } @Override protected String getImageName() { @@ -69,7 +59,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withoutSsl() + .build(); } @Override @@ -80,7 +73,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSourceDefinedCursor(true) @@ -92,7 +85,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME2), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSourceDefinedCursor(true) @@ -107,70 +100,22 @@ protected JsonNode getState() { } @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); - - revokeAllPermissions(); - grantCorrectPermissions(); - createAndPopulateTables(); - } - - protected void createAndPopulateTables() { - executeQuery("CREATE TABLE id_and_name(id INTEGER PRIMARY KEY, name VARCHAR(200));"); - 
executeQuery( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - executeQuery("CREATE TABLE starships(id INTEGER PRIMARY KEY, name VARCHAR(200));"); - executeQuery( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - } - - protected void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); - } - - protected void grantCorrectPermissions() { - executeQuery( - "GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " - + container.getUsername() + "@'%';"); + protected void setupEnvironment(final TestDestinationEnv environment) { + testdb = MySQLTestDatabase.in("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)) + .withCdcPermissions() + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); } - protected void executeQuery(final String query) { - try (final DSLContext dslContext = DSLContextFactory.create( - "root", - "test", - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - container.getDatabaseName()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - database.query( - ctx -> ctx - .execute(query)); - } catch (final Exception e) { - throw new RuntimeException(e); - } + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Test @@ -195,7 +140,7 @@ public void testIncrementalSyncShouldNotFailIfBinlogIsDeleted() throws Exception final JsonNode latestState = Jsons.jsonNode(supportsPerStream() ? 
stateMessages : List.of(Iterables.getLast(stateMessages))); // RESET MASTER removes all binary log files that are listed in the index file, // leaving only a single, empty binary log file with a numeric suffix of .000001 - executeQuery("RESET MASTER;"); + testdb.with("RESET MASTER;"); assertEquals(6, filterRecords(runRead(configuredCatalog, latestState)).size()); } @@ -219,7 +164,7 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), Field.of("id", JsonSchemaType.NUMBER) /* no name field */) .withSourceDefinedCursor(true) @@ -231,7 +176,7 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( String.format("%s", STREAM_NAME2), - String.format("%s", config.get(JdbcUtils.DATABASE_KEY).asText()), + testdb.getDatabaseName(), /* no name field */ Field.of("id", JsonSchemaType.NUMBER)) .withSourceDefinedCursor(true) @@ -241,13 +186,8 @@ private ConfiguredAirbyteCatalog getConfiguredCatalogWithPartialColumns() { } private void verifyFieldNotExist(final List records, final String stream, final String field) { - assertTrue(records.stream() - .filter(r -> { - return r.getStream().equals(stream) - && r.getData().get(field) != null; - }) - .collect(Collectors.toList()) - .isEmpty(), "Records contain unselected columns [%s:%s]".formatted(stream, field)); + assertTrue(records.stream().noneMatch(r -> r.getStream().equals(stream) && r.getData().get(field) != null), + "Records contain unselected columns [%s:%s]".formatted(stream, field)); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java index 58f1d1f3939f..16cc3ec29ba4 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslCaCertificateSourceAcceptanceTest.java @@ -4,56 +4,27 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.testcontainers.containers.MySQLContainer; +import java.util.stream.Stream; public class CdcMySqlSslCaCertificateSourceAcceptanceTest extends CdcMySqlSourceAcceptanceTest { - private static MySqlUtils.Certificate certs; - @Override - protected void setupEnvironment(final TestDestinationEnv 
environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - certs = MySqlUtils.getCertificate(container, true); - - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", "Passw0rd") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .build()) .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); + } - revokeAllPermissions(); - grantCorrectPermissions(); - createAndPopulateTables(); + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java index 5d8a02aef729..4f3691e8f9da 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcMySqlSslRequiredSourceAcceptanceTest.java @@ -4,53 +4,31 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import static io.airbyte.integrations.io.airbyte.integration_tests.sources.utils.TestConstants.INITIAL_CDC_WAITING_SECONDS; - import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.testcontainers.containers.MySQLContainer; +import java.util.stream.Stream; public class CdcMySqlSslRequiredSourceAcceptanceTest extends CdcMySqlSourceAcceptanceTest { @Override - protected void setupEnvironment(final TestDestinationEnv environment) { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - - final var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, 
"required") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withCdcReplication() + .withSsl(ImmutableMap.builder().put(JdbcUtils.MODE_KEY, "required").build()) .build(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_CDC_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .put("is_test", true) - .build()); + } - revokeAllPermissions(); - grantCorrectPermissions(); - alterUserRequireSsl(); - createAndPopulateTables(); + @Override + protected void setupEnvironment(final TestDestinationEnv environment) { + super.setupEnvironment(environment); + testdb.with("ALTER USER %s REQUIRE SSL;", testdb.getUserName()); } - private void alterUserRequireSsl() { - executeQuery("ALTER USER " + container.getUsername() + " REQUIRE SSL;"); + @Override + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java index 043290f95536..07597d1ab27c 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlDatatypeAccuracyTest.java @@ -5,38 +5,42 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.mysql.cj.MysqlType; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import 
uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class MySqlDatatypeAccuracyTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); + } @Override - protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + protected Database setupDatabase() { + final var sharedContainer = new MySQLContainerFactory().shared("mysql:8.0"); + testdb = new MySQLTestDatabase(sharedContainer) + .withConnectionProperty("zeroDateTimeBehavior", "convertToNull") + .initialized() + .withoutStrictMode(); + return testdb.getDatabase(); } private final Map> charsetsCollationsMap = Map.of( @@ -46,42 +50,6 @@ protected void tearDown(final TestDestinationEnv testEnv) { "binary", Arrays.asList("binary"), "CP1250", Arrays.asList("CP1250_general_ci", "cp1250_czech_cs")); - @Override - protected Database setupDatabase() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - final Database database = new Database( - DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL, - Map.of("zeroDateTimeBehavior", "convertToNull"))); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); - - return database; - } - @Override public boolean testCatalog() { return true; diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java index 11e0c0676e88..3ec7d4ab6740 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceAcceptanceTest.java @@ -5,18 +5,14 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -26,66 +22,36 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; - -@ExtendWith(SystemStubsExtension.class) +import java.util.stream.Stream; + public class MySqlSourceAcceptanceTest extends SourceAcceptanceTest { - @SystemStub - public EnvironmentVariables environmentVariables; + protected MySQLTestDatabase testdb; + private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "public.starships"; - protected MySQLContainer container; - protected JsonNode config; - @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - 
.put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } + testdb = MySQLTestDatabase.in("mysql:8.0", extraContainerFactoryMethods().toArray(String[]::new)) + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); + } + + @Override + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); + } + + protected Stream extraContainerFactoryMethods() { + return Stream.empty(); } @Override protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + testdb.close(); } @Override @@ -100,7 +66,10 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withoutSsl() + .build(); } @Override @@ -111,7 +80,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))), @@ -120,7 +89,7 @@ protected ConfiguredAirbyteCatalog getConfiguredCatalog() { .withCursorField(Lists.newArrayList("id")) .withDestinationSyncMode(DestinationSyncMode.APPEND) .withStream(CatalogHelpers.createAirbyteStream( - String.format("%s.%s", config.get(JdbcUtils.DATABASE_KEY).asText(), STREAM_NAME2), + String.format("%s.%s", testdb.getDatabaseName(), STREAM_NAME2), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING)) .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))))); diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java 
index 327b7e98af88..5b4f86eae403 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSourceDatatypeTest.java @@ -5,68 +5,35 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import java.util.Map; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; +import io.airbyte.integrations.source.mysql.MySQLContainerFactory; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; -@ExtendWith(SystemStubsExtension.class) public class MySqlSourceDatatypeTest extends AbstractMySqlSourceDatatypeTest { - @SystemStub - private EnvironmentVariables environmentVariables; - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - container.close(); + protected FeatureFlags featureFlags() { + return FeatureFlagsWrapper.overridingUseStreamCapableState(super.featureFlags(), true); } @Override - protected Database setupDatabase() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put("replication_method", replicationMethod) - .build()); - - final Database database = new Database( - DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format(DatabaseDriver.MYSQL.getUrlFormatString(), - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL, - Map.of("zeroDateTimeBehavior", "convertToNull"))); - - // It disable strict mode in the DB and allows to insert specific values. 
- // For example, it's possible to insert date with zero values "2021-00-00" - database.query(ctx -> ctx.fetch("SET @@sql_mode=''")); + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); + } - return database; + @Override + protected Database setupDatabase() { + final var sharedContainer = new MySQLContainerFactory().shared("mysql:8.0"); + testdb = new MySQLTestDatabase(sharedContainer) + .withConnectionProperty("zeroDateTimeBehavior", "convertToNull") + .initialized() + .withoutStrictMode(); + return testdb.getDatabase(); } @Override diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java index a22e7cfc6f9c..af217c88c7ea 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslCaCertificateSourceAcceptanceTest.java @@ -4,24 +4,29 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; +import java.util.stream.Stream; -public class MySqlSslCaCertificateSourceAcceptanceTest extends AbstractMySqlSslCertificateSourceAcceptanceTest { +public class MySqlSslCaCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; @Override - public MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException { - return MySqlUtils.getCertificate(container, false); + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates"); } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCaCertificate()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java index af656c30c575..efccbe3702aa 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslFullCertificateSourceAcceptanceTest.java @@ -4,26 +4,31 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; +import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.MySqlUtils; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import java.io.IOException; +import java.util.stream.Stream; -public class MySqlSslFullCertificateSourceAcceptanceTest extends AbstractMySqlSslCertificateSourceAcceptanceTest { +public class MySqlSslFullCertificateSourceAcceptanceTest extends MySqlSourceAcceptanceTest { + + private static final String PASSWORD = "Passw0rd"; @Override - public MySqlUtils.Certificate getCertificates() throws IOException, InterruptedException { - return MySqlUtils.getCertificate(container, true); + protected Stream extraContainerFactoryMethods() { + return Stream.of("withRootAndServerCertificates", "withClientCertificate"); } @Override - public ImmutableMap getSslConfig() { - return ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "verify_ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put(JdbcUtils.MODE_KEY, "verify_ca") + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) + .put("client_key_password", PASSWORD) + .build()) .build(); } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java index 21dd1aa40fd1..5f46e43808e4 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/MySqlSslSourceAcceptanceTest.java @@ -6,65 +6,16 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.commons.features.EnvVariableFeatureFlags; -import io.airbyte.commons.json.Jsons; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.testcontainers.containers.MySQLContainer; public class MySqlSslSourceAcceptanceTest extends MySqlSourceAcceptanceTest { @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - final JsonNode 
replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "STANDARD") - .build()); - - var sslMode = ImmutableMap.builder() - .put(JdbcUtils.MODE_KEY, "required") + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder().put(JdbcUtils.MODE_KEY, "required").build()) .build(); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) - .put(JdbcUtils.DATABASE_KEY, container.getDatabaseName()) - .put(JdbcUtils.USERNAME_KEY, container.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, container.getPassword()) - .put(JdbcUtils.SSL_KEY, true) - .put(JdbcUtils.SSL_MODE_KEY, sslMode) - .put("replication_method", replicationMethod) - .build()); - - try (final DSLContext dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s/%s", - container.getHost(), - container.getFirstMappedPort(), - config.get(JdbcUtils.DATABASE_KEY).asText()), - SQLDialect.MYSQL)) { - final Database database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch( - "INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - return null; - }); - } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java index 63e2a9b56ed5..7d5f060f34c2 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshKeyMySqlSourceAcceptanceTest.java @@ -4,26 +4,10 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.commons.features.EnvVariableFeatureFlags; import java.nio.file.Path; -import org.junit.jupiter.api.extension.ExtendWith; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class SshKeyMySqlSourceAcceptanceTest extends AbstractSshMySqlSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - super.setupEnvironment(environment); - } - @Override public Path getConfigFilePath() { return Path.of("secrets/ssh-key-repl-config.json"); diff --git 
a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java index e49ea61e457e..1211c8269894 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/SshPasswordMySqlSourceAcceptanceTest.java @@ -10,32 +10,14 @@ import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.commons.exceptions.ConfigErrorException; -import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.integrations.source.mysql.MySQLTestDatabase; import io.airbyte.integrations.source.mysql.MySqlSource; import java.nio.file.Path; -import java.util.List; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import org.testcontainers.containers.Network; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) public class SshPasswordMySqlSourceAcceptanceTest extends AbstractSshMySqlSourceAcceptanceTest { - @SystemStub - private EnvironmentVariables environmentVariables; - - @Override - protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - super.setupEnvironment(environment); - } - @Override public Path getConfigFilePath() { return Path.of("secrets/ssh-pwd-repl-config.json"); @@ -43,30 +25,23 @@ public Path getConfigFilePath() { @Test public void sshTimeoutExceptionMarkAsConfigErrorTest() throws Exception { - final SshBastionContainer bastion = new SshBastionContainer(); - final Network network = Network.newNetwork(); - // set up env - final MySQLContainer db = startTestContainers(bastion, network); - config = bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, bastion.getBasicDbConfigBuider(db, List.of("public")), true); - bastion.stopAndClose(); - final Source sshWrappedSource = MySqlSource.sshWrappedSource(); - final Exception exception = assertThrows(ConfigErrorException.class, () -> sshWrappedSource.discover(config)); - - final String expectedMessage = "Timed out while opening a SSH Tunnel. 
Please double check the given SSH configurations and try again."; - final String actualMessage = exception.getMessage(); - - assertTrue(actualMessage.contains(expectedMessage)); - } - - private MySQLContainer startTestContainers(final SshBastionContainer bastion, final Network network) { - bastion.initAndStartBastion(network); - return initAndStartJdbcContainer(network); - } - - private MySQLContainer initAndStartJdbcContainer(final Network network) { - final MySQLContainer db = new MySQLContainer<>("mysql:8.0").withNetwork(network); - db.start(); - return db; + try (final var testdb = MySQLTestDatabase.in("mysql:8.0", "withNetwork")) { + final SshBastionContainer bastion = new SshBastionContainer(); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); + final var config = testdb.integrationTestConfigBuilder() + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, true)) + .build(); + bastion.stopAndClose(); + + final Source sshWrappedSource = MySqlSource.sshWrappedSource(new MySqlSource()); + final Exception exception = assertThrows(ConfigErrorException.class, () -> sshWrappedSource.discover(config)); + + final String expectedMessage = + "Timed out while opening a SSH Tunnel. Please double check the given SSH configurations and try again."; + final String actualMessage = exception.getMessage(); + assertTrue(actualMessage.contains(expectedMessage)); + } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java b/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java deleted file mode 100644 index 669e7e9144d0..000000000000 --- a/airbyte-integrations/connectors/source-mysql/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/utils/TestConstants.java +++ /dev/null @@ -1,11 +0,0 @@ -/* - * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
- */ - -package io.airbyte.integrations.io.airbyte.integration_tests.sources.utils; - -public class TestConstants { - - public static final int INITIAL_CDC_WAITING_SECONDS = 10; - -} diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java index 293ec4d8f1ca..6bd939fd9e3c 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java @@ -13,7 +13,6 @@ import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_DEFAULT_CURSOR; import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_LOG_FILE; import static io.airbyte.integrations.source.mysql.MySqlSource.CDC_LOG_POS; -import static io.airbyte.integrations.source.mysql.MySqlSource.DRIVER_CLASS; import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.PRIMARY_KEY_STATE_TYPE; import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -30,16 +29,13 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Streams; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.debezium.CdcSourceTest; import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage; import io.airbyte.cdk.integrations.debezium.internals.mysql.MySqlCdcTargetPosition; import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.AutoCloseableIterator; import io.airbyte.commons.util.AutoCloseableIterators; @@ -59,127 +55,68 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.Collections; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.Properties; import java.util.Random; import java.util.Set; import java.util.stream.Collectors; -import javax.sql.DataSource; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Tag; -import org.junit.jupiter.api.Tags; +import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.Timeout; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) -public class CdcMysqlSourceTest extends CdcSourceTest { +@Order(1) +public class 
CdcMysqlSourceTest extends CdcSourceTest { - private static final String START_DB_CONTAINER_WITH_INVALID_TIMEZONE = "START-DB-CONTAINER-WITH-INVALID-TIMEZONE"; private static final String INVALID_TIMEZONE_CEST = "CEST"; - @SystemStub - private EnvironmentVariables environmentVariables; - - private static final String DB_NAME = MODELS_SCHEMA; - private MySQLContainer container; - private Database database; - private MySqlSource source; - private JsonNode config; private static final Random RANDOM = new Random(); - @BeforeEach - public void setup(final TestInfo testInfo) throws SQLException { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - init(testInfo); - revokeAllPermissions(); - grantCorrectPermissions(); - super.setup(); + @Override + protected MySQLTestDatabase createTestDatabase() { + return MySQLTestDatabase.in("mysql:8.0", "withInvalidTimezoneCEST").withCdcPermissions(); } - private void init(final TestInfo testInfo) { - container = new MySQLContainer<>("mysql:8.0"); - if (testInfo.getTags().contains(START_DB_CONTAINER_WITH_INVALID_TIMEZONE)) { - container.withEnv(Map.of("TZ", INVALID_TIMEZONE_CEST)); - } - container.start(); - source = new MySqlSource(); - database = new Database(DSLContextFactory.create( - "root", - "test", - DRIVER_CLASS, - String.format("jdbc:mysql://%s:%s", - container.getHost(), - container.getFirstMappedPort()), - SQLDialect.MYSQL)); - - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .put("server_time_zone", "America/Los_Angeles") - .build()); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put("host", container.getHost()) - .put("port", container.getFirstMappedPort()) - .put("database", DB_NAME) - .put("username", container.getUsername()) - .put("password", container.getPassword()) - .put("replication_method", replicationMethod) - .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) - .put("is_test", true) - .build()); + @Override + protected MySqlSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } - private void revokeAllPermissions() { - executeQuery("REVOKE ALL PRIVILEGES, GRANT OPTION FROM " + container.getUsername() + "@'%';"); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .withCdcReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); } - private void revokeReplicationClientPermission() { - executeQuery("REVOKE REPLICATION CLIENT ON *.* FROM " + container.getUsername() + "@'%';"); + protected void purgeAllBinaryLogs() { + testdb.with("RESET MASTER;"); } - private void grantCorrectPermissions() { - executeQuery("GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO " + container.getUsername() + "@'%';"); + @Override + protected String createSchemaSqlFmt() { + return "CREATE DATABASE IF NOT EXISTS %s;"; } - protected void purgeAllBinaryLogs() { - executeQuery("RESET MASTER;"); + @Override + protected String modelsSchema() { + return testdb.getDatabaseName(); } - @AfterEach - public void tearDown() { - try { - container.close(); - } catch (final Exception e) { - throw new RuntimeException(e); - } + @Override + protected String randomSchema() { + return testdb.getDatabaseName(); } @Override protected MySqlCdcTargetPosition cdcLatestTargetPosition() { - final DataSource dataSource = 
DataSourceFactory.create( - "root", - "test", - DRIVER_CLASS, - String.format("jdbc:mysql://%s:%s", - container.getHost(), - container.getFirstMappedPort()), - Collections.emptyMap()); - return MySqlCdcTargetPosition.targetPosition(new DefaultJdbcDatabase(dataSource)); + return MySqlCdcTargetPosition.targetPosition(new DefaultJdbcDatabase(testdb.getDataSource())); } @Override @@ -240,30 +177,10 @@ protected void addCdcDefaultCursorField(final AirbyteStream stream) { } } - @Override - protected Source getSource() { - return source; - } - - @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected Database getDatabase() { - return database; - } - - @Override - protected String randomTableSchema() { - return MODELS_SCHEMA; - } - @Test protected void syncWithReplicationClientPrivilegeRevokedFailsCheck() throws Exception { - revokeReplicationClientPermission(); - final AirbyteConnectionStatus status = getSource().check(getConfig()); + testdb.with("REVOKE REPLICATION CLIENT ON *.* FROM %s@'%%';", testdb.getUserName()); + final AirbyteConnectionStatus status = source().check(config()); final String expectedErrorMessage = "Please grant REPLICATION CLIENT privilege, so that binary log files are available" + " for CDC mode."; assertTrue(status.getStatus().equals(Status.FAILED)); @@ -283,8 +200,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -314,8 +231,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { purgeAllBinaryLogs(); final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -338,10 +255,10 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { @Test protected void verifyCheckpointStatesByRecords() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. 
- final int recordsToCreate = 20000; + final int recordsToCreate = 20_000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -351,16 +268,14 @@ protected void verifyCheckpointStatesByRecords() throws Exception { assertExpectedStateMessages(stateMessages); for (int recordsCreated = 0; recordsCreated < recordsToCreate; recordsCreated++) { - final JsonNode record = - Jsons.jsonNode(ImmutableMap - .of(COL_ID, 200 + recordsCreated, COL_MAKE_ID, 1, COL_MODEL, - "F-" + recordsCreated)); + final JsonNode record = Jsons.jsonNode(ImmutableMap + .of(COL_ID, 200 + recordsCreated, COL_MAKE_ID, 1, COL_MODEL, "F-" + recordsCreated)); writeModelRecord(record); } final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); assertEquals(recordsToCreate, extractRecordMessages(dataFromSecondBatch).size()); @@ -449,14 +364,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); - if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))) { + if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomSchema()))) { assertEquals(PRIMARY_KEY_STATE_TYPE, streamState.get(STATE_TYPE_KEY).asText()); - } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))) { + } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(testdb.getDatabaseName()))) { assertFalse(streamState.has(STATE_TYPE_KEY)); } else { throw new RuntimeException("Unknown stream"); @@ -474,8 +389,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); assertFalse(streamState.has(STATE_TYPE_KEY)); @@ -492,17 +407,16 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List read = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator read = source() + .read(config(), getConfiguredCatalog(), null); final List actualRecords = AutoCloseableIterators.toListAndClose(read); @@ -526,12 +440,12 @@ public void syncWouldWorkWithDBWithInvalidTimezone() throws Exception { @Test public void testCompositeIndexInitialLoad() throws Exception { // Simulate adding a composite index by modifying the catalog. 
- final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List> primaryKeys = configuredCatalog.getStreams().get(0).getStream().getSourceDefinedPrimaryKey(); primaryKeys.add(List.of("make_id")); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); @@ -546,8 +460,8 @@ public void testCompositeIndexInitialLoad() throws Exception { // load, and // the last one indicating the cdc position we have synced until. final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateMessages1.get(4))); - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, state); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, state); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final Set recordMessages2 = extractRecordMessages(actualRecords2); @@ -561,7 +475,7 @@ public void testCompositeIndexInitialLoad() throws Exception { @Test public void testTwoStreamSync() throws Exception { // Add another stream models_2 and read that one as well. - final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(CONFIGURED_CATALOG); + final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog()); final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -571,18 +485,18 @@ public void testTwoStreamSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", + testdb.with(createTableSqlFmt(), testdb.getDatabaseName(), MODELS_STREAM_NAME + "_2", columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, + writeRecords(recordJson, testdb.getDatabaseName(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + testdb.getDatabaseName(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -595,8 +509,8 @@ public void testTwoStreamSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source() + .read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -658,13 +572,13 @@ public void testTwoStreamSync() throws Exception { recordMessages1, names, names, - MODELS_SCHEMA); + testdb.getDatabaseName()); - assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA), firstStreamInState); + assertEquals(new 
StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(testdb.getDatabaseName()), firstStreamInState); // Triggering a sync with a primary_key state for 1 stream and complete state for other stream - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List stateMessages2 = extractStateMessages(actualRecords2); @@ -701,7 +615,7 @@ public void testTwoStreamSync() throws Exception { recordMessages2, names, names, - MODELS_SCHEMA); + testdb.getDatabaseName()); } /** @@ -714,8 +628,8 @@ public void testTwoStreamSync() throws Exception { @Test public void testCompressedSchemaHistory() throws Exception { createTablesToIncreaseSchemaHistorySize(); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final AirbyteStateMessage lastStateMessageFromFirstBatch = Iterables.getLast(extractStateMessages(dataFromFirstBatch)); @@ -737,8 +651,8 @@ public void testCompressedSchemaHistory() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, Jsons.jsonNode(Collections.singletonList(lastStateMessageFromFirstBatch))); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), Jsons.jsonNode(Collections.singletonList(lastStateMessageFromFirstBatch))); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final AirbyteStateMessage lastStateMessageFromSecondBatch = Iterables.getLast(extractStateMessages(dataFromSecondBatch)); @@ -758,7 +672,7 @@ public void testCompressedSchemaHistory() throws Exception { private void createTablesToIncreaseSchemaHistorySize() { for (int i = 0; i <= 200; i++) { final String tableName = generateRandomStringOf32Characters(); - final StringBuilder createTableQuery = new StringBuilder("CREATE TABLE models_schema." 
+ tableName + "("); + final StringBuilder createTableQuery = new StringBuilder("CREATE TABLE " + tableName + "("); String firstCol = null; for (int j = 1; j <= 250; j++) { final String columnName = generateRandomStringOf32Characters(); @@ -769,7 +683,7 @@ private void createTablesToIncreaseSchemaHistorySize() { createTableQuery.append(columnName).append(" INTEGER, "); } createTableQuery.append("PRIMARY KEY (").append(firstCol).append("));"); - executeQuery(createTableQuery.toString()); + testdb.with(createTableQuery.toString()); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java index ff2648974915..874c8293924a 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java @@ -8,6 +8,7 @@ * Copyright (c) 2023 Airbyte, Inc., all rights reserved. */ +import static io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants.SYNC_CHECKPOINT_RECORDS_PROPERTY; import static io.airbyte.integrations.source.mysql.initialsync.MySqlInitialLoadStateManager.STATE_TYPE_KEY; import static java.util.stream.Collectors.toList; import static org.assertj.core.api.Assertions.assertThat; @@ -19,18 +20,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.source.mysql.internal.models.CursorBasedStatus; import io.airbyte.integrations.source.mysql.internal.models.InternalModels.StateType; @@ -52,187 +48,102 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.Connection; -import java.sql.DriverManager; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.jooq.DSLContext; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.testcontainers.containers.MySQLContainer; -import uk.org.webcompere.systemstubs.environment.EnvironmentVariables; -import 
uk.org.webcompere.systemstubs.jupiter.SystemStub; -import uk.org.webcompere.systemstubs.jupiter.SystemStubsExtension; -@ExtendWith(SystemStubsExtension.class) -class MySqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { - - @SystemStub - private EnvironmentVariables environmentVariables; +@Order(2) +class MySqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "new_password"; - protected static final String TEST_USER = "test"; - protected static final Callable TEST_PASSWORD = () -> "test"; - protected static MySQLContainer container; - - protected Database database; - protected DSLContext dslContext; - - @BeforeAll - static void init() throws Exception { - container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD.call()) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD.call()); - container.start(); - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - } - @BeforeEach - public void setup() throws Exception { - environmentVariables.set(EnvVariableFeatureFlags.USE_STREAM_CAPABLE_STATE, "true"); - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10)) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD.call()) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asText()), - SQLDialect.MYSQL); - database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE DATABASE " + getDefaultNamespace()); - return null; - }); - - super.setup(); - } - - @AfterEach - void tearDownMySql() throws Exception { - dslContext.close(); - super.tearDown(); - } - - @AfterAll - static void cleanUp() { - container.close(); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder().build(); } - // MySql does not support schemas in the way most dbs do. Instead we namespace by db name. @Override - public boolean supportsSchemas() { - return false; + protected MySqlSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; } @Override - public AbstractJdbcSource getJdbcSource() { - return new MySqlSource(); + protected MySQLTestDatabase createTestDatabase() { + return MySQLTestDatabase.in("mysql:8.0"); } @Override - public String getDriverClass() { - return MySqlSource.DRIVER_CLASS; + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { + ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1000"); } + // MySql does not support schemas in the way most dbs do. Instead we namespace by db name. 
@Override - public JsonNode getConfig() { - return Jsons.clone(config); + protected boolean supportsSchemas() { + return false; } @Test void testReadMultipleTablesIncrementally() throws Exception { - ((ObjectNode) config).put("sync_checkpoint_records", 1); - final String namespace = getDefaultNamespace(); + final var config = config(); + ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); final String streamOneName = TABLE_NAME + "one"; // Create a fresh first table - database.query(connection -> { - connection.fetch(String.format("USE %s;", getDefaultNamespace())); - connection.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at VARCHAR(200) NOT NULL\n" - + ");", streamOneName)); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", - getFullyQualifiedTableName(streamOneName))); - connection.execute( - String.format( - "INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - return null; - }); + testdb.with("CREATE TABLE %s (\n" + + " id int PRIMARY KEY,\n" + + " name VARCHAR(200) NOT NULL,\n" + + " updated_at VARCHAR(200) NOT NULL\n" + + ");", streamOneName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (1,'picard', '2004-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (2, 'crusher', '2005-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (3, 'vash', '2006-10-19')", + getFullyQualifiedTableName(streamOneName)); // Create a fresh second table final String streamTwoName = TABLE_NAME + "two"; final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); // Insert records into second table - database.query(ctx -> { - ctx.fetch(String.format("CREATE TABLE %s (\n" - + " id int PRIMARY KEY,\n" - + " name VARCHAR(200) NOT NULL,\n" - + " updated_at DATE NOT NULL\n" - + ");", streamTwoName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (40,'Jean Luc','2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (41, 'Groot', '2006-10-19')", - streamTwoFullyQualifiedName)); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (42, 'Thanos','2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("CREATE TABLE %s (\n" + + " id int PRIMARY KEY,\n" + + " name VARCHAR(200) NOT NULL,\n" + + " updated_at DATE NOT NULL\n" + + ");", streamTwoName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (40,'Jean Luc','2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (41, 'Groot', '2006-10-19')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at) VALUES (42, 'Thanos','2006-10-19')", + streamTwoFullyQualifiedName); + // Create records list that we expect to see in the state message final List streamTwoExpectedRecords = Arrays.asList( - createRecord(streamTwoName, namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 40, COL_NAME, "Jean Luc", COL_UPDATED_AT, "2006-10-19")), - createRecord(streamTwoName, 
namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 41, COL_NAME, "Groot", COL_UPDATED_AT, "2006-10-19")), - createRecord(streamTwoName, namespace, ImmutableMap.of( + createRecord(streamTwoName, getDefaultNamespace(), ImmutableMap.of( COL_ID, 42, COL_NAME, "Thanos", COL_UPDATED_AT, "2006-10-19"))); // Prep and create a configured catalog to perform sync - final AirbyteStream streamOne = getAirbyteStream(streamOneName, namespace); - final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, namespace); + final AirbyteStream streamOne = getAirbyteStream(streamOneName, getDefaultNamespace()); + final AirbyteStream streamTwo = getAirbyteStream(streamTwoName, getDefaultNamespace()); final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog( new AirbyteCatalog().withStreams(List.of(streamOne, streamTwo))); @@ -245,7 +156,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Perform initial sync final List messagesFromFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, null)); + .toList(source().read(config, configuredCatalog, null)); final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); @@ -312,7 +223,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // - stream two state being the Primary Key state before the final emitted state before the cursor // switch final List messagesFromSecondSyncWithMixedStates = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); @@ -339,21 +250,13 @@ void testReadMultipleTablesIncrementally() throws Exception { // Add some data to each table and perform a third read. 
// Expect to see all records be synced via cursorBased method and not primaryKey - - database.query(ctx -> { - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (4,'Hooper','2006-10-19')", - getFullyQualifiedTableName(streamOneName))); - ctx.execute( - String.format("INSERT INTO %s(id, name, updated_at)" - + "VALUES (43, 'Iron Man', '2006-10-19')", - streamTwoFullyQualifiedName)); - return null; - }); + testdb.with("INSERT INTO %s(id, name, updated_at) VALUES (4,'Hooper','2006-10-19')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at) VALUES (43, 'Iron Man', '2006-10-19')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -386,7 +289,7 @@ void testReadMultipleTablesIncrementally() throws Exception { @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); assertEquals(expected, actual); @@ -402,16 +305,20 @@ void testSpec() throws Exception { */ @Test void testCheckIncorrectPasswordFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08001;")); + assertTrue(status.getMessage().contains("State code: 08001;"), status.getMessage()); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); // do not test for message since there seems to be flakiness where sometimes the test will get the // message with @@ -420,38 +327,45 @@ public void testCheckIncorrectUsernameFailure() throws Exception { @Test public void testCheckIncorrectHostFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectPortFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "0000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - 
assertTrue(status.getMessage().contains("State code: 08S01;")); + assertTrue(status.getMessage().contains("State code: 08S01;"), status.getMessage()); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 42000; Error code: 1049;")); + assertTrue(status.getMessage().contains("State code: 42000; Error code: 1049;"), status.getMessage()); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); - connection.createStatement() - .execute("create user '" + USERNAME_WITHOUT_PERMISSION + "'@'%' IDENTIFIED BY '" + PASSWORD_WITHOUT_PERMISSION + "';\n"); - ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, USERNAME_WITHOUT_PERMISSION); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + final String usernameWithoutPermission = testdb.withNamespace(USERNAME_WITHOUT_PERMISSION); + testdb.with("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", usernameWithoutPermission, PASSWORD_WITHOUT_PERMISSION); + ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, usernameWithoutPermission); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, PASSWORD_WITHOUT_PERMISSION); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); - assertTrue(status.getMessage().contains("State code: 08001;")); + assertTrue(status.getMessage().contains("State code: 08001;"), status.getMessage()); } @Override @@ -470,13 +384,13 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", COL_UPDATED_AT, "2006-10-19"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -484,7 +398,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") @@ -501,7 +415,7 @@ protected boolean supportsPerStream() { @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { diff --git 
a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java index aac3f2efbd71..f9c8f288410f 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceOperationsTest.java @@ -4,15 +4,12 @@ package io.airbyte.integrations.source.mysql; -import static io.airbyte.integrations.source.mysql.MySqlSource.DRIVER_CLASS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsInAnyOrder; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.mysql.cj.MysqlType; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; import io.airbyte.cdk.db.jdbc.DateTimeConverter; import io.airbyte.commons.json.Jsons; import java.sql.Connection; @@ -27,280 +24,107 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; +import java.util.function.Function; +import java.util.function.IntFunction; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.MySQLContainer; public class MySqlSourceOperationsTest { - private final MySqlSourceOperations sqlSourceOperations = new MySqlSourceOperations(); - private MySQLContainer container; - private Database database; - - @BeforeEach - public void init() { - container = new MySQLContainer<>("mysql:8.0"); - container.start(); - database = new Database(DSLContextFactory.create( - "root", - "test", - DRIVER_CLASS, - String.format("jdbc:mysql://%s:%s", - container.getHost(), - container.getFirstMappedPort()), - SQLDialect.MYSQL)); - } - - @AfterEach - public void tearDown() { - try { - container.close(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - @Test public void dateColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_date"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " DATE);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalDate cursorValue = LocalDate.of(2019, 1, i); - jsonNode.put("cursor_column", DateTimeConverter.convertToDate(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATE, DateTimeConverter.convertToDate(LocalDate.of(2019, 1, 1))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) 
Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATE, "2019-01-01T00:00:00Z"); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + testImpl( + "DATE", + i -> LocalDate.of(2019, 1, i), + DateTimeConverter::convertToDate, + LocalDate::toString, + MysqlType.DATE, + DateTimeConverter.convertToDate(LocalDate.of(2019, 1, 1)), + "2019-01-01T00:00:00Z"); } @Test public void timeColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_time"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " TIME);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalTime cursorValue = LocalTime.of(20, i, 0); - jsonNode.put("cursor_column", DateTimeConverter.convertToTime(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIME, DateTimeConverter.convertToTime(LocalTime.of(20, 1, 0))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIME, "1970-01-01T20:01:00Z"); - - try (final ResultSet resultSet = 
preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } + testImpl( + "TIME", + i -> LocalTime.of(20, i, 0), + DateTimeConverter::convertToTime, + LocalTime::toString, + MysqlType.TIME, + DateTimeConverter.convertToTime(LocalTime.of(20, 1, 0)), + "1970-01-01T20:01:00Z"); } @Test public void dateTimeColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + ".table_with_datetime"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " DATETIME);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final LocalDateTime cursorValue = LocalDateTime.of(2019, i, 20, 3, 0, 0); - jsonNode.put("cursor_column", DateTimeConverter.convertToTimestamp(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + cursorValue + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATETIME, - DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0))); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); - - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.DATETIME, "2019-01-20T03:00:00.000000"); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); - } - } - } - assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); + testImpl( + "DATETIME", + i -> LocalDateTime.of(2019, i, 20, 3, 0, 0), + DateTimeConverter::convertToTimestamp, + LocalDateTime::toString, + MysqlType.DATETIME, + DateTimeConverter.convertToTimestamp(LocalDateTime.of(2019, 1, 20, 3, 0, 0)), + "2019-01-20T03:00:00.000000"); } @Test public void timestampColumnAsCursor() throws SQLException { - final String tableName = container.getDatabaseName() + 
".table_with_timestamp"; - final String cursorColumn = "cursor_column"; - executeQuery("CREATE TABLE " + tableName + "(id INTEGER PRIMARY KEY, " + cursorColumn + " timestamp);"); - - final List expectedRecords = new ArrayList<>(); - for (int i = 1; i <= 4; i++) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - jsonNode.put("id", i); - final Instant cursorValue = Instant.ofEpochSecond(1660298508L).plusSeconds(i - 1); - jsonNode.put("cursor_column", DateTimeConverter.convertToTimestampWithTimezone(cursorValue)); - executeQuery("INSERT INTO " + tableName + " VALUES (" + i + ", '" + Timestamp.from(cursorValue) + "');"); - if (i >= 2) { - expectedRecords.add(jsonNode); - } - } - - final List actualRecords = new ArrayList<>(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIMESTAMP, - DateTimeConverter.convertToTimestampWithTimezone(Instant.ofEpochSecond(1660298508L))); + testImpl( + "TIMESTAMP", + i -> Instant.ofEpochSecond(1660298508L).plusSeconds(i - 1), + DateTimeConverter::convertToTimestampWithTimezone, + r -> Timestamp.from(r).toString(), + MysqlType.TIMESTAMP, + DateTimeConverter.convertToTimestampWithTimezone(Instant.ofEpochSecond(1660298508L)), + Instant.ofEpochSecond(1660298508L).toString()); + } - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); - } - actualRecords.add(jsonNode); + private void testImpl( + final String sqlType, + IntFunction recordBuilder, + Function airbyteRecordStringifier, + Function sqlRecordStringifier, + MysqlType mysqlType, + String initialCursorFieldValue, + // Test to check backward compatibility for connectors created before PR + // https://github.com/airbytehq/airbyte/pull/15504 + String backwardCompatibleInitialCursorFieldValue) + throws SQLException { + final var sqlSourceOperations = new MySqlSourceOperations(); + final String cursorColumn = "cursor_column"; + try (final var testdb = MySQLTestDatabase.in("mysql:8.0") + .with("CREATE TABLE cursor_table (id INTEGER PRIMARY KEY, %s %s);", cursorColumn, sqlType)) { + + final List expectedRecords = new ArrayList<>(); + for (int i = 1; i <= 4; i++) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + jsonNode.put("id", i); + final T cursorValue = recordBuilder.apply(i); + jsonNode.put("cursor_column", airbyteRecordStringifier.apply(cursorValue)); + testdb.with("INSERT INTO cursor_table VALUES (%d, '%s');", i, sqlRecordStringifier.apply(cursorValue)); + if (i >= 2) { + expectedRecords.add(jsonNode); } } - } - - Assertions.assertEquals(3, actualRecords.size()); - // Test to check backward compatibility for connectors created before PR - // https://github.com/airbytehq/airbyte/pull/15504 - actualRecords.clear(); - try (final Connection connection = container.createConnection("")) { - final PreparedStatement preparedStatement = connection.prepareStatement( - "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); - sqlSourceOperations.setCursorField(preparedStatement, 1, MysqlType.TIMESTAMP, 
Instant.ofEpochSecond(1660298508L).toString()); - - try (final ResultSet resultSet = preparedStatement.executeQuery()) { - while (resultSet.next()) { - final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); - for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { - sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + try (final Connection connection = testdb.getContainer().createConnection("")) { + final PreparedStatement preparedStatement = connection.prepareStatement( + "SELECT * FROM " + testdb.getDatabaseName() + ".cursor_table WHERE " + cursorColumn + " > ?"); + for (final var initialValue : List.of(initialCursorFieldValue, backwardCompatibleInitialCursorFieldValue)) { + sqlSourceOperations.setCursorField(preparedStatement, 1, mysqlType, initialValue); + final List actualRecords = new ArrayList<>(); + try (final ResultSet resultSet = preparedStatement.executeQuery()) { + while (resultSet.next()) { + final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap()); + for (int i = 1; i <= resultSet.getMetaData().getColumnCount(); i++) { + sqlSourceOperations.copyToJsonField(resultSet, i, jsonNode); + } + actualRecords.add(jsonNode); + } } - actualRecords.add(jsonNode); + assertThat(actualRecords, containsInAnyOrder(expectedRecords.toArray())); } } } - Assertions.assertEquals(3, actualRecords.size()); - } - - protected void executeQuery(final String query) { - try { - database.query( - ctx -> ctx - .execute(query)); - } catch (final SQLException e) { - throw new RuntimeException(e); - } } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java index bcba45ab727c..747a66a8dd63 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java @@ -17,8 +17,9 @@ import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource.PrimaryKeyAttributesFromDb; import io.airbyte.commons.exceptions.ConfigErrorException; +import io.airbyte.commons.features.EnvVariableFeatureFlags; +import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; @@ -28,71 +29,36 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Properties; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.MySQLContainer; -import org.testcontainers.containers.output.Slf4jLogConsumer; public class MySqlSourceTests { - private static final Logger LOGGER = LoggerFactory.getLogger(MySqlSourceTests.class); - - private static final String TEST_USER = 
"test"; - private static final String TEST_PASSWORD = "test"; + public MySqlSource source() { + final var source = new MySqlSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } @Test public void testSettingTimezones() throws Exception { - // start DB - try (final MySQLContainer container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD) - .withEnv("TZ", "Europe/Moscow") - .withLogConsumer(new Slf4jLogConsumer(LOGGER))) { - - container.start(); - - final Properties properties = new Properties(); - properties.putAll(ImmutableMap.of("user", "root", JdbcUtils.PASSWORD_KEY, TEST_PASSWORD, "serverTimezone", "Europe/Moscow")); - DriverManager.getConnection(container.getJdbcUrl(), properties); - final String dbName = Strings.addRandomSuffix("db", "_", 10); - final JsonNode config = getConfig(container, dbName, "serverTimezone=Europe/Moscow"); - - try (final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), properties)) { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - } - final AirbyteConnectionStatus check = new MySqlSource().check(config); - assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus()); + try (final var testdb = MySQLTestDatabase.in("mysql:8.0", "withMoscowTimezone")) { + final var config = testdb.testConfigBuilder() + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "serverTimezone=Europe/Moscow") + .withoutSsl() + .build(); + final AirbyteConnectionStatus check = source().check(config); + assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus(), check.getMessage()); } } - private static JsonNode getConfig(final MySQLContainer dbContainer, final String dbName, final String jdbcParams) { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, dbContainer.getHost()) - .put(JdbcUtils.PORT_KEY, dbContainer.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD) - .put(JdbcUtils.JDBC_URL_PARAMS_KEY, jdbcParams) - .build()); - } - @Test void testJdbcUrlWithEscapedDatabaseName() { - final JsonNode jdbcConfig = new MySqlSource().toDatabaseConfig(buildConfigEscapingNeeded()); + final JsonNode jdbcConfig = source().toDatabaseConfig(buildConfigEscapingNeeded()); assertNotNull(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText()); assertTrue(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText().startsWith(EXPECTED_JDBC_ESCAPED_URL)); } @@ -109,95 +75,45 @@ private JsonNode buildConfigEscapingNeeded() { @Test @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") - public void testTableWithNullCursorValueShouldThrowException() throws SQLException { - try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD)) { - db.start(); - final JsonNode config = getConfig(db, "test", ""); - try (Connection connection = DriverManager.getConnection(db.getJdbcUrl(), "root", config.get(JdbcUtils.PASSWORD_KEY).asText())) { - final ConfiguredAirbyteStream table = 
createTableWithNullValueCursor(connection); - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(new MySqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='test.null_cursor_table', cursorColumnName='id', cursorSqlType=INT, cause=Cursor column contains NULL value}"); - - } finally { - db.stop(); - } + public void testNullCursorValueShouldThrowException() { + try (final var testdb = MySQLTestDatabase.in("mysql:8.0") + .with("CREATE TABLE null_cursor_table(id INTEGER NULL);") + .with("INSERT INTO null_cursor_table(id) VALUES (1), (2), (NULL);") + .with("CREATE VIEW null_cursor_view(id) AS SELECT null_cursor_table.id FROM null_cursor_table;")) { + final var config = testdb.testConfigBuilder().withoutSsl().build(); + + final var tableStream = new ConfiguredAirbyteStream() + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + "null_cursor_table", + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + final var tableCatalog = new ConfiguredAirbyteCatalog().withStreams(List.of(tableStream)); + final var tableThrowable = catchThrowable(() -> MoreIterators.toSet(source().read(config, tableCatalog, null))); + assertThat(tableThrowable).isInstanceOf(ConfigErrorException.class).hasMessageContaining(NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS); + + final var viewStream = new ConfiguredAirbyteStream() + .withCursorField(Lists.newArrayList("id")) + .withDestinationSyncMode(DestinationSyncMode.APPEND) + .withSyncMode(SyncMode.INCREMENTAL) + .withStream(CatalogHelpers.createAirbyteStream( + "null_cursor_view", + testdb.getDatabaseName(), + Field.of("id", JsonSchemaType.STRING)) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) + .withSourceDefinedPrimaryKey(List.of(List.of("id")))); + final var viewCatalog = new ConfiguredAirbyteCatalog().withStreams(List.of(viewStream)); + final var viewThrowable = catchThrowable(() -> MoreIterators.toSet(source().read(config, viewCatalog, null))); + assertThat(viewThrowable).isInstanceOf(ConfigErrorException.class).hasMessageContaining(NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS); } } - private ConfiguredAirbyteStream createTableWithNullValueCursor(final Connection connection) throws SQLException { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE TABLE IF NOT EXISTS test.null_cursor_table(id INTEGER NULL)"); - connection.createStatement().execute("INSERT INTO test.null_cursor_table(id) VALUES (1), (2), (NULL)"); - - return new ConfiguredAirbyteStream().withSyncMode(SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - "null_cursor_table", - "test", - Field.of("id", JsonSchemaType.STRING)) - 
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - } - - @Test - @Disabled("See https://github.com/airbytehq/airbyte/pull/23908#issuecomment-1463753684, enable once communication is out") - public void viewWithNullValueCursorShouldThrowException() throws SQLException { - try (final MySQLContainer db = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD)) { - db.start(); - final JsonNode config = getConfig(db, "test", ""); - try (Connection connection = DriverManager.getConnection(db.getJdbcUrl(), "root", config.get(JdbcUtils.PASSWORD_KEY).asText())) { - final ConfiguredAirbyteStream table = createViewWithNullValueCursor(connection); - final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); - - final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(new MySqlSource().read(config, catalog, null))); - assertThat(throwable).isInstanceOf(ConfigErrorException.class) - .hasMessageContaining( - "The following tables have invalid columns selected as cursor, please select a column with a well-defined ordering with no null values as a cursor. {tableName='test.test_view_null_cursor', cursorColumnName='id', cursorSqlType=INT, cause=Cursor column contains NULL value}"); - - } finally { - db.stop(); - } - } - } - - private ConfiguredAirbyteStream createViewWithNullValueCursor(final Connection connection) throws SQLException { - - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE TABLE IF NOT EXISTS test.test_table_null_cursor(id INTEGER NULL)"); - connection.createStatement().execute(""" - CREATE VIEW test_view_null_cursor(id) as - SELECT test_table_null_cursor.id - FROM test_table_null_cursor - """); - connection.createStatement().execute("INSERT INTO test.test_table_null_cursor(id) VALUES (1), (2), (NULL)"); - - return new ConfiguredAirbyteStream().withSyncMode(SyncMode.INCREMENTAL) - .withCursorField(Lists.newArrayList("id")) - .withDestinationSyncMode(DestinationSyncMode.APPEND) - .withSyncMode(SyncMode.INCREMENTAL) - .withStream(CatalogHelpers.createAirbyteStream( - "test_view_null_cursor", - "test", - Field.of("id", JsonSchemaType.STRING)) - .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) - .withSourceDefinedPrimaryKey(List.of(List.of("id")))); - - } + static private final String NULL_CURSOR_EXCEPTION_MESSAGE_CONTAINS = "The following tables have invalid columns " + + "selected as cursor, please select a column with a well-defined ordering with no null values as a cursor."; @Test void testParseJdbcParameters() { @@ -210,26 +126,12 @@ void testParseJdbcParameters() { @Test public void testJDBCSessionVariable() throws Exception { - // start DB - try (final MySQLContainer container = new MySQLContainer<>("mysql:8.0") - .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) - .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD) - .withLogConsumer(new Slf4jLogConsumer(LOGGER))) { - - container.start(); - final Properties properties = new Properties(); - properties.putAll(ImmutableMap.of("user", "root", JdbcUtils.PASSWORD_KEY, TEST_PASSWORD)); - DriverManager.getConnection(container.getJdbcUrl(), properties); - final String dbName = 
Strings.addRandomSuffix("db", "_", 10); - final JsonNode config = getConfig(container, dbName, "sessionVariables=MAX_EXECUTION_TIME=28800000"); - - try (final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), properties)) { - connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); - connection.createStatement().execute("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - } - final AirbyteConnectionStatus check = new MySqlSource().check(config); + try (final var testdb = MySQLTestDatabase.in("mysql:8.0")) { + final var config = testdb.testConfigBuilder() + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "sessionVariables=MAX_EXECUTION_TIME=28800000") + .withoutSsl() + .build(); + final AirbyteConnectionStatus check = source().check(config); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, check.getStatus()); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java index 0386a18813b6..5d5ac314a928 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java @@ -4,49 +4,27 @@ package io.airbyte.integrations.source.mysql; -import static io.airbyte.integrations.source.mysql.MySqlSource.SSL_PARAMETERS; - -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.factory.DSLContextFactory; -import io.airbyte.cdk.db.factory.DatabaseDriver; +import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.string.Strings; -import org.jooq.SQLDialect; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; +@Order(3) class MySqlSslJdbcSourceAcceptanceTest extends MySqlJdbcSourceAcceptanceTest { - @BeforeEach - public void setup() throws Exception { - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, container.getHost()) - .put(JdbcUtils.PORT_KEY, container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, Strings.addRandomSuffix("db", "_", 10)) - .put(JdbcUtils.USERNAME_KEY, TEST_USER) - .put(JdbcUtils.PASSWORD_KEY, TEST_PASSWORD.call()) - .put(JdbcUtils.SSL_KEY, true) - .build()); - - dslContext = DSLContextFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), - DatabaseDriver.MYSQL.getDriverClassName(), - String.format("jdbc:mysql://%s:%s?%s", - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asText(), - String.join("&", SSL_PARAMETERS)), - SQLDialect.MYSQL); - database = new Database(dslContext); - - database.query(ctx -> { - ctx.fetch("CREATE DATABASE " + config.get(JdbcUtils.DATABASE_KEY).asText()); - ctx.fetch("SHOW STATUS LIKE 'Ssl_cipher'"); - return null; - }); + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .with(JdbcUtils.SSL_KEY, true) + .build(); + } - super.setup(); + @Override + protected MySQLTestDatabase createTestDatabase() { + return new MySQLTestDatabase(new MySQLContainerFactory().shared("mysql:8.0")) + .withConnectionProperty("useSSL", "true") + 
.withConnectionProperty("requireSSL", "true") + .initialized() + .with("SHOW STATUS LIKE 'Ssl_cipher'"); } } diff --git a/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java new file mode 100644 index 000000000000..74c745cb7f7f --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLContainerFactory.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.mysql; + +import io.airbyte.cdk.testutils.ContainerFactory; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.testcontainers.containers.MySQLContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +public class MySQLContainerFactory implements ContainerFactory> { + + @Override + public MySQLContainer createNewContainer(DockerImageName imageName) { + return new MySQLContainer<>(imageName.asCompatibleSubstituteFor("mysql")); + } + + @Override + public Class getContainerClass() { + return MySQLContainer.class; + } + + /** + * Create a new network and bind it to the container. + */ + public void withNetwork(MySQLContainer container) { + container.withNetwork(Network.newNetwork()); + } + + private static final String INVALID_TIMEZONE_CEST = "CEST"; + + public void withInvalidTimezoneCEST(MySQLContainer container) { + container.withEnv("TZ", INVALID_TIMEZONE_CEST); + } + + public void withMoscowTimezone(MySQLContainer container) { + container.withEnv("TZ", "Europe/Moscow"); + } + + public void withRootAndServerCertificates(MySQLContainer container) { + execInContainer(container, + "sed -i '31 a ssl' /etc/my.cnf", + "sed -i '32 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf", + "sed -i '33 a ssl-cert=/var/lib/mysql/server-cert.pem' /etc/my.cnf", + "sed -i '34 a ssl-key=/var/lib/mysql/server-key.pem' /etc/my.cnf", + "sed -i '35 a require_secure_transport=ON' /etc/my.cnf"); + } + + public void withClientCertificate(MySQLContainer container) { + execInContainer(container, + "sed -i '39 a [client]' /etc/mysql/my.cnf", + "sed -i '40 a ssl-ca=/var/lib/mysql/ca.pem' /etc/my.cnf", + "sed -i '41 a ssl-cert=/var/lib/mysql/client-cert.pem' /etc/my.cnf", + "sed -i '42 a ssl-key=/var/lib/mysql/client-key.pem' /etc/my.cnf"); + } + + static private void execInContainer(MySQLContainer container, String... commands) { + container.start(); + try { + for (String command : commands) { + container.execInContainer("sh", "-c", command); + } + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java new file mode 100644 index 000000000000..5f35def2b83f --- /dev/null +++ b/airbyte-integrations/connectors/source-mysql/src/testFixtures/java/io/airbyte/integrations/source/mysql/MySQLTestDatabase.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.mysql; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.testutils.TestDatabase; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.MySQLContainer; + +public class MySQLTestDatabase extends + TestDatabase, MySQLTestDatabase, MySQLTestDatabase.MySQLConfigBuilder> { + + static public MySQLTestDatabase in(String imageName, String... methods) { + final var container = new MySQLContainerFactory().shared(imageName, methods); + return new MySQLTestDatabase(container).initialized(); + } + + public MySQLTestDatabase(MySQLContainer container) { + super(container); + } + + public MySQLTestDatabase withCdcPermissions() { + return this + .with("REVOKE ALL PRIVILEGES, GRANT OPTION FROM '%s';", getUserName()) + .with("GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO '%s';", getUserName()); + } + + public MySQLTestDatabase withoutStrictMode() { + // This disables strict mode in the DB and allows to insert specific values. + // For example, it's possible to insert date with zero values "2021-00-00" + return with("SET @@sql_mode=''"); + } + + static private final int MAX_CONNECTIONS = 1000; + + @Override + protected Stream> inContainerBootstrapCmd() { + return Stream.of(mysqlCmd(Stream.of( + String.format("SET GLOBAL max_connections=%d", MAX_CONNECTIONS), + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER '%s' IDENTIFIED BY '%s'", getUserName(), getPassword()), + // Grant privileges also to the container's user, which is not root. 
+ String.format("GRANT ALL PRIVILEGES ON *.* TO '%s', '%s' WITH GRANT OPTION", getUserName(), + getContainer().getUsername())))); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + return mysqlCmd(Stream.of( + String.format("DROP USER '%s'", getUserName()), + String.format("DROP DATABASE %s", getDatabaseName()))); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.MYSQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.MYSQL; + } + + @Override + public MySQLConfigBuilder configBuilder() { + return new MySQLConfigBuilder(this); + } + + public Stream mysqlCmd(Stream sql) { + return Stream.of("bash", "-c", String.format( + "set -o errexit -o pipefail; echo \"%s\" | mysql -v -v -v --user=root --password=test", + sql.collect(Collectors.joining("; ")))); + } + + static public class MySQLConfigBuilder extends ConfigBuilder { + + protected MySQLConfigBuilder(MySQLTestDatabase testDatabase) { + super(testDatabase); + } + + public MySQLConfigBuilder withStandardReplication() { + return with("replication_method", ImmutableMap.builder().put("method", "STANDARD").build()); + } + + public MySQLConfigBuilder withCdcReplication() { + return this + .with("is_test", true) + .with("replication_method", ImmutableMap.builder() + .put("method", "CDC") + .put("initial_waiting_seconds", 5) + .put("server_time_zone", "America/Los_Angeles") + .build()); + } + + } + + private String cachedCaCertificate; + private Certificates cachedCertificates; + + public synchronized String getCaCertificate() { + if (cachedCaCertificate == null) { + cachedCaCertificate = catFileInContainer("/var/lib/mysql/ca.pem"); + } + return cachedCaCertificate; + } + + public synchronized Certificates getCertificates() { + if (cachedCertificates == null) { + cachedCertificates = new Certificates( + catFileInContainer("/var/lib/mysql/ca.pem"), + catFileInContainer("/var/lib/mysql/client-cert.pem"), + catFileInContainer("/var/lib/mysql/client-key.pem")); + } + return cachedCertificates; + } + + public record Certificates(String caCertificate, String clientCertificate, String clientKey) {} + + private String catFileInContainer(String filePath) { + try { + return getContainer().execInContainer("sh", "-c", "cat " + filePath).getStdout().trim(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + +} diff --git a/airbyte-integrations/connectors/source-postgres/build.gradle b/airbyte-integrations/connectors/source-postgres/build.gradle index 49e0bee537d6..722598ac2d82 100644 --- a/airbyte-integrations/connectors/source-postgres/build.gradle +++ b/airbyte-integrations/connectors/source-postgres/build.gradle @@ -13,12 +13,12 @@ java { } airbyteJavaConnector { - cdkVersionRequired = '0.4.8' + cdkVersionRequired = '0.5.0' features = ['db-sources'] - useLocalCdk = false + useLocalCdk = true } -airbyteJavaConnector.addCdkDependencies() + application { mainClass = 'io.airbyte.integrations.source.postgres.PostgresSource' @@ -52,6 +52,8 @@ dependencies { implementation libs.bundles.datadog testImplementation 'org.hamcrest:hamcrest-all:1.3' + testFixturesImplementation libs.testcontainers.jdbc + testFixturesImplementation libs.testcontainers.postgresql testImplementation libs.testcontainers.jdbc testImplementation libs.testcontainers.postgresql testImplementation libs.junit.jupiter.system.stubs diff --git a/airbyte-integrations/connectors/source-postgres/metadata.yaml 
b/airbyte-integrations/connectors/source-postgres/metadata.yaml
index 1fbe97df91ce..8e617f30cd35 100644
--- a/airbyte-integrations/connectors/source-postgres/metadata.yaml
+++ b/airbyte-integrations/connectors/source-postgres/metadata.yaml
@@ -9,7 +9,7 @@ data:
   connectorSubtype: database
   connectorType: source
   definitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
-  dockerImageTag: 3.2.21
+  dockerImageTag: 3.2.22
   dockerRepository: airbyte/source-postgres
   documentationUrl: https://docs.airbyte.com/integrations/sources/postgres
   githubIssueLabel: source-postgres
diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java
index b5208c543092..3c727acef7e7 100644
--- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java
+++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresUtils.java
@@ -45,6 +45,8 @@ public class PostgresUtils {
   public static final Duration MIN_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(2);
   public static final Duration MAX_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(20);
   public static final Duration DEFAULT_FIRST_RECORD_WAIT_TIME = Duration.ofMinutes(5);
+  public static final Duration DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME = Duration.ofMinutes(1);
+
   private static final int MIN_QUEUE_SIZE = 1000;
   private static final int MAX_QUEUE_SIZE = 10000;
@@ -157,6 +159,18 @@ public static Duration getFirstRecordWaitTime(final JsonNode config) {
     return firstRecordWaitTime;
   }
 
+  public static Duration getSubsequentRecordWaitTime(final JsonNode config) {
+    Duration subsequentRecordWaitTime = DEFAULT_SUBSEQUENT_RECORD_WAIT_TIME;
+    final boolean isTest = config.has("is_test") && config.get("is_test").asBoolean();
+    final Optional<Integer> firstRecordWaitSeconds = getFirstRecordWaitSeconds(config);
+    if (isTest && firstRecordWaitSeconds.isPresent()) {
+      // In tests, reuse the initial_waiting_seconds property to speed things up.
+ subsequentRecordWaitTime = Duration.ofSeconds(firstRecordWaitSeconds.get()); + } + LOGGER.info("Subsequent record waiting time: {} seconds", subsequentRecordWaitTime.getSeconds()); + return subsequentRecordWaitTime; + } + public static boolean isXmin(final JsonNode config) { final boolean isXmin = config.hasNonNull("replication_method") && config.get("replication_method").get("method").asText().equals("Xmin"); diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java index a9a5553a6f72..910a22648508 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/cdc/PostgresCdcCtidInitializer.java @@ -68,6 +68,7 @@ public static List> cdcCtidIteratorsCombin try { final JsonNode sourceConfig = database.getSourceConfig(); final Duration firstRecordWaitTime = PostgresUtils.getFirstRecordWaitTime(sourceConfig); + final Duration subsequentRecordWaitTime = PostgresUtils.getSubsequentRecordWaitTime(sourceConfig); final OptionalInt queueSize = OptionalInt.of(PostgresUtils.getQueueSize(sourceConfig)); LOGGER.info("First record waiting time: {} seconds", firstRecordWaitTime.getSeconds()); LOGGER.info("Queue size: {}", queueSize.getAsInt()); @@ -163,20 +164,9 @@ public static List> cdcCtidIteratorsCombin final var targetPosition = PostgresCdcTargetPosition.targetPosition(database); final AirbyteDebeziumHandler handler = new AirbyteDebeziumHandler<>(sourceConfig, - targetPosition, false, firstRecordWaitTime, queueSize); + targetPosition, false, firstRecordWaitTime, subsequentRecordWaitTime, queueSize); final PostgresCdcStateHandler postgresCdcStateHandler = new PostgresCdcStateHandler(stateManager); - final boolean canShortCircuitDebeziumEngine = savedOffset.isPresent() && - // Until the need presents itself in production, short-circuiting should only be done in tests. 
- sourceConfig.has("is_test") && sourceConfig.get("is_test").asBoolean() && - !postgresDebeziumStateUtil.maybeReplicationStreamIntervalHasRecords( - database.getDatabaseConfig(), - sourceConfig.get("replication_method").get("replication_slot").asText(), - sourceConfig.get("replication_method").get("publication").asText(), - PostgresUtils.getPluginValue(sourceConfig.get("replication_method")), - savedOffset.getAsLong(), - targetPosition.targetLsn.asLong()); - final Supplier> incrementalIteratorSupplier = () -> handler.getIncrementalIterators( catalog, new PostgresCdcSavedInfoFetcher(stateToBeUsed), @@ -185,8 +175,7 @@ public static List> cdcCtidIteratorsCombin PostgresCdcProperties.getDebeziumDefaultProperties(database), DebeziumPropertiesManager.DebeziumConnectorType.RELATIONALDB, emittedAt, - false, - canShortCircuitDebeziumEngine); + false); if (initialSyncCtidIterators.isEmpty()) { return Collections.singletonList(incrementalIteratorSupplier.get()); diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java index ea68ae74436b..a1f6b8242952 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractCdcPostgresSourceSslAcceptanceTest.java @@ -5,52 +5,36 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.db.PostgresUtils; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; -import io.airbyte.commons.json.Jsons; -import java.util.List; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; +import java.util.Map; public abstract class AbstractCdcPostgresSourceSslAcceptanceTest extends CdcPostgresSourceAcceptanceTest { protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName(), "withWalLevelLogical", "withCert"); - certs = testdb.getCertificate(); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(NAMESPACE)) - .put("replication_method", replicationMethod) - .put(JdbcUtils.SSL_KEY, true) - .put("ssl_mode", getCertificateConfiguration()) - .put("is_test", true) - .build()); - - testdb.database.query(ctx -> { - ctx.execute("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO id_and_name (id, name) VALUES 
(1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.execute("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - return null; - }); + testdb = PostgresTestDatabase.in(getServerImageName(), "withWalLevelLogical", "withCert") + .with("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .withReplicationSlot() + .withPublicationForAllTables(); + } + + @Override + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSchemas(NAMESPACE) + .withSsl(getCertificateConfiguration()) + .withCdcReplication() + .build(); } protected abstract String getServerImageName(); - public abstract ImmutableMap getCertificateConfiguration(); + public abstract Map getCertificateConfiguration(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java index 00c1722eef25..a87c4395785e 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceDatatypeTest.java @@ -10,20 +10,17 @@ import static io.airbyte.protocol.models.JsonSchemaType.STRING_TIME_WITHOUT_TIMEZONE; import static io.airbyte.protocol.models.JsonSchemaType.STRING_TIME_WITH_TIMEZONE; -import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.cdk.integrations.standardtest.source.AbstractSourceDatabaseTypeTest; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaPrimitiveUtil.JsonSchemaPrimitive; import io.airbyte.protocol.models.JsonSchemaType; -import java.sql.SQLException; import java.util.Set; public abstract class AbstractPostgresSourceDatatypeTest extends AbstractSourceDatabaseTypeTest { protected PostgresTestDatabase testdb; - protected JsonNode config; protected static final String SCHEMA_NAME = "test"; @@ -38,12 +35,7 @@ protected String getImageName() { } @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected void tearDown(final TestDestinationEnv testEnv) throws SQLException { + protected void tearDown(final TestDestinationEnv testEnv) { testdb.close(); } diff --git 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java index 6014ca946500..d605214a8028 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractPostgresSourceSSLCertificateAcceptanceTest.java @@ -5,14 +5,12 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.PostgresUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -22,6 +20,7 @@ import io.airbyte.protocol.models.v0.SyncMode; import java.util.HashMap; import java.util.List; +import java.util.Map; public abstract class AbstractPostgresSourceSSLCertificateAcceptanceTest extends AbstractPostgresSourceAcceptanceTest { @@ -29,11 +28,9 @@ public abstract class AbstractPostgresSourceSSLCertificateAcceptanceTest extends private static final String STREAM_NAME2 = "starships"; private static final String STREAM_NAME_MATERIALIZED_VIEW = "testview"; private static final String SCHEMA_NAME = "public"; - - private PostgresTestDatabase testdb; - private JsonNode config; protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; + + protected PostgresTestDatabase testdb; @Override protected FeatureFlags featureFlags() { @@ -42,29 +39,15 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withCert"); - certs = testdb.getCertificate(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put("schemas", Jsons.jsonNode(List.of("public"))) - .put("ssl", true) - .put("replication_method", replicationMethod) - .put("ssl_mode", getCertificateConfiguration()) - .build()); - testdb.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); - return null; - }); + testdb = 
PostgresTestDatabase.in("postgres:16-bullseye", "withCert") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .with("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); } - public abstract ImmutableMap getCertificateConfiguration(); + public abstract Map getCertificateConfiguration(); @Override protected void tearDown(final TestDestinationEnv testEnv) { @@ -73,7 +56,11 @@ protected void tearDown(final TestDestinationEnv testEnv) { @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withStandardReplication() + .withSsl(getCertificateConfiguration()) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java index 11e848d94275..ae97db058d15 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/AbstractSshPostgresSourceAcceptanceTest.java @@ -13,11 +13,11 @@ import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.functional.CheckedFunction; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -25,6 +25,8 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.HashMap; import java.util.List; import org.jooq.SQLDialect; @@ -37,13 +39,13 @@ public abstract class AbstractSshPostgresSourceAcceptanceTest extends AbstractPo private final SshBastionContainer bastion = new SshBastionContainer(); private PostgresTestDatabase testdb; - private JsonNode config; private void populateDatabaseTestData() throws Exception { - final var builder = testdb.makeConfigBuilder() - .put("schemas", List.of("public")) - .put("ssl", false); - final var outerConfig = bastion.getTunnelConfig(getTunnelMethod(), builder, false); + final var outerConfig = testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), false)) + .build(); SshTunnel.sshWrap( outerConfig, JdbcUtils.HOST_LIST_KEY, @@ -82,12 +84,8 @@ protected FeatureFlags 
featureFlags() { // requiring data to already be in place. @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withNetwork"); - bastion.initAndStartBastion(testdb.container.getNetwork()); - final var builder = testdb.makeConfigBuilder() - .put("schemas", List.of("public")) - .put("ssl", false); - config = bastion.getTunnelConfig(getTunnelMethod(), builder, true); + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withNetwork"); + bastion.initAndStartBastion(testdb.getContainer().getNetwork()); populateDatabaseTestData(); } @@ -98,7 +96,17 @@ protected void tearDown(final TestDestinationEnv testEnv) { @Override protected JsonNode getConfig() { - return config; + try { + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .with("tunnel_method", bastion.getTunnelMethod(getTunnelMethod(), true)) + .build(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java index 5c4eb2ddf57f..8143ccafc663 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceCaCertificateSslAcceptanceTest.java @@ -5,13 +5,14 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class CDCPostgresSourceCaCertificateSslAcceptanceTest extends AbstractCdcPostgresSourceSslAcceptanceTest { - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java index 15a2fca44d68..c01f163fc9e7 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CDCPostgresSourceFullCertificateSslAcceptanceTest.java @@ -5,16 +5,18 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class 
CDCPostgresSourceFullCertificateSslAcceptanceTest extends AbstractCdcPostgresSourceSslAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { + final var certs = testdb.getCertificates(); return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) + .put("ca_certificate", certs.caCertificate()) + .put("client_certificate", certs.clientCertificate()) + .put("client_key", certs.clientKey()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java index 6968f340210d..762bfecb3d0a 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcInitialSnapshotPostgresSourceDatatypeTest.java @@ -5,26 +5,16 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; -import java.sql.SQLException; -import java.util.List; public class CdcInitialSnapshotPostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { private static final String SCHEMA_NAME = "test"; - private static final int INITIAL_WAITING_SECONDS = 30; - - private String slotName; - private String publication; @Override protected FeatureFlags featureFlags() { @@ -33,60 +23,28 @@ protected FeatureFlags featureFlags() { @Override protected Database setupDatabase() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - - /** - * The publication is not being set as part of the config and because of it - * {@link io.airbyte.integrations.source.postgres.PostgresSource#isCdc(JsonNode)} returns false, as - * a result no test in this class runs through the cdc path. 
- */ - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put("replication_method", replicationMethod) - .put("is_test", true) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - testdb.database.query(ctx -> { - ctx.execute( - "SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - testdb.database.query(ctx -> ctx.fetch("CREATE SCHEMA TEST;")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE inventory_item AS (\n" - + " name text,\n" - + " supplier_id integer,\n" - + " price numeric\n" - + ");")); - - testdb.database.query(ctx -> ctx.fetch("SET TIMEZONE TO 'MST'")); - return testdb.database; + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE EXTENSION hstore;") + .with("CREATE SCHEMA TEST;") + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (\n" + + " name text,\n" + + " supplier_id integer,\n" + + " price numeric\n" + + ");") + .with("SET TIMEZONE TO 'MST'") + .withReplicationSlot() + .withPublicationForAllTables(); + return testdb.getDatabase(); } @Override - protected void tearDown(TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); - super.tearDown(testEnv); - } - - public boolean testCatalog() { - return true; + protected JsonNode getConfig() { + return testdb.integrationTestConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java index 4fb133959f82..df8c13bd046b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcPostgresSourceAcceptanceTest.java @@ -8,14 +8,12 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import 
io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; @@ -39,12 +37,8 @@ public class CdcPostgresSourceAcceptanceTest extends AbstractPostgresSourceAccep protected static final String NAMESPACE = "public"; private static final String STREAM_NAME = "id_and_name"; private static final String STREAM_NAME2 = "starships"; - protected static final int INITIAL_WAITING_SECONDS = 30; protected PostgresTestDatabase testdb; - protected JsonNode config; - protected String slotName; - protected String publication; @Override protected FeatureFlags featureFlags() { @@ -53,47 +47,27 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName(), "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(NAMESPACE)) - .put("replication_method", replicationMethod) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .build()); - - testdb.database.query(ctx -> { - ctx.execute("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); - ctx.execute("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));"); - ctx.execute("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - return null; - }); + testdb = PostgresTestDatabase.in(getServerImageName(), "withConf") + .with("CREATE TABLE id_and_name(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER primary key, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .withReplicationSlot() + .withPublicationForAllTables(); } @Override protected void tearDown(final TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); testdb.close(); } @Override protected JsonNode getConfig() { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas(NAMESPACE) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java index f36fdb61b164..cde90fd79c8e 100644 --- 
a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CdcWalLogsPostgresSourceDatatypeTest.java @@ -5,21 +5,17 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDataHolder; -import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteMessage; import io.airbyte.protocol.models.v0.AirbyteStateMessage; import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; -import java.sql.SQLException; import java.util.Collections; import java.util.List; import java.util.Set; @@ -27,10 +23,7 @@ public class CdcWalLogsPostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { private static final String SCHEMA_NAME = "test"; - private static final int INITIAL_WAITING_SECONDS = 30; private JsonNode stateAfterFirstSync; - private String slotName; - private String publication; @Override protected List runRead(final ConfiguredAirbyteCatalog configuredCatalog) throws Exception { @@ -43,7 +36,6 @@ protected List runRead(final ConfiguredAirbyteCatalog configured @Override protected void postSetup() throws Exception { final Database database = setupDatabase(); - initTests(); for (final TestDataHolder test : testDataHolders) { database.query(ctx -> { ctx.fetch(test.getCreateSqlQuery()); @@ -78,61 +70,29 @@ protected FeatureFlags featureFlags() { } @Override - protected Database setupDatabase() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - slotName = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - - /** - * The publication is not being set as part of the config and because of it - * {@link io.airbyte.integrations.source.postgres.PostgresSource#isCdc(JsonNode)} returns false, as - * a result no test in this class runs through the cdc path. 
- */ - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", slotName) - .put("publication", publication) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put("replication_method", replicationMethod) - .put("is_test", true) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - testdb.database.query(ctx -> { - ctx.execute( - "SELECT pg_create_logical_replication_slot('" + slotName + "', 'pgoutput');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - testdb.database.query(ctx -> ctx.fetch("CREATE SCHEMA TEST;")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');")); - testdb.database.query(ctx -> ctx.fetch("CREATE TYPE inventory_item AS (\n" - + " name text,\n" - + " supplier_id integer,\n" - + " price numeric\n" - + ");")); - - testdb.database.query(ctx -> ctx.fetch("SET TIMEZONE TO 'MST'")); - return testdb.database; + protected Database setupDatabase() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE EXTENSION hstore;") + .with("CREATE SCHEMA TEST;") + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (\n" + + " name text,\n" + + " supplier_id integer,\n" + + " price numeric\n" + + ");") + .with("SET TIMEZONE TO 'MST'") + .withReplicationSlot() + .withPublicationForAllTables(); + return testdb.getDatabase(); } @Override - protected void tearDown(TestDestinationEnv testEnv) throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_drop_replication_slot('" + slotName + "');"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - return null; - }); - super.tearDown(testEnv); - } - - public boolean testCatalog() { - return true; + protected JsonNode getConfig() throws Exception { + return testdb.integrationTestConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withCdcReplication() + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java index c6a7c9cf6465..4bd3d9e02e26 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/CloudDeploymentPostgresSourceAcceptanceTest.java @@ -7,16 +7,15 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.PostgresUtils; import io.airbyte.cdk.integrations.base.adaptive.AdaptiveSourceRunner; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.standardtest.source.SourceAcceptanceTest; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import 
io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -36,10 +35,8 @@ public class CloudDeploymentPostgresSourceAcceptanceTest extends SourceAcceptanc private static final String SCHEMA_NAME = "public"; private PostgresTestDatabase testdb; - private JsonNode config; protected static final String PASSWORD = "Passw0rd"; - protected static PostgresUtils.Certificate certs; @Override protected FeatureFlags featureFlags() { @@ -52,23 +49,8 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withCert"); - certs = testdb.getCertificate(); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put("replication_method", replicationMethod) - .put("ssl_mode", ImmutableMap.builder() - .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) - .put("client_key_password", PASSWORD) - .build()) - .build()); - - testdb.database.query(ctx -> { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withCert"); + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); @@ -96,7 +78,17 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + final var certs = testdb.getCertificates(); + return testdb.integrationTestConfigBuilder() + .withStandardReplication() + .withSsl(ImmutableMap.builder() + .put("mode", "verify-ca") + .put("ca_certificate", certs.caCertificate()) + .put("client_certificate", certs.clientCertificate()) + .put("client_key", certs.clientKey()) + .put("client_key_password", PASSWORD) + .build()) + .build(); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java index 2c7c8cf13612..b199b808738d 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceAcceptanceTest.java @@ -8,16 +8,14 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.airbyte.cdk.db.Database; import io.airbyte.cdk.db.jdbc.JdbcUtils; import 
io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.integrations.util.HostPortResolver; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.AirbyteCatalog; @@ -39,13 +37,10 @@ public class PostgresSourceAcceptanceTest extends AbstractPostgresSourceAcceptan private static final String STREAM_NAME_MATERIALIZED_VIEW = "testview"; private static final String SCHEMA_NAME = "public"; public static final String LIMIT_PERMISSION_SCHEMA = "limit_perm_schema"; - - public final String LIMIT_PERMISSION_ROLE_PASSWORD = "test"; + static public final String LIMIT_PERMISSION_ROLE_PASSWORD = "test"; private PostgresTestDatabase testdb; private JsonNode config; - private ConfiguredAirbyteCatalog configCatalog; - private String limitPermissionRole; @Override protected FeatureFlags featureFlags() { @@ -54,12 +49,9 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make(getServerImageName()); - limitPermissionRole = testdb.withSuffix("limit_perm_role"); - - final List schemas = List.of("public"); - config = getConfig(testdb.userName, testdb.password, schemas); - testdb.database.query(ctx -> { + testdb = PostgresTestDatabase.in(getServerImageName()); + config = getConfig(testdb.getUserName(), testdb.getPassword(), "public"); + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');"); ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); @@ -67,23 +59,22 @@ protected void setupEnvironment(final TestDestinationEnv environment) throws Exc ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); return null; }); - configCatalog = getCommonConfigCatalog(); } - private JsonNode getConfig(final String username, final String password, final List schemas) { - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(testdb.container)) - .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(testdb.container)) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.USERNAME_KEY, username) - .put(JdbcUtils.PASSWORD_KEY, password) - .put(JdbcUtils.SCHEMAS_KEY, Jsons.jsonNode(schemas)) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); + private String getLimitPermissionRoleName() { + return testdb.withNamespace("limit_perm_role"); + } + + private JsonNode getConfig(final String username, final String password, String... 
schemas) { + return testdb.configBuilder() + .withResolvedHostAndPort() + .withDatabase() + .with(JdbcUtils.USERNAME_KEY, username) + .with(JdbcUtils.PASSWORD_KEY, password) + .withSchemas(schemas) + .withoutSsl() + .withStandardReplication() + .build(); } @Override @@ -98,7 +89,7 @@ protected JsonNode getConfig() { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() { - return configCatalog; + return getCommonConfigCatalog(); } @Override @@ -113,16 +104,16 @@ protected boolean supportsPerStream() { @Test public void testFullRefreshWithRevokingSchemaPermissions() throws Exception { - prepareEnvForUserWithoutPermissions(testdb.database); + prepareEnvForUserWithoutPermissions(testdb.getDatabase()); - config = getConfig(limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD, List.of(LIMIT_PERMISSION_SCHEMA)); + config = getConfig(getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD, LIMIT_PERMISSION_SCHEMA); final ConfiguredAirbyteCatalog configuredCatalog = getLimitPermissionConfiguredCatalog(); final List fullRefreshRecords = filterRecords(runRead(configuredCatalog)); final String assertionMessage = "Expected records after full refresh sync for user with schema permission"; assertFalse(fullRefreshRecords.isEmpty(), assertionMessage); - revokeSchemaPermissions(testdb.database); + revokeSchemaPermissions(testdb.getDatabase()); final List lessPermFullRefreshRecords = filterRecords(runRead(configuredCatalog)); final String assertionMessageWithoutPermission = "Expected no records after full refresh sync for user without schema permission"; @@ -132,9 +123,9 @@ public void testFullRefreshWithRevokingSchemaPermissions() throws Exception { @Test public void testDiscoverWithRevokingSchemaPermissions() throws Exception { - prepareEnvForUserWithoutPermissions(testdb.database); - revokeSchemaPermissions(testdb.database); - config = getConfig(limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD, List.of(LIMIT_PERMISSION_SCHEMA)); + prepareEnvForUserWithoutPermissions(testdb.getDatabase()); + revokeSchemaPermissions(testdb.getDatabase()); + config = getConfig(getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD, LIMIT_PERMISSION_SCHEMA); runDiscover(); final AirbyteCatalog lastPersistedCatalogSecond = getLastPersistedCatalog(); @@ -144,20 +135,20 @@ public void testDiscoverWithRevokingSchemaPermissions() throws Exception { private void revokeSchemaPermissions(final Database database) throws SQLException { database.query(ctx -> { - ctx.fetch(String.format("REVOKE USAGE ON schema %s FROM %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("REVOKE USAGE ON schema %s FROM %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); return null; }); } private void prepareEnvForUserWithoutPermissions(final Database database) throws SQLException { database.query(ctx -> { - ctx.fetch(String.format("CREATE ROLE %s WITH LOGIN PASSWORD '%s';", limitPermissionRole, LIMIT_PERMISSION_ROLE_PASSWORD)); + ctx.fetch(String.format("CREATE ROLE %s WITH LOGIN PASSWORD '%s';", getLimitPermissionRoleName(), LIMIT_PERMISSION_ROLE_PASSWORD)); ctx.fetch(String.format("CREATE SCHEMA %s;", LIMIT_PERMISSION_SCHEMA)); - ctx.fetch(String.format("GRANT CONNECT ON DATABASE %s TO %s;", testdb.dbName, limitPermissionRole)); - ctx.fetch(String.format("GRANT USAGE ON schema %s TO %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("GRANT CONNECT ON DATABASE %s TO %s;", testdb.getDatabaseName(), getLimitPermissionRoleName())); + 
ctx.fetch(String.format("GRANT USAGE ON schema %s TO %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); ctx.fetch(String.format("CREATE TABLE %s.id_and_name(id INTEGER, name VARCHAR(200));", LIMIT_PERMISSION_SCHEMA)); ctx.fetch(String.format("INSERT INTO %s.id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');", LIMIT_PERMISSION_SCHEMA)); - ctx.fetch(String.format("GRANT SELECT ON table %s.id_and_name TO %s;", LIMIT_PERMISSION_SCHEMA, limitPermissionRole)); + ctx.fetch(String.format("GRANT SELECT ON table %s.id_and_name TO %s;", LIMIT_PERMISSION_SCHEMA, getLimitPermissionRoleName())); return null; }); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java index 116ecb38767b..6c36dd3ee932 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceDatatypeTest.java @@ -5,13 +5,10 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.Database; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import java.sql.SQLException; public class PostgresSourceDatatypeTest extends AbstractPostgresSourceDatatypeTest { @@ -23,37 +20,29 @@ protected FeatureFlags featureFlags() { @Override protected Database setupDatabase() throws SQLException { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Standard") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); - testdb.database.query(ctx -> { - ctx.execute(String.format("CREATE SCHEMA %S;", SCHEMA_NAME)); - ctx.execute("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');"); - ctx.execute("CREATE TYPE inventory_item AS (name text, supplier_id integer, price numeric);"); - // In one of the test case, we have some money values with currency symbol. Postgres can only - // understand those money values if the symbol corresponds to the monetary locale setting. For - // example, - // if the locale is 'en_GB', '£100' is valid, but '$100' is not. So setting the monetary locate is - // necessary here to make sure the unit test can pass, no matter what the locale the runner VM has. - ctx.execute("SET lc_monetary TO 'en_US.utf8';"); - // Set up a fixed timezone here so that timetz and timestamptz always have the same time zone - // wherever the tests are running on. 
- ctx.execute("SET TIMEZONE TO 'MST'"); - ctx.execute("CREATE EXTENSION hstore;"); - return null; - }); - - return testdb.database; + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("CREATE SCHEMA %S;", SCHEMA_NAME) + .with("CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');") + .with("CREATE TYPE inventory_item AS (name text, supplier_id integer, price numeric);") + // In one of the test case, we have some money values with currency symbol. Postgres can only + // understand those money values if the symbol corresponds to the monetary locale setting. For + // example, + // if the locale is 'en_GB', '£100' is valid, but '$100' is not. So setting the monetary locate is + // necessary here to make sure the unit test can pass, no matter what the locale the runner VM has. + .with("SET lc_monetary TO 'en_US.utf8';") + // Set up a fixed timezone here so that timetz and timestamptz always have the same time zone + // wherever the tests are running on. + .with("SET TIMEZONE TO 'MST'") + .with("CREATE EXTENSION hstore;"); + return testdb.getDatabase(); } @Override - public boolean testCatalog() { - return true; + protected JsonNode getConfig() throws Exception { + return testdb.integrationTestConfigBuilder() + .withoutSsl() + .withStandardReplication() + .build(); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java index bfecd215194a..eb93444a7201 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLCaCertificateAcceptanceTest.java @@ -5,14 +5,15 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class PostgresSourceSSLCaCertificateAcceptanceTest extends AbstractPostgresSourceSSLCertificateAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java index bf0282c418ee..dcd4810cd34b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/PostgresSourceSSLFullCertificateAcceptanceTest.java @@ -5,16 +5,17 
@@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.google.common.collect.ImmutableMap; +import java.util.Map; public class PostgresSourceSSLFullCertificateAcceptanceTest extends AbstractPostgresSourceSSLCertificateAcceptanceTest { @Override - public ImmutableMap getCertificateConfiguration() { + public Map getCertificateConfiguration() { return ImmutableMap.builder() .put("mode", "verify-ca") - .put("ca_certificate", certs.getCaCertificate()) - .put("client_certificate", certs.getClientCertificate()) - .put("client_key", certs.getClientKey()) + .put("ca_certificate", testdb.getCertificates().caCertificate()) + .put("client_certificate", testdb.getCertificates().clientCertificate()) + .put("client_key", testdb.getCertificates().clientKey()) .put("client_key_password", PASSWORD) .build(); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java index 2a2a7be36c44..90f173ecf813 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-integration/java/io/airbyte/integrations/io/airbyte/integration_tests/sources/XminPostgresSourceAcceptanceTest.java @@ -5,14 +5,12 @@ package io.airbyte.integrations.io.airbyte.integration_tests.sources; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.standardtest.source.TestDestinationEnv; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.source.postgres.PostgresTestDatabase; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import io.airbyte.protocol.models.v0.CatalogHelpers; @@ -31,12 +29,14 @@ public class XminPostgresSourceAcceptanceTest extends AbstractPostgresSourceAcce private static final String SCHEMA_NAME = "public"; private PostgresTestDatabase testdb; - private JsonNode config; - private ConfiguredAirbyteCatalog configCatalog; @Override protected JsonNode getConfig() throws Exception { - return config; + return testdb.integrationTestConfigBuilder() + .withSchemas("public") + .withoutSsl() + .withXminReplication() + .build(); } @Override @@ -46,25 +46,12 @@ protected FeatureFlags featureFlags() { @Override protected void setupEnvironment(final TestDestinationEnv environment) throws Exception { - testdb = PostgresTestDatabase.make("postgres:12-bullseye"); - final JsonNode replicationMethod = Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Xmin") - .build()); - config = Jsons.jsonNode(testdb.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, Jsons.jsonNode(List.of("public"))) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", replicationMethod) - .build()); - - testdb.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 
'crusher'), (3, 'vash');"); - ctx.fetch("CREATE TABLE starships(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');"); - ctx.fetch("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); - return null; - }); - configCatalog = getXminCatalog(); + testdb = PostgresTestDatabase.in("postgres:12-bullseye") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,'picard'), (2, 'crusher'), (3, 'vash');") + .with("CREATE TABLE starships(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO starships (id, name) VALUES (1,'enterprise-d'), (2, 'defiant'), (3, 'yamato');") + .with("CREATE MATERIALIZED VIEW testview AS select * from id_and_name where id = '2';"); } @Override @@ -74,20 +61,6 @@ protected void tearDown(final TestDestinationEnv testEnv) throws Exception { @Override protected ConfiguredAirbyteCatalog getConfiguredCatalog() throws Exception { - return configCatalog; - } - - @Override - protected JsonNode getState() throws Exception { - return Jsons.jsonNode(new HashMap<>()); - } - - @Override - protected boolean supportsPerStream() { - return true; - } - - private ConfiguredAirbyteCatalog getXminCatalog() { return new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( new ConfiguredAirbyteStream() .withSyncMode(SyncMode.INCREMENTAL) @@ -121,4 +94,14 @@ private ConfiguredAirbyteCatalog getXminCatalog() { .withSourceDefinedPrimaryKey(List.of(List.of("id")))))); } + @Override + protected JsonNode getState() throws Exception { + return Jsons.jsonNode(new HashMap<>()); + } + + @Override + protected boolean supportsPerStream() { + return true; + } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java index 3ba91d1c4656..a6d7ecb4d970 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceLegacyCtidTest.java @@ -4,6 +4,9 @@ package io.airbyte.integrations.source.postgres; +import org.junit.jupiter.api.Order; + +@Order(2) public class CdcPostgresSourceLegacyCtidTest extends CdcPostgresSourceTest { protected static String getServerImageName() { diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java index 06b058ad87e2..59064a1893e2 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CdcPostgresSourceTest.java @@ -24,20 +24,17 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Streams; -import io.airbyte.cdk.db.Database; import io.airbyte.cdk.db.PgLsn; import io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.factory.DatabaseDriver; import 
io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.base.Source; import io.airbyte.cdk.integrations.debezium.CdcSourceTest; import io.airbyte.cdk.integrations.debezium.CdcTargetPosition; import io.airbyte.cdk.integrations.debezium.internals.postgres.PostgresCdcTargetPosition; import io.airbyte.cdk.integrations.debezium.internals.postgres.PostgresReplicationConnection; import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -59,7 +56,6 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; import io.airbyte.protocol.models.v0.StreamDescriptor; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -68,129 +64,100 @@ import java.util.Set; import java.util.stream.Collectors; import javax.sql.DataSource; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -public class CdcPostgresSourceTest extends CdcSourceTest { +@Order(1) +public class CdcPostgresSourceTest extends CdcSourceTest { - protected String publication; - protected static final int INITIAL_WAITING_SECONDS = 15; - private PostgresSource source; - - private PostgresTestDatabase testdb; + @Override + protected PostgresTestDatabase createTestDatabase() { + return PostgresTestDatabase.in(getServerImageName(), "withConf").withReplicationSlot(); + } - private JsonNode config; - private String fullReplicationSlot; - private String cleanUserVanillaName, cleanUserReplicationName, cleanUserSuperName; - private final String cleanUserPassword = "password"; + @Override + protected PostgresSource source() { + final var source = new PostgresSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } - protected String getPluginName() { - return "pgoutput"; + @Override + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSchemas(modelsSchema(), modelsSchema() + "_random") + .withoutSsl() + .withCdcReplication("After loading Data in the destination") + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); } + @Override @BeforeEach - protected void setup() throws SQLException { - source = new PostgresSource(); - source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); - testdb = PostgresTestDatabase.make(getServerImageName(), "withConf"); - fullReplicationSlot = testdb.withSuffix("debezium_slot"); - publication = testdb.withSuffix("publication"); - config = getConfig(testdb.dbName, testdb.userName, testdb.password); - cleanUserSuperName = testdb.withSuffix("super_user"); - cleanUserReplicationName = testdb.withSuffix("replication_user"); - cleanUserVanillaName = testdb.withSuffix("vanilla_user"); + protected void setup() { super.setup(); - testdb.database.query(ctx -> { - ctx.execute("SELECT pg_create_logical_replication_slot('" + fullReplicationSlot + "', '" + getPluginName() + "');"); - ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;"); - ctx.execute("CREATE USER " + cleanUserSuperName + " PASSWORD '" + cleanUserPassword + 
"';"); - ctx.execute("ALTER USER " + cleanUserSuperName + " SUPERUSER;"); - ctx.execute("CREATE USER " + cleanUserReplicationName + " PASSWORD '" + cleanUserPassword + "';"); - ctx.execute("ALTER USER " + cleanUserReplicationName + " REPLICATION;"); - ctx.execute("CREATE USER " + cleanUserVanillaName + " PASSWORD '" + cleanUserPassword + "';"); - return null; - }); - } - - @AfterEach - protected void tearDown() throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("DROP USER " + cleanUserVanillaName + ";"); - ctx.execute("DROP USER " + cleanUserReplicationName + ";"); - ctx.execute("DROP USER " + cleanUserSuperName + ";"); - ctx.execute("DROP PUBLICATION " + publication + " CASCADE;"); - ctx.execute("SELECT pg_drop_replication_slot('" + fullReplicationSlot + "');"); - return null; - }); - testdb.close(); - } - - private JsonNode getConfig(final String dbName, final String userName, final String userPassword) { - final JsonNode replicationMethod = getReplicationMethod(dbName); - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(MODELS_SCHEMA, MODELS_SCHEMA + "_random")) - .put(JdbcUtils.USERNAME_KEY, userName) - .put(JdbcUtils.PASSWORD_KEY, userPassword) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .put("replication_method", replicationMethod) - .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) - .build()); - } - - private JsonNode getReplicationMethod(final String dbName) { - return Jsons.jsonNode(ImmutableMap.builder() - .put("method", "CDC") - .put("replication_slot", fullReplicationSlot) - .put("publication", publication) - .put("plugin", getPluginName()) - .put("initial_waiting_seconds", INITIAL_WAITING_SECONDS) - .put("lsn_commit_behaviour", "After loading Data in the destination") - .build()); + testdb.withPublicationForAllTables(); } @Test void testCheckReplicationAccessSuperUserPrivilege() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserSuperName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserSuperName = testdb.withNamespace("super_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserSuperName, testdb.getPassword()) + .with("ALTER USER %s SUPERUSER;", cleanUserSuperName) + .onClose("DROP OWNED BY %s;", cleanUserSuperName) + .onClose("DROP USER %s;", cleanUserSuperName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserSuperName); + final AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, status.getStatus()); } @Test void testCheckReplicationAccessReplicationPrivilege() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserReplicationName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserReplicationName = testdb.withNamespace("replication_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserReplicationName, testdb.getPassword()) + .with("ALTER USER %s REPLICATION;", cleanUserReplicationName) + .onClose("DROP OWNED BY %s;", cleanUserReplicationName) + .onClose("DROP USER %s;", cleanUserReplicationName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserReplicationName); + final 
AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, status.getStatus()); } @Test void testCheckWithoutReplicationPermission() throws Exception { - final JsonNode test_config = getConfig(testdb.dbName, cleanUserVanillaName, cleanUserPassword); - final AirbyteConnectionStatus status = source.check(test_config); + final var cleanUserVanillaName = testdb.withNamespace("vanilla_user"); + testdb + .with("CREATE USER %s PASSWORD '%s';", cleanUserVanillaName, testdb.getPassword()) + .onClose("DROP OWNED BY %s;", cleanUserVanillaName) + .onClose("DROP USER %s;", cleanUserVanillaName); + final JsonNode testConfig = config(); + ((ObjectNode) testConfig).put(JdbcUtils.USERNAME_KEY, cleanUserVanillaName); + final AirbyteConnectionStatus status = source().check(testConfig); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertEquals(String.format(ConnectorExceptionUtil.COMMON_EXCEPTION_MESSAGE_TEMPLATE, - String.format(PostgresReplicationConnection.REPLICATION_PRIVILEGE_ERROR_MESSAGE, test_config.get("username").asText())), + String.format(PostgresReplicationConnection.REPLICATION_PRIVILEGE_ERROR_MESSAGE, testConfig.get("username").asText())), status.getMessage()); } @Test void testCheckWithoutPublication() throws Exception { - testdb.database.query(ctx -> ctx.execute("DROP PUBLICATION " + publication + ";")); - final AirbyteConnectionStatus status = source.check(getConfig()); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - testdb.database.query(ctx -> ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES;")); + testdb.query(ctx -> ctx.execute("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR ALL TABLES;")); } @Test void testCheckWithoutReplicationSlot() throws Exception { - testdb.database.query(ctx -> ctx.execute("SELECT pg_drop_replication_slot('" + fullReplicationSlot + "');")); - final AirbyteConnectionStatus status = source.check(getConfig()); + testdb.query(ctx -> ctx.execute("SELECT pg_drop_replication_slot('" + testdb.getReplicationSlotName() + "');")); + final AirbyteConnectionStatus status = source().check(config()); assertEquals(status.getStatus(), AirbyteConnectionStatus.Status.FAILED); - testdb.database.query(ctx -> ctx.execute("SELECT pg_create_logical_replication_slot('" + fullReplicationSlot + "', '" + getPluginName() + "');")); + testdb.query(ctx -> ctx.execute("SELECT pg_create_logical_replication_slot('" + testdb.getReplicationSlotName() + "', 'pgoutput');")); } @Override @@ -243,14 +210,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); - if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomTableSchema()))) { + if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME + "_random").withNamespace(randomSchema()))) { assertEquals("ctid", streamState.get(STATE_TYPE_KEY).asText()); - } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA))) { + } else if (s.getStreamDescriptor().equals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()))) { assertFalse(streamState.has(STATE_TYPE_KEY)); } else { throw new RuntimeException("Unknown 
stream"); @@ -268,8 +235,8 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List { final JsonNode streamState = s.getStreamState(); assertFalse(streamState.has(STATE_TYPE_KEY)); @@ -286,14 +253,14 @@ protected void assertStateMessagesForNewTableSnapshotTest(final List MODEL_RECORDS_2 = ImmutableList.of( Jsons.jsonNode(ImmutableMap.of(COL_ID, 110, COL_MAKE_ID, 1, COL_MODEL, "Fiesta-2")), @@ -303,18 +270,18 @@ public void testTwoStreamSync() throws Exception { Jsons.jsonNode(ImmutableMap.of(COL_ID, 150, COL_MAKE_ID, 2, COL_MODEL, "A 220-2")), Jsons.jsonNode(ImmutableMap.of(COL_ID, 160, COL_MAKE_ID, 2, COL_MODEL, "E 350-2"))); - createTable(MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", + testdb.with(createTableSqlFmt(), modelsSchema(), MODELS_STREAM_NAME + "_2", columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_MAKE_ID, "INTEGER", COL_MODEL, "VARCHAR(200)"), Optional.of(COL_ID))); for (final JsonNode recordJson : MODEL_RECORDS_2) { - writeRecords(recordJson, MODELS_SCHEMA, MODELS_STREAM_NAME + "_2", COL_ID, + writeRecords(recordJson, modelsSchema(), MODELS_STREAM_NAME + "_2", COL_ID, COL_MAKE_ID, COL_MODEL); } final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream() .withStream(CatalogHelpers.createAirbyteStream( MODELS_STREAM_NAME + "_2", - MODELS_SCHEMA, + modelsSchema(), Field.of(COL_ID, JsonSchemaType.INTEGER), Field.of(COL_MAKE_ID, JsonSchemaType.INTEGER), Field.of(COL_MODEL, JsonSchemaType.STRING)) @@ -327,8 +294,7 @@ public void testTwoStreamSync() throws Exception { streams.add(airbyteStream); configuredCatalog.withStreams(streams); - final AutoCloseableIterator read1 = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator read1 = source().read(config(), configuredCatalog, null); final List actualRecords1 = AutoCloseableIterators.toListAndClose(read1); final Set recordMessages1 = extractRecordMessages(actualRecords1); @@ -389,13 +355,13 @@ public void testTwoStreamSync() throws Exception { recordMessages1, names, names, - MODELS_SCHEMA); + modelsSchema()); - assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(MODELS_SCHEMA), firstStreamInState); + assertEquals(new StreamDescriptor().withName(MODELS_STREAM_NAME).withNamespace(modelsSchema()), firstStreamInState); // Triggering a sync with a ctid state for 1 stream and complete state for other stream - final AutoCloseableIterator read2 = getSource() - .read(getConfig(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); + final AutoCloseableIterator read2 = source() + .read(config(), configuredCatalog, Jsons.jsonNode(Collections.singletonList(stateMessages1.get(6)))); final List actualRecords2 = AutoCloseableIterators.toListAndClose(read2); final List stateMessages2 = extractStateMessages(actualRecords2); @@ -432,7 +398,7 @@ public void testTwoStreamSync() throws Exception { recordMessages2, names, names, - MODELS_SCHEMA); + modelsSchema()); } @Override @@ -450,13 +416,13 @@ protected void assertExpectedStateMessagesFromIncrementalSync(final List ctx.execute("DROP PUBLICATION " + publication + ";")); - testdb.database.query(ctx -> ctx.execute(String.format("CREATE PUBLICATION " + publication + " FOR TABLE %s.%s", MODELS_SCHEMA, "models"))); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + testdb + .query(ctx -> ctx.execute(String.format("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR TABLE %s.%s", modelsSchema(), "models"))); - final AirbyteCatalog 
catalog = source.discover(getConfig()); + final AirbyteCatalog catalog = source().discover(config()); assertEquals(catalog.getStreams().size(), 2); final AirbyteStream streamInPublication = catalog.getStreams().stream().filter(stream -> stream.getName().equals("models")).findFirst().get(); @@ -565,15 +507,15 @@ void testDiscoverFiltersNonPublication() throws Exception { assertEquals(streamNotInPublication.getSupportedSyncModes(), List.of(SyncMode.FULL_REFRESH)); assertTrue(streamNotInPublication.getSourceDefinedPrimaryKey().isEmpty()); assertFalse(streamNotInPublication.getSourceDefinedCursor()); - testdb.database.query(ctx -> ctx.execute("DROP PUBLICATION " + publication + ";")); - testdb.database.query(ctx -> ctx.execute("CREATE PUBLICATION " + publication + " FOR ALL TABLES")); + testdb.query(ctx -> ctx.execute("DROP PUBLICATION " + testdb.getPublicationName() + ";")); + testdb.query(ctx -> ctx.execute("CREATE PUBLICATION " + testdb.getPublicationName() + " FOR ALL TABLES")); } @Test public void testTableWithTimestampColDefault() throws Exception { createAndPopulateTimestampTable(); final AirbyteCatalog catalog = new AirbyteCatalog().withStreams(List.of( - CatalogHelpers.createAirbyteStream("time_stamp_table", MODELS_SCHEMA, + CatalogHelpers.createAirbyteStream("time_stamp_table", modelsSchema(), Field.of("id", JsonSchemaType.NUMBER), Field.of("name", JsonSchemaType.STRING), Field.of("created_at", JsonSchemaType.STRING_TIMESTAMP_WITH_TIMEZONE)) @@ -584,8 +526,8 @@ public void testTableWithTimestampColDefault() throws Exception { // set all streams to incremental. configuredCatalog.getStreams().forEach(s -> s.setSyncMode(SyncMode.INCREMENTAL)); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), configuredCatalog, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), configuredCatalog, null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -597,7 +539,7 @@ public void testTableWithTimestampColDefault() throws Exception { } private void createAndPopulateTimestampTable() { - createTable(MODELS_SCHEMA, "time_stamp_table", + testdb.with(createTableSqlFmt(), modelsSchema(), "time_stamp_table", columnClause(ImmutableMap.of("id", "INTEGER", "name", "VARCHAR(200)", "created_at", "TIMESTAMPTZ NOT NULL DEFAULT NOW()"), Optional.of("id"))); final List timestampRecords = ImmutableList.of( @@ -617,10 +559,9 @@ private void createAndPopulateTimestampTable() { .jsonNode(ImmutableMap .of("id", 16000, "name", "blah6"))); for (final JsonNode recordJson : timestampRecords) { - executeQuery( - String.format("INSERT INTO %s.%s (%s, %s) VALUES (%s, '%s');", MODELS_SCHEMA, "time_stamp_table", - "id", "name", - recordJson.get("id").asInt(), recordJson.get("name").asText())); + testdb.with("INSERT INTO %s.%s (%s, %s) VALUES (%s, '%s');", modelsSchema(), "time_stamp_table", + "id", "name", + recordJson.get("id").asInt(), recordJson.get("name").asText()); } } @@ -629,13 +570,14 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { final int recordsToCreate = 20; - final JsonNode config = getConfig(); - final JsonNode replicationMethod = ((ObjectNode) getReplicationMethod(config.get(JdbcUtils.DATABASE_KEY).asText())) - .put("lsn_commit_behaviour", "While reading Data"); - ((ObjectNode) config).put("replication_method", replicationMethod); - - final AutoCloseableIterator firstBatchIterator = getSource() - 
.read(config, CONFIGURED_CATALOG, null); + final JsonNode config = testdb.testConfigBuilder() + .withSchemas(modelsSchema(), modelsSchema() + "_random") + .withoutSsl() + .withCdcReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); + final AutoCloseableIterator firstBatchIterator = source() + .read(config, getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); @@ -645,8 +587,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { // Extract the last state message final JsonNode state = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, state); + final AutoCloseableIterator secondBatchIterator = source() + .read(config, getConfiguredCatalog(), state); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final List stateAfterSecondBatch = extractStateMessages(dataFromSecondBatch); @@ -662,8 +604,8 @@ protected void syncShouldHandlePurgedLogsGracefully() throws Exception { // Triggering sync with the first sync's state only which would mimic a scenario that the second // sync failed on destination end, and we didn't save state - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, state); + final AutoCloseableIterator thirdBatchIterator = source() + .read(config, getConfiguredCatalog(), state); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -694,29 +636,30 @@ void testReachedTargetPosition() { @Test protected void syncShouldIncrementLSN() throws Exception { final int recordsToCreate = 20; + final var postgresSource = source(); final DataSource dataSource = DataSourceFactory.create( - config.get(JdbcUtils.USERNAME_KEY).asText(), - config.get(JdbcUtils.PASSWORD_KEY).asText(), + config().get(JdbcUtils.USERNAME_KEY).asText(), + config().get(JdbcUtils.PASSWORD_KEY).asText(), DatabaseDriver.POSTGRESQL.getDriverClassName(), String.format(DatabaseDriver.POSTGRESQL.getUrlFormatString(), - config.get(JdbcUtils.HOST_KEY).asText(), - config.get(JdbcUtils.PORT_KEY).asInt(), - config.get(JdbcUtils.DATABASE_KEY).asText())); + config().get(JdbcUtils.HOST_KEY).asText(), + config().get(JdbcUtils.PORT_KEY).asInt(), + config().get(JdbcUtils.DATABASE_KEY).asText())); final JdbcDatabase defaultJdbcDatabase = new DefaultJdbcDatabase(dataSource); final Long replicationSlotAtTheBeginning = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateAfterFirstBatch = extractStateMessages(dataFromFirstBatch); final Long replicationSlotAfterFirstSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + 
postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // First sync should not make any change to the replication slot status assertLsnPositionForSyncShouldIncrementLSN(replicationSlotAtTheBeginning, replicationSlotAfterFirstSync, 1); @@ -725,15 +668,15 @@ protected void syncShouldIncrementLSN() throws Exception { bulkInsertRecords(recordsToCreate); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); final List stateAfterSecondBatch = extractStateMessages(dataFromSecondBatch); assertExpectedStateMessagesFromIncrementalSync(stateAfterSecondBatch); final Long replicationSlotAfterSecondSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Second sync should move the replication slot ahead assertLsnPositionForSyncShouldIncrementLSN(replicationSlotAfterFirstSync, replicationSlotAfterSecondSync, 2); @@ -748,8 +691,8 @@ protected void syncShouldIncrementLSN() throws Exception { // Triggering sync with the first sync's state only which would mimic a scenario that the second // sync failed on destination end, and we didn't save state - final AutoCloseableIterator thirdBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator thirdBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromThirdBatch = AutoCloseableIterators .toListAndClose(thirdBatchIterator); @@ -759,7 +702,7 @@ protected void syncShouldIncrementLSN() throws Exception { dataFromThirdBatch); final Long replicationSlotAfterThirdSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Since we used the state, no change should happen to the replication slot assertEquals(replicationSlotAfterSecondSync, replicationSlotAfterThirdSync); @@ -773,8 +716,9 @@ protected void syncShouldIncrementLSN() throws Exception { writeModelRecord(record); } - final AutoCloseableIterator fourthBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, Jsons.jsonNode(Collections.singletonList(stateAfterThirdBatch.get(stateAfterThirdBatch.size() - 1)))); + final AutoCloseableIterator fourthBatchIterator = postgresSource + .read(config(), getConfiguredCatalog(), + Jsons.jsonNode(Collections.singletonList(stateAfterThirdBatch.get(stateAfterThirdBatch.size() - 1)))); final List dataFromFourthBatch = AutoCloseableIterators .toListAndClose(fourthBatchIterator); @@ -784,7 +728,7 @@ protected void syncShouldIncrementLSN() throws Exception { dataFromFourthBatch); final Long replicationSlotAfterFourthSync = PgLsn.fromPgString( - source.getReplicationSlot(defaultJdbcDatabase, 
getConfig()).get(0).get("confirmed_flush_lsn").asText()).asLong(); + postgresSource.getReplicationSlot(defaultJdbcDatabase, config()).get(0).get("confirmed_flush_lsn").asText()).asLong(); // Fourth sync should again move the replication slot ahead assertEquals(1, replicationSlotAfterFourthSync.compareTo(replicationSlotAfterThirdSync)); @@ -815,8 +759,8 @@ protected void verifyCheckpointStatesByRecords() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. final int recordsToCreate = 20000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -827,8 +771,8 @@ protected void verifyCheckpointStatesByRecords() throws Exception { bulkInsertRecords(recordsToCreate); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config(), getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); assertEquals(recordsToCreate, extractRecordMessages(dataFromSecondBatch).size()); @@ -849,8 +793,8 @@ protected void verifyCheckpointStatesBySeconds() throws Exception { // We require a huge amount of records, otherwise Debezium will notify directly the last offset. 
final int recordsToCreate = 40000; - final AutoCloseableIterator firstBatchIterator = getSource() - .read(getConfig(), CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config(), getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); final List stateMessages = extractStateMessages(dataFromFirstBatch); @@ -860,13 +804,13 @@ protected void verifyCheckpointStatesBySeconds() throws Exception { assertExpectedStateMessages(stateMessages); bulkInsertRecords(recordsToCreate); - final JsonNode config = getConfig(); + final JsonNode config = config(); ((ObjectNode) config).put(SYNC_CHECKPOINT_DURATION_PROPERTY, 1); ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 100_000); final JsonNode stateAfterFirstSync = Jsons.jsonNode(Collections.singletonList(stateMessages.get(stateMessages.size() - 1))); - final AutoCloseableIterator secondBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, stateAfterFirstSync); + final AutoCloseableIterator secondBatchIterator = source() + .read(config, getConfiguredCatalog(), stateAfterFirstSync); final List dataFromSecondBatch = AutoCloseableIterators .toListAndClose(secondBatchIterator); @@ -899,10 +843,10 @@ protected void ctidIteratorPageSizeTest() throws Exception { * size of 8KB instead of default 1GB This allows us to make sure that the iterator logic works with * multiple pages (sub queries) */ - final JsonNode config = getConfig(); + final JsonNode config = config(); ((ObjectNode) config).put(USE_TEST_CHUNK_SIZE, true); - final AutoCloseableIterator firstBatchIterator = getSource() - .read(config, CONFIGURED_CATALOG, null); + final AutoCloseableIterator firstBatchIterator = source() + .read(config, getConfiguredCatalog(), null); final List dataFromFirstBatch = AutoCloseableIterators .toListAndClose(firstBatchIterator); @@ -916,18 +860,17 @@ protected void ctidIteratorPageSizeTest() throws Exception { } private void bulkInsertRecords(int recordsToCreate) { - final var bulkInsertQuery = String.format(""" - INSERT INTO %s.%s (%s, %s, %s) - SELECT - 200 + generate_series AS id, - 1 AS make_id, - 'F-' || generate_series AS model - FROM generate_series(0, %d - 1); - """, - MODELS_SCHEMA, MODELS_STREAM_NAME, + testdb.with(""" + INSERT INTO %s.%s (%s, %s, %s) + SELECT + 200 + generate_series AS id, + 1 AS make_id, + 'F-' || generate_series AS model + FROM generate_series(0, %d - 1); + """, + modelsSchema(), MODELS_STREAM_NAME, COL_ID, COL_MAKE_ID, COL_MODEL, recordsToCreate); - executeQuery(bulkInsertQuery); } @Override diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java index 5f245d7690ef..a5473eb703ba 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/CloudDeploymentPostgresSourceTest.java @@ -16,7 +16,6 @@ import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; import io.airbyte.cdk.integrations.base.ssh.SshHelpers; import io.airbyte.cdk.integrations.base.ssh.SshTunnel; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import 
io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -37,15 +36,15 @@ public class CloudDeploymentPostgresSourceTest { @BeforeAll static void setupContainers() { - DB_NO_SSL_WITH_NETWORK = PostgresTestDatabase.make("postgres:16-bullseye", "withNetwork"); - NETWORK_NO_SSL = DB_NO_SSL_WITH_NETWORK.container.getNetwork(); + DB_NO_SSL_WITH_NETWORK = PostgresTestDatabase.in("postgres:16-bullseye", "withNetwork"); + NETWORK_NO_SSL = DB_NO_SSL_WITH_NETWORK.getContainer().getNetwork(); BASTION_NO_SSL = new SshBastionContainer(); BASTION_NO_SSL.initAndStartBastion(NETWORK_NO_SSL); - DB_WITH_SSL = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL"); + DB_WITH_SSL = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL"); - DB_WITH_SSL_WITH_NETWORK = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL", "withNetwork"); - NETWORK_WITH_SSL = DB_WITH_SSL_WITH_NETWORK.container.getNetwork(); + DB_WITH_SSL_WITH_NETWORK = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL", "withNetwork"); + NETWORK_WITH_SSL = DB_WITH_SSL_WITH_NETWORK.getContainer().getNetwork(); BASTION_WITH_SSL = new SshBastionContainer(); BASTION_WITH_SSL.initAndStartBastion(NETWORK_WITH_SSL); } @@ -108,9 +107,7 @@ void testSSlRequiredWithTunnelIfServerDoesNotSupportSSL() throws Exception { @Test void testSSlRequiredNoTunnelIfServerSupportSSL() throws Exception { - final ImmutableMap configBuilderWithSSLMode = getDatabaseConfigBuilderWithSSLMode( - DB_WITH_SSL, SSL_MODE_REQUIRE, false).build(); - final JsonNode config = Jsons.jsonNode(configBuilderWithSSLMode); + final JsonNode config = configBuilderWithSSLMode(DB_WITH_SSL, SSL_MODE_REQUIRE, false).build(); addNoTunnel((ObjectNode) config); final AirbyteConnectionStatus connectionStatus = source().check(config); assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, connectionStatus.getStatus()); @@ -122,20 +119,20 @@ void testStrictSSLSecuredWithTunnel() throws Exception { assertEquals(AirbyteConnectionStatus.Status.SUCCEEDED, connectionStatus.getStatus()); } - private ImmutableMap.Builder getDatabaseConfigBuilderWithSSLMode(final PostgresTestDatabase db, - final String sslMode, - final boolean innerAddress) { + private PostgresTestDatabase.PostgresConfigBuilder configBuilderWithSSLMode( + final PostgresTestDatabase db, + final String sslMode, + final boolean innerAddress) { final var containerAddress = innerAddress - ? SshHelpers.getInnerContainerAddress(db.container) - : SshHelpers.getOuterContainerAddress(db.container); - return ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, Objects.requireNonNull(containerAddress.left)) - .put(JdbcUtils.PORT_KEY, containerAddress.right) - .put(JdbcUtils.DATABASE_KEY, db.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of("public")) - .put(JdbcUtils.USERNAME_KEY, db.userName) - .put(JdbcUtils.PASSWORD_KEY, db.password) - .put(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, sslMode)); + ? 
SshHelpers.getInnerContainerAddress(db.getContainer()) + : SshHelpers.getOuterContainerAddress(db.getContainer()); + return db.configBuilder() + .with(JdbcUtils.HOST_KEY, Objects.requireNonNull(containerAddress.left)) + .with(JdbcUtils.PORT_KEY, containerAddress.right) + .withDatabase() + .withSchemas("public") + .withCredentials() + .with(JdbcUtils.SSL_MODE_KEY, Map.of(JdbcUtils.MODE_KEY, sslMode)); } private JsonNode getMockedSSLConfig(final String sslMode) { @@ -163,10 +160,10 @@ void testSslModesUnsecuredNoTunnel() throws Exception { } private AirbyteConnectionStatus checkWithTunnel(final PostgresTestDatabase db, SshBastionContainer bastion, final String sslmode) throws Exception { - final var configBuilderWithSSLMode = getDatabaseConfigBuilderWithSSLMode(db, sslmode, true); - final JsonNode configWithSSLModeDisable = - bastion.getTunnelConfig(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, configBuilderWithSSLMode, false); - ((ObjectNode) configWithSSLModeDisable).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1"); + final var configWithSSLModeDisable = configBuilderWithSSLMode(db, sslmode, true) + .with("tunnel_method", bastion.getTunnelMethod(SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH, false)) + .with(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1") + .build(); return source().check(configWithSSLModeDisable); } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java index 85aaf6c87ce6..026ff68dde00 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresCdcGetPublicizedTablesTest.java @@ -7,16 +7,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcDatabase; -import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; -import io.airbyte.commons.json.Jsons; import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; import java.sql.SQLException; -import java.util.List; import java.util.Set; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -28,53 +22,33 @@ class PostgresCdcGetPublicizedTablesTest { private static final String SCHEMA_NAME = "public"; - protected static final int INITIAL_WAITING_SECONDS = 30; - private String publication; - private String replicationSlot; private PostgresTestDatabase testdb; @BeforeEach - void setup() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); - replicationSlot = testdb.withSuffix("replication_slot"); - publication = testdb.withSuffix("publication"); - testdb.database.query(ctx -> { - ctx.execute("create table table_1 (id serial primary key, text_column text);"); - ctx.execute("create table table_2 (id serial primary key, text_column text);"); - ctx.execute("create table table_irrelevant (id serial primary key, text_column text);"); - ctx.execute("SELECT pg_create_logical_replication_slot('" + replicationSlot + "', 'pgoutput');"); - 
// create a publication including table_1 and table_2, but not table_irrelevant - ctx.execute("CREATE PUBLICATION " + publication + " FOR TABLE table_1, table_2;"); - return null; - }); + void setup() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf") + .with("create table table_1 (id serial primary key, text_column text);") + .with("create table table_2 (id serial primary key, text_column text);") + .with("create table table_irrelevant (id serial primary key, text_column text);") + .withReplicationSlot(); + // create a publication including table_1 and table_2, but not table_irrelevant + testdb = testdb + .with("CREATE PUBLICATION %s FOR TABLE table_1, table_2;", testdb.getPublicationName()) + .onClose("DROP PUBLICATION %s CASCADE", testdb.getPublicationName()); } @AfterEach - void tearDown() throws SQLException { - testdb.database.query(ctx -> { - ctx.execute("DROP PUBLICATION " + publication + ";"); - ctx.execute("SELECT pg_drop_replication_slot('" + replicationSlot + "');"); - return null; - }); + void tearDown() { testdb.close(); } private JsonNode getConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, false) - .put("is_test", true) - .build()); + return testdb.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().with("is_test", true).build(); } @Test public void testGetPublicizedTables() throws SQLException { - final JdbcDatabase database = new DefaultJdbcDatabase(testdb.dslContext.diagnosticsDataSource()); + final JdbcDatabase database = new DefaultJdbcDatabase(testdb.getDslContext().diagnosticsDataSource()); // when source config does not exist assertEquals(0, PostgresCatalogHelper.getPublicizedTables(database).size()); @@ -83,11 +57,8 @@ public void testGetPublicizedTables() throws SQLException { assertEquals(0, PostgresCatalogHelper.getPublicizedTables(database).size()); // when config is cdc - final ObjectNode cdcConfig = ((ObjectNode) getConfig()); - cdcConfig.set("replication_method", Jsons.jsonNode(ImmutableMap.of( - "replication_slot", replicationSlot, - "initial_waiting_seconds", INITIAL_WAITING_SECONDS, - "publication", publication))); + final JsonNode cdcConfig = + testdb.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().withCdcReplication().build(); database.setSourceConfig(cdcConfig); final Set expectedTables = Set.of( new AirbyteStreamNameNamespacePair("table_1", SCHEMA_NAME), diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java index 343e736cd629..e8e958c3689b 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresJdbcSourceAcceptanceTest.java @@ -17,20 +17,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import 
io.airbyte.cdk.db.factory.DataSourceFactory; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase; -import io.airbyte.cdk.db.jdbc.streaming.AdaptiveStreamingQueryConfig; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; import io.airbyte.cdk.integrations.source.relationaldb.models.DbStreamState; -import io.airbyte.cdk.testutils.PostgreSQLContainerHelper; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; -import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.resources.MoreResources; -import io.airbyte.commons.string.Strings; import io.airbyte.commons.util.MoreIterators; import io.airbyte.integrations.source.postgres.internal.models.CursorBasedStatus; import io.airbyte.integrations.source.postgres.internal.models.InternalModels.StateType; @@ -48,41 +41,26 @@ import io.airbyte.protocol.models.v0.ConnectorSpecification; import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; -import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.utility.MountableFile; -class PostgresJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +class PostgresJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { private static final String DATABASE = "new_db"; protected static final String USERNAME_WITHOUT_PERMISSION = "new_user"; protected static final String PASSWORD_WITHOUT_PERMISSION = "new_password"; - private static PostgreSQLContainer PSQL_DB; public static String COL_WAKEUP_AT = "wakeup_at"; public static String COL_LAST_VISITED_AT = "last_visited_at"; public static String COL_LAST_COMMENT_AT = "last_comment_at"; - @BeforeAll - static void init() { - PSQL_DB = new PostgreSQLContainer<>("postgres:13-alpine"); - PSQL_DB.start(); - } - - @Override - @BeforeEach - public void setup() throws Exception { - final String dbName = Strings.addRandomSuffix("db", "_", 10).toLowerCase(); + static { COLUMN_CLAUSE_WITH_PK = "id INTEGER, name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL, wakeup_at TIMETZ NOT NULL, last_visited_at TIMESTAMPTZ NOT NULL, last_comment_at TIMESTAMP NOT NULL"; COLUMN_CLAUSE_WITHOUT_PK = @@ -90,97 +68,72 @@ public void setup() throws Exception { COLUMN_CLAUSE_WITH_COMPOSITE_PK = "first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, updated_at DATE NOT NULL, wakeup_at TIMETZ NOT NULL, last_visited_at TIMESTAMPTZ NOT NULL, last_comment_at TIMESTAMP NOT NULL"; - config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, PSQL_DB.getHost()) - .put(JdbcUtils.PORT_KEY, PSQL_DB.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME, SCHEMA_NAME2)) - .put(JdbcUtils.USERNAME_KEY, PSQL_DB.getUsername()) - .put(JdbcUtils.PASSWORD_KEY, PSQL_DB.getPassword()) - .put(JdbcUtils.SSL_KEY, false) - .build()); - - final String initScriptName = "init_" + dbName.concat(".sql"); - final String tmpFilePath = 
IOs.writeFileToRandomTmpDir(initScriptName, "CREATE DATABASE " + dbName + ";"); - PostgreSQLContainerHelper.runSqlScript(MountableFile.forHostPath(tmpFilePath), PSQL_DB); - - source = getSource(); - final JsonNode jdbcConfig = getToDatabaseConfigFunction().apply(config); - - streamName = TABLE_NAME; - - dataSource = DataSourceFactory.create( - jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText(), - jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText() : null, - getDriverClass(), - jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), - JdbcUtils.parseJdbcParameters(jdbcConfig, JdbcUtils.CONNECTION_PROPERTIES_KEY, getJdbcParameterDelimiter())); - - database = new StreamingJdbcDatabase(dataSource, - JdbcUtils.getDefaultSourceOperations(), - AdaptiveStreamingQueryConfig::new); - - createSchemas(); - - database.execute(connection -> { - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), - COLUMN_CLAUSE_WITHOUT_PK, "")); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK))); - - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), - COLUMN_CLAUSE_WITH_COMPOSITE_PK, - primaryKeyClause(ImmutableList.of("first_name", "last_name")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('first' ,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", 
- getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('second', 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('third', 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK))); - - }); - CREATE_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "CREATE TABLE %s (%s BIT(3) NOT NULL);"; INSERT_TABLE_WITHOUT_CURSOR_TYPE_QUERY = "INSERT INTO %s VALUES(B'101');"; } @Override - protected void maybeSetShorterConnectionTimeout() { + protected JsonNode config() { + return testdb.testConfigBuilder() + .withSchemas(SCHEMA_NAME, SCHEMA_NAME2) + .withoutSsl() + .build(); + } + + @Override + protected PostgresSource source() { + final var source = new PostgresSource(); + source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); + return source; + } + + @Override + protected PostgresTestDatabase createTestDatabase() { + return PostgresTestDatabase.in("postgres:16-bullseye"); + } + + @Override + @BeforeEach + public void setup() throws Exception { + testdb = createTestDatabase(); + if (supportsSchemas()) { + createSchemas(); + } + testdb.with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME), COLUMN_CLAUSE_WITH_PK, primaryKeyClause(Collections.singletonList("id")))) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK), COLUMN_CLAUSE_WITHOUT_PK, "")) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_WITHOUT_PK)) + .with(createTableQuery(getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK), COLUMN_CLAUSE_WITH_COMPOSITE_PK, + 
primaryKeyClause(ImmutableList.of("first_name", "last_name")))) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('first' ,'picard', '2004-10-19','12:12:12.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('second', 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)) + .with( + "INSERT INTO %s(first_name, last_name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES ('third', 'vash', '2006-10-19','10:10:10.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME_COMPOSITE_PK)); + } + + @Override + protected void maybeSetShorterConnectionTimeout(final JsonNode config) { ((ObjectNode) config).put(JdbcUtils.JDBC_URL_PARAMS_KEY, "connectTimeout=1"); } @@ -284,31 +237,9 @@ public boolean supportsSchemas() { return true; } - @Override - public AbstractJdbcSource getJdbcSource() { - var source = new PostgresSource(); - source.setFeatureFlags(FeatureFlagsWrapper.overridingUseStreamCapableState(new EnvVariableFeatureFlags(), true)); - return source; - } - - @Override - public JsonNode getConfig() { - return config; - } - - @Override - public String getDriverClass() { - return PostgresSource.DRIVER_CLASS; - } - - @AfterAll - static void cleanUp() { - PSQL_DB.close(); - } - @Test void testSpec() throws Exception { - final ConnectorSpecification actual = source.spec(); + final ConnectorSpecification actual = source().spec(); final ConnectorSpecification expected = Jsons.deserialize(MoreResources.readResource("spec.json"), ConnectorSpecification.class); assertEquals(expected, actual); @@ -316,7 +247,7 @@ void testSpec() throws Exception { @Override protected List getTestMessages() { - return getTestMessages(streamName); + return getTestMessages(streamName()); } protected List getTestMessages(final String streamName) { @@ -351,17 +282,13 @@ protected List getTestMessages(final String streamName) { } @Override - protected void executeStatementReadIncrementallyTwice() throws SQLException { - database.execute(connection -> { - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (4,'riker', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (5, 'data', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(TABLE_NAME))); - }); + protected void executeStatementReadIncrementallyTwice() { + testdb.with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (4,'riker', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(TABLE_NAME)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (5, 'data', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + 
getFullyQualifiedTableName(TABLE_NAME)); } @Override @@ -454,111 +381,103 @@ protected boolean supportsPerStream() { */ @Test void testCheckIncorrectPasswordFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PASSWORD_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 28P01;")); } @Test public void testCheckIncorrectUsernameFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.USERNAME_KEY, "fake"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 28P01;")); } @Test public void testCheckIncorrectHostFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.HOST_KEY, "localhost2"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 08001;")); } @Test public void testCheckIncorrectPortFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.PORT_KEY, "30000"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 08001;")); } @Test public void testCheckIncorrectDataBaseFailure() throws Exception { - maybeSetShorterConnectionTimeout(); + final var config = config(); + maybeSetShorterConnectionTimeout(config); ((ObjectNode) config).put(JdbcUtils.DATABASE_KEY, "wrongdatabase"); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 3D000;")); } @Test public void testUserHasNoPermissionToDataBase() throws Exception { - maybeSetShorterConnectionTimeout(); - database.execute(connection -> connection.createStatement() - .execute(String.format("create user %s with password '%s';", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION))); - database.execute(connection -> connection.createStatement() - .execute(String.format("create database %s;", DATABASE))); - // deny access for database for all users from group public - database.execute(connection -> connection.createStatement() - .execute(String.format("revoke all on database %s from public;", DATABASE))); + final var config = config(); + maybeSetShorterConnectionTimeout(config); + testdb.with("create user %s with password '%s';", USERNAME_WITHOUT_PERMISSION, PASSWORD_WITHOUT_PERMISSION) + .with("create database %s;", DATABASE) + // deny access for database 
for all users from group public + .with("revoke all on database %s from public;", DATABASE); ((ObjectNode) config).put("username", USERNAME_WITHOUT_PERMISSION); ((ObjectNode) config).put("password", PASSWORD_WITHOUT_PERMISSION); ((ObjectNode) config).put("database", DATABASE); - final AirbyteConnectionStatus status = source.check(config); + final AirbyteConnectionStatus status = source().check(config); Assertions.assertEquals(AirbyteConnectionStatus.Status.FAILED, status.getStatus()); assertTrue(status.getMessage().contains("State code: 42501;")); } @Test void testReadMultipleTablesIncrementally() throws Exception { + final var config = config(); ((ObjectNode) config).put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1); final String namespace = getDefaultNamespace(); final String streamOneName = TABLE_NAME + "one"; // Create a fresh first table - database.execute(connection -> { - connection.createStatement().execute( - createTableQuery(getFullyQualifiedTableName(streamOneName), COLUMN_CLAUSE_WITH_PK, - primaryKeyClause(Collections.singletonList("id")))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - connection.createStatement().execute( - String.format( - "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - }); + testdb.with(createTableQuery(getFullyQualifiedTableName(streamOneName), COLUMN_CLAUSE_WITH_PK, + primaryKeyClause(Collections.singletonList("id")))) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (1,'picard', '2004-10-19','10:10:10.123456-05:00','2004-10-19T17:23:54.123456Z','2004-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (2, 'crusher', '2005-10-19','11:11:11.123456-05:00','2005-10-19T17:23:54.123456Z','2005-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with( + "INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at) VALUES (3, 'vash', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)); // Create a fresh second table final String streamTwoName = TABLE_NAME + "two"; final String streamTwoFullyQualifiedName = getFullyQualifiedTableName(streamTwoName); // Insert records into second table - database.execute(ctx -> { - ctx.createStatement().execute( - createTableQuery(streamTwoFullyQualifiedName, COLUMN_CLAUSE_WITH_PK, "")); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (40,'Jean Luc','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - 
ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (41, 'Groot', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (42, 'Thanos','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - }); + testdb.with(createTableQuery(streamTwoFullyQualifiedName, COLUMN_CLAUSE_WITH_PK, "")) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (40,'Jean Luc','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (41, 'Groot', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName) + .with(String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (42, 'Thanos','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName)); // Create records list that we expect to see in the state message final List streamTwoExpectedRecords = Arrays.asList( createRecord(streamTwoName, namespace, map( @@ -598,7 +517,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // Perform initial sync final List messagesFromFirstSync = MoreIterators - .toList(source.read(config, configuredCatalog, null)); + .toList(source().read(config, configuredCatalog, null)); final List recordsFromFirstSync = filterRecords(messagesFromFirstSync); @@ -664,7 +583,7 @@ void testReadMultipleTablesIncrementally() throws Exception { // - stream one state still being the first record read via CTID. // - stream two state being the CTID state before the final emitted state before the cursor switch final List messagesFromSecondSyncWithMixedStates = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromFirstSync.get(0), streamTwoStateMessagesFromFirstSync.get(1))))); @@ -691,20 +610,15 @@ void testReadMultipleTablesIncrementally() throws Exception { // Add some data to each table and perform a third read. 
// Expect to see all records be synced via cursorBased method and not ctid - - database.execute(ctx -> { - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (4,'Hooper','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - getFullyQualifiedTableName(streamOneName))); - ctx.createStatement().execute( - String.format("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" - + "VALUES (43, 'Iron Man', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", - streamTwoFullyQualifiedName)); - }); + testdb.with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (4,'Hooper','2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + getFullyQualifiedTableName(streamOneName)) + .with("INSERT INTO %s(id, name, updated_at, wakeup_at, last_visited_at, last_comment_at)" + + "VALUES (43, 'Iron Man', '2006-10-19','12:12:12.123456-05:00','2006-10-19T17:23:54.123456Z','2006-01-01T17:23:54.123456')", + streamTwoFullyQualifiedName); final List messagesFromThirdSync = MoreIterators - .toList(source.read(config, configuredCatalog, + .toList(source().read(config, configuredCatalog, Jsons.jsonNode(List.of(streamOneStateMessagesFromSecondSync.get(1), streamTwoStateMessagesFromSecondSync.get(0))))); @@ -751,7 +665,7 @@ protected DbStreamState buildStreamState(final ConfiguredAirbyteStream configure protected List getExpectedAirbyteMessagesSecondSync(final String namespace) { final List expectedMessages = new ArrayList<>(); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_4, COL_NAME, "riker", @@ -760,7 +674,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String COL_LAST_VISITED_AT, "2006-10-19T17:23:54.123456Z", COL_LAST_COMMENT_AT, "2006-01-01T17:23:54.123456"))))); expectedMessages.add(new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(streamName).withNamespace(namespace) + .withRecord(new AirbyteRecordMessage().withStream(streamName()).withNamespace(namespace) .withData(Jsons.jsonNode(ImmutableMap .of(COL_ID, ID_VALUE_5, COL_NAME, "data", @@ -771,7 +685,7 @@ protected List getExpectedAirbyteMessagesSecondSync(final String final DbStreamState state = new CursorBasedStatus() .withStateType(StateType.CURSOR_BASED) .withVersion(2L) - .withStreamName(streamName) + .withStreamName(streamName()) .withStreamNamespace(namespace) .withCursorField(ImmutableList.of(COL_ID)) .withCursor("5") diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java index 7eee25049e55..463484952671 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceOperationsTest.java @@ 
-11,7 +11,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import io.airbyte.cdk.db.jdbc.DateTimeConverter; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.json.Jsons; import java.sql.Connection; import java.sql.PreparedStatement; @@ -34,7 +33,7 @@ class PostgresSourceOperationsTest { @BeforeEach public void init() { - testdb = PostgresTestDatabase.make("postgres:16-bullseye", "withConf"); + testdb = PostgresTestDatabase.in("postgres:16-bullseye", "withConf"); } @AfterEach @@ -64,7 +63,7 @@ public void numericColumnAsCursor() throws SQLException { } final List actualRecords = new ArrayList<>(); - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { final PreparedStatement preparedStatement = connection.prepareStatement( "SELECT * FROM " + tableName + " WHERE " + cursorColumn + " > ?"); postgresSourceOperations.setCursorField(preparedStatement, @@ -104,7 +103,7 @@ public void timeColumnAsCursor() throws SQLException { } final List actualRecords = new ArrayList<>(); - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { final PreparedStatement preparedStatement = connection.prepareStatement( "SELECT * from " + tableName + " WHERE " + cursorColumn + " > ?"); postgresSourceOperations.setCursorField(preparedStatement, @@ -137,7 +136,7 @@ public void testParseMoneyValue() { } protected void executeQuery(final String query) throws SQLException { - try (final Connection connection = testdb.container.createConnection("")) { + try (final Connection connection = testdb.getContainer().createConnection("")) { connection.createStatement().execute(query); } } diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java index 414317dc1626..1013adad1243 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceSSLTest.java @@ -17,7 +17,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Sets; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.protocol.models.Field; @@ -77,20 +76,16 @@ class PostgresSourceSSLTest { @BeforeEach void setup() throws Exception { - testdb = PostgresTestDatabase.make("marcosmarxm/postgres-ssl:dev", "withSSL"); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), 
(2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + testdb = PostgresTestDatabase.in("marcosmarxm/postgres-ssl:dev", "withSSL") + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with( + "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach @@ -99,16 +94,10 @@ void tearDown() { } private JsonNode getConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of("public")) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, true) - .put("ssl_mode", ImmutableMap.builder().put("mode", "require").build()) - .build()); + return testdb.testConfigBuilder() + .withSchemas("public") + .withSsl(ImmutableMap.builder().put("mode", "require").build()) + .build(); } @Test diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java index a1069c9b00b4..9710bbdbc57a 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/PostgresSourceTest.java @@ -26,7 +26,6 @@ import io.airbyte.cdk.integrations.source.relationaldb.CursorInfo; import io.airbyte.cdk.integrations.source.relationaldb.state.StateManager; import io.airbyte.cdk.integrations.source.relationaldb.state.StateManagerFactory; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.exceptions.ConfigErrorException; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; @@ -135,27 +134,21 @@ class PostgresSourceTest { private PostgresTestDatabase testdb; @BeforeEach - void setup() throws Exception { - testdb = PostgresTestDatabase.make("postgres:16-bullseye"); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, 
PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + void setup() { + testdb = PostgresTestDatabase.in("postgres:16-bullseye") + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with("INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', " + + "'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach - void tearDown() throws SQLException { + void tearDown() { testdb.close(); } @@ -182,17 +175,17 @@ private static Database getDatabase(final DSLContext dslContext) { } private JsonNode getConfig() { - return getConfig(testdb.userName, testdb.password); + return getConfig(testdb.getUserName(), testdb.getPassword()); } private JsonNode getConfig(final String user, final String password) { - return getConfig(testdb.dbName, user, password); + return getConfig(testdb.getDatabaseName(), user, password); } private JsonNode getConfig(final String dbName, final String user, final String password) { return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) + .put(JdbcUtils.HOST_KEY, testdb.getContainer().getHost()) + .put(JdbcUtils.PORT_KEY, testdb.getContainer().getFirstMappedPort()) .put(JdbcUtils.DATABASE_KEY, dbName) .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) .put(JdbcUtils.USERNAME_KEY, user) @@ -201,13 +194,6 @@ private JsonNode getConfig(final String dbName, final String user, final String .build()); } - private JsonNode getConfig(PostgresTestDatabase db) { - return Jsons.jsonNode(db.makeConfigBuilder() - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.SSL_KEY, false) - .build()); - } - @Test public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { final AirbyteCatalog airbyteCatalog = new AirbyteCatalog().withStreams(List.of( @@ -218,7 +204,7 @@ public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { Field.of("\"test_column\"", JsonSchemaType.STRING)) 
.withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL)) .withSourceDefinedPrimaryKey(List.of(List.of("id"))))); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("CREATE TABLE \"\"\"test_dq_table\"\"\"(id INTEGER PRIMARY KEY, \"\"\"test_column\"\"\" varchar);"); ctx.fetch("INSERT INTO \"\"\"test_dq_table\"\"\" (id, \"\"\"test_column\"\"\") VALUES (1,'test1'), (2, 'test2');"); return null; @@ -230,28 +216,17 @@ public void testCanReadTablesAndColumnsWithDoubleQuotes() throws Exception { null)); setEmittedAtToNull(actualMessages); assertEquals(DOUBLE_QUOTED_MESSAGES, actualMessages); - testdb.database.query(ctx -> ctx.execute("DROP TABLE \"\"\"test_dq_table\"\"\";")); + testdb.query(ctx -> ctx.execute("DROP TABLE \"\"\"test_dq_table\"\"\";")); } @Test public void testCanReadUtf8() throws Exception { // force the db server to start with sql_ascii encoding to verify the source can read UTF8 even when // default settings are in another encoding - try (final var asciiTestDB = PostgresTestDatabase.make("postgres:16-alpine", "withASCII")) { - asciiTestDB.database.query(ctx -> { - ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); - ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,E'\\u2013 someutfstring'), (2, E'\\u2215');"); - return null; - }); - final var config = Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, asciiTestDB.container.getHost()) - .put(JdbcUtils.PORT_KEY, asciiTestDB.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, asciiTestDB.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, asciiTestDB.userName) - .put(JdbcUtils.PASSWORD_KEY, asciiTestDB.password) - .put(JdbcUtils.SSL_KEY, false) - .build()); + try (final var asciiTestDB = PostgresTestDatabase.in("postgres:16-alpine", "withASCII") + .with("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));") + .with("INSERT INTO id_and_name (id, name) VALUES (1,E'\\u2013 someutfstring'), (2, E'\\u2215');")) { + final var config = asciiTestDB.testConfigBuilder().withSchemas(SCHEMA_NAME).withoutSsl().build(); final Set actualMessages = MoreIterators.toSet(source().read(config, CONFIGURED_CATALOG, null)); setEmittedAtToNull(actualMessages); assertEquals(UTF8_MESSAGES, actualMessages); @@ -260,14 +235,14 @@ public void testCanReadUtf8() throws Exception { @Test void testUserDoesntHasPrivilegesToSelectTable() throws Exception { - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.execute("DROP TABLE id_and_name CASCADE;"); ctx.execute("DROP TABLE id_and_name2 CASCADE;"); ctx.execute("DROP TABLE names CASCADE;"); ctx.fetch("CREATE TABLE id_and_name(id INTEGER, name VARCHAR(200));"); ctx.fetch("INSERT INTO id_and_name (id, name) VALUES (1,'John'), (2, 'Alfred'), (3, 'Alex');"); ctx.fetch("CREATE USER test_user_3 password '132';"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO test_user_3;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO test_user_3;"); ctx.fetch("GRANT ALL ON SCHEMA public TO test_user_3"); ctx.fetch("REVOKE ALL PRIVILEGES ON TABLE public.id_and_name FROM test_user_3"); return null; @@ -308,7 +283,7 @@ void testDiscoverWithPk() throws Exception { @Test void testDiscoverRecursiveRolePermissions() throws Exception { - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.execute("DROP TABLE id_and_name CASCADE;"); ctx.execute("DROP TABLE id_and_name2 CASCADE;"); ctx.execute("DROP TABLE names CASCADE;"); @@ -331,7 +306,7 @@ 
void testDiscoverRecursiveRolePermissions() throws Exception { ctx.fetch("GRANT airbyte TO test_user_4;"); ctx.fetch("CREATE TABLE unseen(id INTEGER, name VARCHAR(200));"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO test_user_4;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO test_user_4;"); return null; }); final var config = getConfig(); @@ -355,7 +330,7 @@ void testDiscoverRecursiveRolePermissions() throws Exception { @Test void testDiscoverDifferentGrantAvailability() throws Exception { final JsonNode config = getConfig(); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("create table not_granted_table_name_1(column_1 integer);"); ctx.fetch("create table not_granted_table_name_2(column_1 integer);"); ctx.fetch("create table not_granted_table_name_3(column_1 integer);"); @@ -410,7 +385,7 @@ void testDiscoverDifferentGrantAvailability() throws Exception { ctx.fetch("create user new_test_user;"); ctx.fetch("ALTER USER new_test_user WITH PASSWORD 'new_pass';"); - ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.dbName + " TO new_test_user;"); + ctx.fetch("GRANT CONNECT ON DATABASE " + testdb.getDatabaseName() + " TO new_test_user;"); ctx.fetch("GRANT ALL ON SCHEMA public TO test_user_4"); ctx.fetch("grant test_role to new_test_user;"); @@ -468,7 +443,7 @@ void testReadSuccess() throws Exception { @Test void testReadIncrementalSuccess() throws Exception { // We want to test ordering, so we can delete the NaN entry and add a 3. - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);"); return null; @@ -504,7 +479,7 @@ void testReadIncrementalSuccess() throws Exception { final AirbyteStateMessage lastEmittedState = stateAfterFirstBatch.get(stateAfterFirstBatch.size() - 1); final JsonNode state = Jsons.jsonNode(List.of(lastEmittedState)); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (5, 'piccolo', 100.0);"); return null; }); @@ -583,7 +558,7 @@ void testGetUsername() { @Test public void tableWithInvalidCursorShouldThrowException() throws Exception { - final ConfiguredAirbyteStream tableWithInvalidCursorType = createTableWithInvalidCursorType(testdb.database); + final ConfiguredAirbyteStream tableWithInvalidCursorType = createTableWithInvalidCursorType(testdb.getDatabase()); final ConfiguredAirbyteCatalog configuredAirbyteCatalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(tableWithInvalidCursorType)); @@ -633,7 +608,7 @@ private JsonNode buildConfigEscapingNeeded() { @Test public void tableWithNullValueCursorShouldThrowException() throws SQLException { - final ConfiguredAirbyteStream table = createTableWithNullValueCursor(testdb.database); + final ConfiguredAirbyteStream table = createTableWithNullValueCursor(testdb.getDatabase()); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); @@ -664,7 +639,7 @@ private ConfiguredAirbyteStream createTableWithNullValueCursor(final Database da @Test public void viewWithNullValueCursorShouldThrowException() throws SQLException { - final ConfiguredAirbyteStream table = createViewWithNullValueCursor(testdb.database); + final ConfiguredAirbyteStream table = createViewWithNullValueCursor(testdb.getDatabase()); final ConfiguredAirbyteCatalog catalog = new 
ConfiguredAirbyteCatalog().withStreams(Collections.singletonList(table)); final Throwable throwable = catchThrowable(() -> MoreIterators.toSet(source().read(getConfig(), catalog, null))); @@ -733,7 +708,7 @@ void testParseJdbcParameters() { public void testJdbcOptionsParameter() throws Exception { // Populate DB. final JsonNode dbConfig = getConfig(); - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("CREATE TABLE id_and_bytes (id INTEGER, bytes BYTEA);"); ctx.fetch("INSERT INTO id_and_bytes (id, bytes) VALUES (1, decode('DEADBEEF', 'hex'));"); return null; @@ -771,7 +746,7 @@ public void testJdbcOptionsParameter() throws Exception { @DisplayName("Make sure initial incremental load is reading records in a certain order") void testReadIncrementalRecordOrder() throws Exception { // We want to test ordering, so we can delete the NaN entry - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); for (int i = 3; i < 1000; i++) { ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (%d, 'gohan%d', 222.1);".formatted(i, i)); diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java index 1383d04e60d1..da941383d626 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresSourceTest.java @@ -16,11 +16,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.cdk.db.jdbc.JdbcUtils; import io.airbyte.cdk.integrations.base.Source; -import io.airbyte.cdk.testutils.PostgresTestDatabase; import io.airbyte.commons.features.EnvVariableFeatureFlags; import io.airbyte.commons.features.FeatureFlagsWrapper; import io.airbyte.commons.json.Jsons; @@ -38,7 +35,6 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode; import io.airbyte.protocol.models.v0.SyncMode; import java.math.BigDecimal; -import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -99,48 +95,31 @@ protected String getDatabaseImageName() { } @BeforeEach - protected void setup() throws SQLException { - testdb = PostgresTestDatabase.make(getDatabaseImageName()); - testdb.database.query(ctx -> { - ctx.fetch( - "CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));"); - ctx.fetch("CREATE INDEX i1 ON id_and_name (id);"); - ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);"); - ctx.fetch("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');"); - - ctx.fetch( - "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));"); - ctx.fetch( - "INSERT INTO names (first_name, 
last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); - return null; - }); + protected void setup() { + testdb = PostgresTestDatabase.in(getDatabaseImageName()) + .with("CREATE TABLE id_and_name(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (id));") + .with("CREATE INDEX i1 ON id_and_name (id);") + .with("INSERT INTO id_and_name (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with("CREATE TABLE id_and_name2(id NUMERIC(20, 10) NOT NULL, name VARCHAR(200) NOT NULL, power double precision NOT NULL);") + .with("INSERT INTO id_and_name2 (id, name, power) VALUES (1,'goku', 'Infinity'), (2, 'vegeta', 9000.1), ('NaN', 'piccolo', '-Infinity');") + .with( + "CREATE TABLE names(first_name VARCHAR(200) NOT NULL, last_name VARCHAR(200) NOT NULL, power double precision NOT NULL, PRIMARY KEY (first_name, last_name));") + .with( + "INSERT INTO names (first_name, last_name, power) VALUES ('san', 'goku', 'Infinity'), ('prince', 'vegeta', 9000.1), ('piccolo', 'junior', '-Infinity');"); } @AfterEach - protected void tearDown() throws SQLException { + protected void tearDown() { testdb.close(); } protected JsonNode getXminConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put(JdbcUtils.HOST_KEY, testdb.container.getHost()) - .put(JdbcUtils.PORT_KEY, testdb.container.getFirstMappedPort()) - .put(JdbcUtils.DATABASE_KEY, testdb.dbName) - .put(JdbcUtils.SCHEMAS_KEY, List.of(SCHEMA_NAME)) - .put(JdbcUtils.USERNAME_KEY, testdb.userName) - .put(JdbcUtils.PASSWORD_KEY, testdb.password) - .put(JdbcUtils.SSL_KEY, false) - .put("replication_method", getReplicationMethod()) - .put(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) - .build()); - } - - private JsonNode getReplicationMethod() { - return Jsons.jsonNode(ImmutableMap.builder() - .put("method", "Xmin") - .build()); + return testdb.testConfigBuilder() + .withSchemas(SCHEMA_NAME) + .withoutSsl() + .withXminReplication() + .with(SYNC_CHECKPOINT_RECORDS_PROPERTY, 1) + .build(); } protected Source source() { @@ -254,7 +233,7 @@ void testReadSuccess() throws Exception { // We add some data and perform a third read. We should verify that (i) a delete is not captured and // (ii) the new record that is inserted into the // table is read. - testdb.database.query(ctx -> { + testdb.query(ctx -> { ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';"); ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);"); return null; diff --git a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java index 731c3c423471..65562b84ada1 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test/java/io/airbyte/integrations/source/postgres/XminPostgresWithOldServerSourceTest.java @@ -76,7 +76,7 @@ void testReadSuccess() throws Exception { // We add some data and perform a third read. We should verify that (i) a delete is not captured and // (ii) the new record that is inserted into the // table is read. 
-    testdb.database.query(ctx -> {
+    testdb.query(ctx -> {
       ctx.fetch("DELETE FROM id_and_name WHERE id = 'NaN';");
       ctx.fetch("INSERT INTO id_and_name (id, name, power) VALUES (3, 'gohan', 222.1);");
       return null;
diff --git a/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java
new file mode 100644
index 000000000000..b92c319d9eec
--- /dev/null
+++ b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresContainerFactory.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.source.postgres;
+
+import io.airbyte.cdk.testutils.ContainerFactory;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import org.testcontainers.containers.Network;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.utility.DockerImageName;
+import org.testcontainers.utility.MountableFile;
+
+public class PostgresContainerFactory implements ContainerFactory<PostgreSQLContainer<?>> {
+
+  @Override
+  public PostgreSQLContainer<?> createNewContainer(DockerImageName imageName) {
+    return new PostgreSQLContainer<>(imageName.asCompatibleSubstituteFor("postgres"));
+
+  }
+
+  @Override
+  public Class<?> getContainerClass() {
+    return PostgreSQLContainer.class;
+  }
+
+  /**
+   * Apply the postgresql.conf file that we've packaged as a resource.
+   */
+  public void withConf(PostgreSQLContainer<?> container) {
+    container
+        .withCopyFileToContainer(
+            MountableFile.forClasspathResource("postgresql.conf"),
+            "/etc/postgresql/postgresql.conf")
+        .withCommand("postgres -c config_file=/etc/postgresql/postgresql.conf");
+  }
+
+  /**
+   * Create a new network and bind it to the container.
+   */
+  public void withNetwork(PostgreSQLContainer<?> container) {
+    container.withNetwork(Network.newNetwork());
+  }
+
+  /**
+   * Configure postgres with wal_level=logical.
+   */
+  public void withWalLevelLogical(PostgreSQLContainer<?> container) {
+    container.withCommand("postgres -c wal_level=logical");
+  }
+
+  /**
+   * Generate SSL certificates and tell postgres to enable SSL and use them.
+ */ + public void withCert(PostgreSQLContainer container) { + container.start(); + String[] commands = { + "psql -U test -c \"CREATE USER postgres WITH PASSWORD 'postgres';\"", + "psql -U test -c \"GRANT CONNECT ON DATABASE \"test\" TO postgres;\"", + "psql -U test -c \"ALTER USER postgres WITH SUPERUSER;\"", + "openssl ecparam -name prime256v1 -genkey -noout -out ca.key", + "openssl req -new -x509 -sha256 -key ca.key -out ca.crt -subj \"/CN=127.0.0.1\"", + "openssl ecparam -name prime256v1 -genkey -noout -out server.key", + "openssl req -new -sha256 -key server.key -out server.csr -subj \"/CN=localhost\"", + "openssl x509 -req -in server.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out server.crt -days 365 -sha256", + "cp server.key /etc/ssl/private/", + "cp server.crt /etc/ssl/private/", + "cp ca.crt /etc/ssl/private/", + "chmod og-rwx /etc/ssl/private/server.* /etc/ssl/private/ca.*", + "chown postgres:postgres /etc/ssl/private/server.crt /etc/ssl/private/server.key /etc/ssl/private/ca.crt", + "echo \"ssl = on\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_cert_file = '/etc/ssl/private/server.crt'\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_key_file = '/etc/ssl/private/server.key'\" >> /var/lib/postgresql/data/postgresql.conf", + "echo \"ssl_ca_file = '/etc/ssl/private/ca.crt'\" >> /var/lib/postgresql/data/postgresql.conf", + "mkdir root/.postgresql", + "echo \"hostssl all all 127.0.0.1/32 cert clientcert=verify-full\" >> /var/lib/postgresql/data/pg_hba.conf", + "openssl ecparam -name prime256v1 -genkey -noout -out client.key", + "openssl req -new -sha256 -key client.key -out client.csr -subj \"/CN=postgres\"", + "openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 365 -sha256", + "cp client.crt ~/.postgresql/postgresql.crt", + "cp client.key ~/.postgresql/postgresql.key", + "chmod 0600 ~/.postgresql/postgresql.crt ~/.postgresql/postgresql.key", + "cp ca.crt root/.postgresql/ca.crt", + "chown postgres:postgres ~/.postgresql/ca.crt", + "psql -U test -c \"SELECT pg_reload_conf();\"", + }; + for (String cmd : commands) { + try { + container.execInContainer("su", "-c", cmd); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + + /** + * Tell postgres to enable SSL. + */ + public void withSSL(PostgreSQLContainer container) { + container.withCommand("postgres " + + "-c ssl=on " + + "-c ssl_cert_file=/var/lib/postgresql/server.crt " + + "-c ssl_key_file=/var/lib/postgresql/server.key"); + } + + /** + * Configure postgres with client_encoding=sql_ascii. + */ + public void withASCII(PostgreSQLContainer container) { + container.withCommand("postgres -c client_encoding=sql_ascii"); + } + +} diff --git a/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java new file mode 100644 index 000000000000..69c7f37eaaa8 --- /dev/null +++ b/airbyte-integrations/connectors/source-postgres/src/testFixtures/java/io/airbyte/integrations/source/postgres/PostgresTestDatabase.java @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.source.postgres; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.testutils.TestDatabase; +import io.airbyte.commons.json.Jsons; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.PostgreSQLContainer; + +public class PostgresTestDatabase extends + TestDatabase, PostgresTestDatabase, PostgresTestDatabase.PostgresConfigBuilder> { + + static public PostgresTestDatabase in(String imageName, String... methods) { + final var container = new PostgresContainerFactory().shared(imageName, methods); + return new PostgresTestDatabase(container).initialized(); + } + + public PostgresTestDatabase(PostgreSQLContainer container) { + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + return Stream.of(psqlCmd(Stream.of( + String.format("CREATE DATABASE %s", getDatabaseName()), + String.format("CREATE USER %s PASSWORD '%s'", getUserName(), getPassword()), + String.format("GRANT ALL PRIVILEGES ON DATABASE %s TO %s", getDatabaseName(), getUserName()), + String.format("ALTER USER %s WITH SUPERUSER", getUserName())))); + } + + /** + * Close resources held by this instance. This deliberately avoids dropping the database, which is + * really expensive in Postgres. This is because a DROP DATABASE in Postgres triggers a CHECKPOINT. + * Call {@link #dropDatabaseAndUser} to explicitly drop the database and the user. + */ + @Override + protected Stream inContainerUndoBootstrapCmd() { + return Stream.empty(); + } + + /** + * Drop the database owned by this instance. 
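   * (Illustrative note: close() above intentionally leaves the database and user in place, so a test
   * that must leave the server pristine would add an explicit
   *   testdb.dropDatabaseAndUser();
   * to its teardown, accepting the CHECKPOINT cost described above.)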
+ */ + public void dropDatabaseAndUser() { + execInContainer(psqlCmd(Stream.of( + String.format("DROP DATABASE %s", getDatabaseName()), + String.format("DROP OWNED BY %s", getUserName()), + String.format("DROP USER %s", getUserName())))); + } + + public Stream psqlCmd(Stream sql) { + return Stream.concat( + Stream.of("psql", + "-d", getContainer().getDatabaseName(), + "-U", getContainer().getUsername(), + "-v", "ON_ERROR_STOP=1", + "-a"), + sql.flatMap(stmt -> Stream.of("-c", stmt))); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + return SQLDialect.POSTGRES; + } + + private Certificates cachedCerts; + + public synchronized Certificates getCertificates() { + if (cachedCerts == null) { + final String caCert, clientKey, clientCert; + try { + caCert = getContainer().execInContainer("su", "-c", "cat ca.crt").getStdout().trim(); + clientKey = getContainer().execInContainer("su", "-c", "cat client.key").getStdout().trim(); + clientCert = getContainer().execInContainer("su", "-c", "cat client.crt").getStdout().trim(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + cachedCerts = new Certificates(caCert, clientCert, clientKey); + } + return cachedCerts; + } + + public record Certificates(String caCertificate, String clientCertificate, String clientKey) {} + + @Override + public PostgresConfigBuilder configBuilder() { + return new PostgresConfigBuilder(this); + } + + public String getReplicationSlotName() { + return withNamespace("debezium_slot"); + } + + public String getPublicationName() { + return withNamespace("publication"); + } + + public PostgresTestDatabase withReplicationSlot() { + return this + .with("SELECT pg_create_logical_replication_slot('%s', 'pgoutput');", getReplicationSlotName()) + .onClose("SELECT pg_drop_replication_slot('%s');", getReplicationSlotName()); + } + + public PostgresTestDatabase withPublicationForAllTables() { + return this + .with("CREATE PUBLICATION %s FOR ALL TABLES;", getPublicationName()) + .onClose("DROP PUBLICATION %s CASCADE;", getPublicationName()); + } + + static public class PostgresConfigBuilder extends ConfigBuilder { + + protected PostgresConfigBuilder(PostgresTestDatabase testdb) { + super(testdb); + } + + public PostgresConfigBuilder withSchemas(String... 
schemas) { + return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas)); + } + + public PostgresConfigBuilder withStandardReplication() { + return with("replication_method", ImmutableMap.builder().put("method", "Standard").build()); + } + + public PostgresConfigBuilder withCdcReplication() { + return withCdcReplication("While reading Data"); + } + + public PostgresConfigBuilder withCdcReplication(String LsnCommitBehaviour) { + return this + .with("is_test", true) + .with("replication_method", Jsons.jsonNode(ImmutableMap.builder() + .put("method", "CDC") + .put("replication_slot", testDatabase.getReplicationSlotName()) + .put("publication", testDatabase.getPublicationName()) + .put("initial_waiting_seconds", DEFAULT_CDC_REPLICATION_INITIAL_WAIT.getSeconds()) + .put("lsn_commit_behaviour", LsnCommitBehaviour) + .build())); + } + + public PostgresConfigBuilder withXminReplication() { + return this.with("replication_method", Jsons.jsonNode(ImmutableMap.builder().put("method", "Xmin").build())); + } + + } + +} diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle index 748487784fab..516c96a06808 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/build.gradle @@ -4,13 +4,11 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.2.0' + cdkVersionRequired = '0.5.0' features = ['db-sources'] - useLocalCdk = true + useLocalCdk = false } -airbyteJavaConnector.addCdkDependencies() - application { mainClass = 'io.airbyte.integrations.source.scaffold-java-jdbc.ScaffoldJavaJdbcSource' } @@ -20,6 +18,9 @@ dependencies { //TODO Add jdbc driver import here. Ex: implementation 'com.microsoft.sqlserver:mssql-jdbc:8.4.1.jre14' testImplementation 'org.apache.commons:commons-lang3:3.11' + testImplementation libs.testcontainers.jdbc integrationTestJavaImplementation project(':airbyte-integrations:connectors:source-scaffold-java-jdbc') + + testFixturesImplementation libs.testcontainers.jdbc } diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java index 925eeae95e4d..b911468604e9 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test-integration/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcSourceAcceptanceTest.java @@ -12,24 +12,22 @@ import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.v0.ConnectorSpecification; import java.util.HashMap; +import org.junit.jupiter.api.Disabled; +@Disabled public class ScaffoldJavaJdbcSourceAcceptanceTest extends SourceAcceptanceTest { - private JsonNode config; + private ScaffoldJavaJdbcTestDatabase testdb; @Override protected void setupEnvironment(final TestDestinationEnv testEnv) { - // TODO create new container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g");" - // TODO make container started. 
Ex: "container.start();" - // TODO init JsonNode config - // TODO crete airbyte Database object "Databases.createJdbcDatabase(...)" - // TODO insert test data to DB. Ex: "database.execute(connection-> ...)" - // TODO close Database. Ex: "database.close();" + // TODO: create new TestDatabase instance and assign `testdb` to it. + // TODO: use it to create and populate test tables in the database. } @Override protected void tearDown(final TestDestinationEnv testEnv) { - // TODO close container that was initialized in setup() method. Ex: "container.close();" + testdb.close(); } @Override @@ -44,7 +42,8 @@ protected ConnectorSpecification getSpec() throws Exception { @Override protected JsonNode getConfig() { - return config; + // TODO: (optional) call more builder methods. + return testdb.integrationTestConfigBuilder().build(); } @Override diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java index 94a4db3070d3..70990256b9b8 100644 --- a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/test/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcJdbcSourceAcceptanceTest.java @@ -5,44 +5,32 @@ package io.airbyte.integrations.source.scaffold_java_jdbc; import com.fasterxml.jackson.databind.JsonNode; -import io.airbyte.cdk.integrations.source.jdbc.AbstractJdbcSource; import io.airbyte.cdk.integrations.source.jdbc.test.JdbcSourceAcceptanceTest; -import java.sql.JDBCType; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class ScaffoldJavaJdbcJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { +@Disabled +class ScaffoldJavaJdbcJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { private static final Logger LOGGER = LoggerFactory.getLogger(ScaffoldJavaJdbcJdbcSourceAcceptanceTest.class); - // TODO declare a test container for DB. EX: org.testcontainers.containers.OracleContainer - - @BeforeAll - static void init() { - // Oracle returns uppercase values - // TODO init test container. Ex: "new OracleContainer("epiclabs/docker-oracle-xe-11g")" - // TODO start container. Ex: "container.start();" - } - - @BeforeEach - public void setup() throws Exception { - // TODO init config. Ex: "config = Jsons.jsonNode(ImmutableMap.builder().put("host", - // host).put("port", port)....build()); - super.setup(); + @Override + protected JsonNode config() { + // TODO: (optional) call more builder methods. + return testdb.testConfigBuilder().build(); } - @AfterEach - public void tearDown() { - // TODO clean used resources + @Override + protected ScaffoldJavaJdbcSource source() { + // TODO: (optional) call `setFeatureFlags` before returning the source to mock setting env vars. 
+ return new ScaffoldJavaJdbcSource(); } @Override - public AbstractJdbcSource getSource() { - return new ScaffoldJavaJdbcSource(); + protected ScaffoldJavaJdbcTestDatabase createTestDatabase() { + // TODO: return a suitable TestDatabase instance. + return new ScaffoldJavaJdbcTestDatabase(null).initialized(); } @Override @@ -51,25 +39,4 @@ public boolean supportsSchemas() { return false; } - @Override - public JsonNode getConfig() { - return config; - } - - @Override - public String getDriverClass() { - return ScaffoldJavaJdbcSource.DRIVER_CLASS; - } - - @Override - public AbstractJdbcSource getJdbcSource() { - // TODO - return null; - } - - @AfterAll - static void cleanUp() { - // TODO close the container. Ex: "container.close();" - } - } diff --git a/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java new file mode 100644 index 000000000000..4e0c24508217 --- /dev/null +++ b/airbyte-integrations/connectors/source-scaffold-java-jdbc/src/testFixtures/java/io/airbyte/integrations/source/scaffold_java_jdbc/ScaffoldJavaJdbcTestDatabase.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.source.scaffold_java_jdbc; + +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.testutils.TestDatabase; +import java.util.stream.Stream; +import org.jooq.SQLDialect; +import org.testcontainers.containers.JdbcDatabaseContainer; + +public class ScaffoldJavaJdbcTestDatabase + extends TestDatabase, ScaffoldJavaJdbcTestDatabase, ScaffoldJavaJdbcTestDatabase.ScaffoldJavaJdbcConfigBuilder> { + + public ScaffoldJavaJdbcTestDatabase(JdbcDatabaseContainer container) { + // TODO: (optional) consider also implementing a ContainerFactory to share testcontainer instances. + // Effective use requires parallelizing the tests using JUnit instead of gradle. + // This is best achieved by adding a `gradle.properties` file containing + // `testExecutionConcurrency=-1`. + super(container); + } + + @Override + protected Stream> inContainerBootstrapCmd() { + // TODO: return a stream of streams of command args to be passed to `execInContainer` calls to set + // up the test state. + // This usually involves the execution of CREATE DATABASE and CREATE USER statements as root. + return Stream.empty(); + } + + @Override + protected Stream inContainerUndoBootstrapCmd() { + // TODO: (optional) return a stream of command args to be passed to a `execInContainer` call to + // clean up the test state. + return Stream.empty(); + } + + @Override + public DatabaseDriver getDatabaseDriver() { + // TODO: return a suitable value. + return DatabaseDriver.POSTGRESQL; + } + + @Override + public SQLDialect getSqlDialect() { + // TODO: return a suitable value. + return SQLDialect.DEFAULT; + } + + @Override + public ScaffoldJavaJdbcConfigBuilder configBuilder() { + // TODO: flesh out the ConfigBuilder subclass and return a new instance of it here. 
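    // (Illustrative sketch: a fleshed-out builder typically mirrors the PostgresConfigBuilder added
    // earlier in this change, layering fluent helpers on top of the generic ConfigBuilder, e.g.
    //   public ScaffoldJavaJdbcConfigBuilder withSchemas(String... schemas) {
    //     return with(JdbcUtils.SCHEMAS_KEY, List.of(schemas));
    //   }
    // where JdbcUtils.SCHEMAS_KEY comes from the CDK's JdbcUtils.)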
+ return new ScaffoldJavaJdbcConfigBuilder(this); + } + + public static class ScaffoldJavaJdbcConfigBuilder extends TestDatabase.ConfigBuilder { + + public ScaffoldJavaJdbcConfigBuilder(ScaffoldJavaJdbcTestDatabase testDatabase) { + super(testDatabase); + } + + } + +} diff --git a/build.gradle b/build.gradle index 99ea87e6f74c..dd81f11462cb 100644 --- a/build.gradle +++ b/build.gradle @@ -332,6 +332,8 @@ subprojects { subproj -> // Effectively disable JUnit concurrency by running tests in only one thread by default. systemProperty 'junit.jupiter.execution.parallel.config.strategy', 'fixed' systemProperty 'junit.jupiter.execution.parallel.config.fixed.parallelism', 1 + // Order test classes by annotation. + systemProperty 'junit.jupiter.testclass.order.default', 'org.junit.jupiter.api.ClassOrderer$OrderAnnotation' if (!subproj.hasProperty('testExecutionConcurrency')) { // By default, let gradle spawn as many independent workers as it wants. diff --git a/buildSrc/src/main/groovy/airbyte-java-connector.gradle b/buildSrc/src/main/groovy/airbyte-java-connector.gradle index 7c86cf297748..9d8a60ed88c8 100644 --- a/buildSrc/src/main/groovy/airbyte-java-connector.gradle +++ b/buildSrc/src/main/groovy/airbyte-java-connector.gradle @@ -5,11 +5,10 @@ Also facilitates importing and working with the Java CDK. import org.gradle.api.Plugin import org.gradle.api.Project -import org.gradle.api.tasks.testing.Test class AirbyteJavaConnectorExtension { - boolean useLocalCdk = true + boolean useLocalCdk String cdkVersionRequired List features = [] // e.g. 'db-sources', 'db-destinations' Project project @@ -18,67 +17,56 @@ class AirbyteJavaConnectorExtension { this.project = project } + void setUseLocalCdk(boolean useLocalCdk) { + this.useLocalCdk = useLocalCdk + addCdkDependencies() + } + + static final List IMPLEMENTATION = [ + 'airbyte-commons', + 'airbyte-json-validation', + 'airbyte-commons-cli', + 'airbyte-api', + 'config-models-oss', + 'init-oss', + ] + + static final List TEST_IMPLEMENTATION = [ + 'airbyte-commons', + 'airbyte-json-validation', + 'airbyte-api', + 'config-models-oss', + ] + + static final List INTEGRATION_TEST_IMPLEMENTATION = [ + 'config-models-oss', + 'init-oss', + 'acceptance-test-harness', + ] + void addCdkDependencies() { - // Create a list of CDK submodules to import - def submoduleNames = ['core'] - features.each { feature -> - submoduleNames.add(feature) - } - if (useLocalCdk) { - project.dependencies { - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-json-validation') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons-cli') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-api') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - implementation project.project(':airbyte-cdk:java:airbyte-cdk:init-oss') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-commons') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-json-validation') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:airbyte-api') - testImplementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - integrationTestJavaImplementation project.project(':airbyte-cdk:java:airbyte-cdk:config-models-oss') - integrationTestJavaImplementation project.project(':airbyte-cdk:java:airbyte-cdk:init-oss') - integrationTestJavaImplementation 
project.project(':airbyte-cdk:java:airbyte-cdk:acceptance-test-harness') - submoduleNames.each { submoduleName -> - // Add the CDK module to the dependencies - def cdkModule = project.project(":airbyte-cdk:java:airbyte-cdk:${submoduleName}") - def testFixturesRef = testFixtures(project.project(":airbyte-cdk:java:airbyte-cdk:${submoduleName}")) - implementation cdkModule - testImplementation cdkModule - testImplementation testFixturesRef - integrationTestJavaImplementation cdkModule - integrationTestJavaImplementation testFixturesRef - performanceTestJavaImplementation cdkModule - performanceTestJavaImplementation testFixturesRef - } + def projectName = { ":airbyte-cdk:java:airbyte-cdk:${it}" } + def jarName = { "io.airbyte.cdk:airbyte-cdk-${it}:${cdkVersionRequired}" } + project.dependencies { + def dep = { useLocalCdk ? project.project(projectName(it)) : jarName(it) } + def testFixturesDep = { useLocalCdk ? testFixtures(project.project(projectName(it))) : "${jarName(it)}:test-fixtures" } + + IMPLEMENTATION.each { + implementation dep(it) + testFixturesImplementation dep(it) } - } else { - project.dependencies { - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-json-validation:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons-cli:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-airbyte-api:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - implementation "io.airbyte.cdk:airbyte-cdk-init-oss:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-commons:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-json-validation:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-airbyte-api:${cdkVersionRequired}" - testImplementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-config-models-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-init-oss:${cdkVersionRequired}" - integrationTestJavaImplementation "io.airbyte.cdk:airbyte-cdk-acceptance-test-harness:${cdkVersionRequired}" - submoduleNames.each { submoduleName -> - // Add the cdkModule to the dependencies - def cdkModule = "io.airbyte.cdk:airbyte-cdk-${submoduleName}:${cdkVersionRequired}" - def testFixturesRef = "io.airbyte.cdk:airbyte-cdk-${submoduleName}:${cdkVersionRequired}:test-fixtures" - implementation cdkModule - testImplementation cdkModule - testImplementation testFixturesRef - integrationTestJavaImplementation cdkModule - integrationTestJavaImplementation testFixturesRef - performanceTestJavaImplementation cdkModule - performanceTestJavaImplementation testFixturesRef - } + TEST_IMPLEMENTATION.each {testImplementation dep(it) } + INTEGRATION_TEST_IMPLEMENTATION.each {integrationTestJavaImplementation dep(it) } + (["core"] + features).each { + implementation dep(it) + testFixturesImplementation dep(it) + testFixturesImplementation testFixturesDep(it) + testImplementation dep(it) + testImplementation testFixturesDep(it) + integrationTestJavaImplementation dep(it) + integrationTestJavaImplementation testFixturesDep(it) + performanceTestJavaImplementation dep(it) + performanceTestJavaImplementation testFixturesDep(it) } } } @@ -89,24 +77,25 @@ class AirbyteJavaConnectorPlugin implements Plugin { @Override void apply(Project project) { - // 
def cdkTargetVersion = project.ext.getCdkTargetVersion(project) - def extension = project.extensions.create('airbyteJavaConnector', AirbyteJavaConnectorExtension, project) + project.plugins.apply('java-test-fixtures') project.plugins.apply(AirbyteIntegrationTestJavaPlugin) project.plugins.apply(AirbytePerformanceTestJavaPlugin) + project.configurations { + testFixturesImplementation.extendsFrom implementation + testFixturesRuntimeOnly.extendsFrom runtimeOnly + } + project.dependencies { // Integration and performance tests should automatically // have access to the project's own main source sets. integrationTestJavaImplementation project + integrationTestJavaImplementation testFixtures(project) performanceTestJavaImplementation project + performanceTestJavaImplementation testFixtures(project) } - // TODO: figure out how to make this work. - // Currently fails with error: - // "Cannot change dependencies of dependency configuration '...' after it has been included in dependency resolution." - // project.afterEvaluate(proj -> { - // extension.addCdkDependencies(); - // }); + project.extensions.create('airbyteJavaConnector', AirbyteJavaConnectorExtension, project) } } diff --git a/docs/integrations/sources/mssql.md b/docs/integrations/sources/mssql.md index c73999857959..c2029242225d 100644 --- a/docs/integrations/sources/mssql.md +++ b/docs/integrations/sources/mssql.md @@ -342,6 +342,7 @@ WHERE actor_definition_id ='b5ea17b1-f170-46dc-bc31-cc744ca984c1' AND (configura | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------| +| 3.0.1 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. | | 3.0.0 | 2023-11-07 | [31531](https://github.com/airbytehq/airbyte/pull/31531) | Remapped date, smalldatetime, datetime2, time, and datetimeoffset datatype to their correct Airbyte types | | 2.0.4 | 2023-11-06 | [#32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | | 2.0.3 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | diff --git a/docs/integrations/sources/mysql.md b/docs/integrations/sources/mysql.md index 9f5c110266c7..2befdd9f78bf 100644 --- a/docs/integrations/sources/mysql.md +++ b/docs/integrations/sources/mysql.md @@ -220,136 +220,137 @@ Any database or table encoding combination of charset and collation is supported ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:-----------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 3.1.7 | 2023-11-08 | [32125](https://github.com/airbytehq/airbyte/pull/32125) | fix compilation warnings | -| 3.1.6 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | -| 3.1.5 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | -| 3.1.4 | 2023-10-30 | [31960](https://github.com/airbytehq/airbyte/pull/31960) | Adopt java CDK version 0.2.0. 
| -| 3.1.3 | 2023-10-11 | [31322](https://github.com/airbytehq/airbyte/pull/31322) | Correct pevious release | -| 3.1.2 | 2023-09-29 | [30806](https://github.com/airbytehq/airbyte/pull/30806) | Cap log line length to 32KB to prevent loss of records | -| 3.1.1 | 2023-09-26 | [30744](https://github.com/airbytehq/airbyte/pull/30744) | Update MySQL JDBC connection configs to keep default auto-commit behavior | -| 3.1.0 | 2023-09-21 | [30270](https://github.com/airbytehq/airbyte/pull/30270) | Enhanced Standard Sync with initial load via Primary Key with a switch to cursor for incremental syncs | -| 3.0.9 | 2023-09-20 | [30620](https://github.com/airbytehq/airbyte/pull/30620) | Airbyte Certified MySQL Source connector | -| 3.0.8 | 2023-09-14 | [30333](https://github.com/airbytehq/airbyte/pull/30333) | CDC : Update the correct timezone parameter passed to Debezium to `database.connectionTimezone` | -| 3.0.7 | 2023-09-13 | [30375](https://github.com/airbytehq/airbyte/pull/30375) | Fix a bug causing a failure when DB views are included in sync | -| 3.0.6 | 2023-09-12 | [30308](https://github.com/airbytehq/airbyte/pull/30308) | CDC : Enable compression of schema history blob in state | -| 3.0.5 | 2023-09-12 | [30289](https://github.com/airbytehq/airbyte/pull/30289) | CDC : Introduce logic for compression of schema history blob in state | -| 3.0.4 | 2023-09-06 | [30213](https://github.com/airbytehq/airbyte/pull/30213) | CDC : Checkpointable initial snapshot | -| 3.0.3 | 2023-08-31 | [29821](https://github.com/airbytehq/airbyte/pull/29821) | Set replication_method display_type to radio | -| 3.0.2 | 2023-08-30 | [30015](https://github.com/airbytehq/airbyte/pull/30015) | Logging : Log storage engines associated with tables in the sync | -| 3.0.1 | 2023-08-21 | [29308](https://github.com/airbytehq/airbyte/pull/29308) | CDC: Enable frequent state emissions during incremental runs | -| 3.0.0 | 2023-08-08 | [28756](https://github.com/airbytehq/airbyte/pull/28756) | CDC: Set a default cursor | -| 2.1.2 | 2023-08-08 | [29220](https://github.com/airbytehq/airbyte/pull/29220) | Add indicator that CDC is the recommended update method | -| 2.1.1 | 2023-07-31 | [28882](https://github.com/airbytehq/airbyte/pull/28882) | Improve replication method labels and descriptions | -| 2.1.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | -| 2.0.25 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | -| 2.0.24 | 2023-05-25 | [26473](https://github.com/airbytehq/airbyte/pull/26473) | CDC : Limit queue size | -| 2.0.23 | 2023-05-24 | [25586](https://github.com/airbytehq/airbyte/pull/25586) | No need to base64 encode strings on databases sorted with binary collation | -| 2.0.22 | 2023-05-22 | [25859](https://github.com/airbytehq/airbyte/pull/25859) | Allow adding sessionVariables JDBC parameters | -| 2.0.21 | 2023-05-10 | [25460](https://github.com/airbytehq/airbyte/pull/25460) | Handle a decimal number with 0 decimal points as an integer | -| 2.0.20 | 2023-05-01 | [25740](https://github.com/airbytehq/airbyte/pull/25740) | Disable index logging | -| 2.0.19 | 2023-04-26 | [25401](https://github.com/airbytehq/airbyte/pull/25401) | CDC : Upgrade Debezium to version 2.2.0 | -| 2.0.18 | 2023-04-19 | [25345](https://github.com/airbytehq/airbyte/pull/25345) | Logging : Log database indexes per stream | -| 2.0.17 | 2023-04-19 | [24582](https://github.com/airbytehq/airbyte/pull/24582) | CDC : refactor for 
performance improvement | -| 2.0.16 | 2023-04-17 | [25220](https://github.com/airbytehq/airbyte/pull/25220) | Logging changes : Log additional metadata & clean up noisy logs | -| 2.0.15 | 2023-04-12 | [25131](https://github.com/airbytehq/airbyte/pull/25131) | Make Client Certificate and Client Key always show | -| 2.0.14 | 2023-04-11 | [24656](https://github.com/airbytehq/airbyte/pull/24656) | CDC minor refactor | -| 2.0.13 | 2023-04-06 | [24820](https://github.com/airbytehq/airbyte/pull/24820) | Fix data loss bug during an initial failed non-CDC incremental sync | -| 2.0.12 | 2023-04-04 | [24833](https://github.com/airbytehq/airbyte/pull/24833) | Fix Debezium retry policy configuration | -| 2.0.11 | 2023-03-28 | [24166](https://github.com/airbytehq/airbyte/pull/24166) | Fix InterruptedException bug during Debezium shutdown | -| 2.0.10 | 2023-03-27 | [24529](https://github.com/airbytehq/airbyte/pull/24373) | Preparing the connector for CDC checkpointing | -| 2.0.9 | 2023-03-24 | [24529](https://github.com/airbytehq/airbyte/pull/24529) | Set SSL Mode to required on strict-encrypt variant | -| 2.0.8 | 2023-03-22 | [20760](https://github.com/airbytehq/airbyte/pull/20760) | Removed redundant date-time datatypes formatting | -| 2.0.7 | 2023-03-21 | [24207](https://github.com/airbytehq/airbyte/pull/24207) | Fix incorrect schema change warning in CDC mode | -| 2.0.6 | 2023-03-21 | [23984](https://github.com/airbytehq/airbyte/pull/23984) | Support CDC heartbeats | -| 2.0.5 | 2023-03-21 | [24147](https://github.com/airbytehq/airbyte/pull/24275) | Fix error with CDC checkpointing | -| 2.0.4 | 2023-03-20 | [24147](https://github.com/airbytehq/airbyte/pull/24147) | Support different table structure during "DESCRIBE" query | -| 2.0.3 | 2023-03-15 | [24082](https://github.com/airbytehq/airbyte/pull/24082) | Fixed NPE during cursor values validation | -| 2.0.2 | 2023-03-14 | [23908](https://github.com/airbytehq/airbyte/pull/23908) | Log warning on null cursor values | -| 2.0.1 | 2023-03-10 | [23939](https://github.com/airbytehq/airbyte/pull/23939) | For network isolation, source connector accepts a list of hosts it is allowed to connect | -| 2.0.0 | 2023-03-06 | [23112](https://github.com/airbytehq/airbyte/pull/23112) | Upgrade Debezium version to 2.1.2 | -| 1.0.21 | 2023-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. | -| 1.0.20 | 2023-01-24 | [20593](https://github.com/airbytehq/airbyte/pull/20593) | Handle ssh time out exception | -| 1.0.19 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | -| 1.0.18 | 2022-12-14 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions | -| 1.0.17 | 2022-12-13 | [20289](https://github.com/airbytehq/airbyte/pull/20289) | Mark unknown column exception as config error | -| 1.0.16 | 2022-12-12 | [18959](https://github.com/airbytehq/airbyte/pull/18959) | CDC : Don't timeout if snapshot is not complete. | -| 1.0.15 | 2022-12-06 | [20000](https://github.com/airbytehq/airbyte/pull/20000) | Add check and better messaging when user does not have permission to access binary log in CDC mode | -| 1.0.14 | 2022-11-22 | [19514](https://github.com/airbytehq/airbyte/pull/19514) | Adjust batch selection memory limits databases. 
| -| 1.0.13 | 2022-11-14 | [18956](https://github.com/airbytehq/airbyte/pull/18956) | Clean up Tinyint Unsigned data type identification | -| 1.0.12 | 2022-11-07 | [19025](https://github.com/airbytehq/airbyte/pull/19025) | Stop enforce SSL if ssl mode is disabled | -| 1.0.11 | 2022-11-03 | [18851](https://github.com/airbytehq/airbyte/pull/18851) | Fix bug with unencrypted CDC connections | -| 1.0.10 | 2022-11-02 | [18619](https://github.com/airbytehq/airbyte/pull/18619) | Fix bug with handling Tinyint(1) Unsigned values as boolean | -| 1.0.9 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name | -| 1.0.8 | 2022-10-25 | [18383](https://github.com/airbytehq/airbyte/pull/18383) | Better SSH error handling + messages | -| 1.0.7 | 2022-10-21 | [18263](https://github.com/airbytehq/airbyte/pull/18263) | Fixes bug introduced in [15833](https://github.com/airbytehq/airbyte/pull/15833) and adds better error messaging for SSH tunnel in Destinations | -| 1.0.6 | 2022-10-19 | [18087](https://github.com/airbytehq/airbyte/pull/18087) | Better error messaging for configuration errors (SSH configs, choosing an invalid cursor) | -| 1.0.5 | 2022-10-17 | [18041](https://github.com/airbytehq/airbyte/pull/18041) | Fixes bug introduced 2022-09-12 with SshTunnel, handles iterator exception properly | -| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid data missing when new data is inserted at the same time as a sync starts under non-CDC incremental mode | -| 1.0.4 | 2022-10-11 | [17815](https://github.com/airbytehq/airbyte/pull/17815) | Expose setting server timezone for CDC syncs | -| 1.0.3 | 2022-10-07 | [17236](https://github.com/airbytehq/airbyte/pull/17236) | Fix large table issue by fetch size | -| 1.0.2 | 2022-10-03 | [17170](https://github.com/airbytehq/airbyte/pull/17170) | Make initial CDC waiting time configurable | -| 1.0.1 | 2022-10-01 | [17459](https://github.com/airbytehq/airbyte/pull/17459) | Upgrade debezium version to 1.9.6 from 1.9.2 | -| 1.0.0 | 2022-09-27 | [17164](https://github.com/airbytehq/airbyte/pull/17164) | Certify MySQL Source as Beta | -| 0.6.15 | 2022-09-27 | [17299](https://github.com/airbytehq/airbyte/pull/17299) | Improve error handling for strict-encrypt mysql source | -| 0.6.14 | 2022-09-26 | [16954](https://github.com/airbytehq/airbyte/pull/16954) | Implement support for snapshot of new tables in CDC mode | -| 0.6.13 | 2022-09-14 | [15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage | -| 0.6.12 | 2022-09-13 | [16657](https://github.com/airbytehq/airbyte/pull/16657) | Improve CDC record queueing performance | -| 0.6.11 | 2022-09-08 | [16202](https://github.com/airbytehq/airbyte/pull/16202) | Adds error messaging factory to UI | -| 0.6.10 | 2022-09-08 | [16007](https://github.com/airbytehq/airbyte/pull/16007) | Implement per stream state support. | -| 0.6.9 | 2022-09-03 | [16216](https://github.com/airbytehq/airbyte/pull/16216) | Standardize spec for CDC replication. See upgrade instructions [above](#upgrading-from-0.6.8-and-older-versions-to-0.6.9-and-later-versions). 
| -| 0.6.8 | 2022-09-01 | [16259](https://github.com/airbytehq/airbyte/pull/16259) | Emit state messages more frequently | -| 0.6.7 | 2022-08-30 | [16114](https://github.com/airbytehq/airbyte/pull/16114) | Prevent traffic going on an unsecured channel in strict-encryption version of source mysql | -| 0.6.6 | 2022-08-25 | [15993](https://github.com/airbytehq/airbyte/pull/15993) | Improved support for connecting over SSL | -| 0.6.5 | 2022-08-25 | [15917](https://github.com/airbytehq/airbyte/pull/15917) | Fix temporal data type default value bug | -| 0.6.4 | 2022-08-18 | [14356](https://github.com/airbytehq/airbyte/pull/14356) | DB Sources: only show a table can sync incrementally if at least one column can be used as a cursor field | -| 0.6.3 | 2022-08-12 | [15044](https://github.com/airbytehq/airbyte/pull/15044) | Added the ability to connect using different SSL modes and SSL certificates | -| 0.6.2 | 2022-08-11 | [15538](https://github.com/airbytehq/airbyte/pull/15538) | Allow additional properties in db stream state | -| 0.6.1 | 2022-08-02 | [14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings | -| 0.6.0 | 2022-07-26 | [14362](https://github.com/airbytehq/airbyte/pull/14362) | Integral columns are now discovered as int64 fields. | -| 0.5.17 | 2022-07-22 | [14714](https://github.com/airbytehq/airbyte/pull/14714) | Clarified error message when invalid cursor column selected | -| 0.5.16 | 2022-07-14 | [14574](https://github.com/airbytehq/airbyte/pull/14574) | Removed additionalProperties:false from JDBC source connectors | -| 0.5.15 | 2022-06-23 | [14077](https://github.com/airbytehq/airbyte/pull/14077) | Use the new state management | -| 0.5.13 | 2022-06-21 | [13945](https://github.com/airbytehq/airbyte/pull/13945) | Aligned datatype test | -| 0.5.12 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors | -| 0.5.11 | 2022-05-03 | [12544](https://github.com/airbytehq/airbyte/pull/12544) | Prevent source from hanging under certain circumstances by adding a watcher for orphaned threads. | -| 0.5.10 | 2022-04-29 | [12480](https://github.com/airbytehq/airbyte/pull/12480) | Query tables with adaptive fetch size to optimize JDBC memory consumption | -| 0.5.9 | 2022-04-06 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Bump mina-sshd from 2.7.0 to 2.8.0 | -| 0.5.6 | 2022-02-21 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Fixed cursor for old connectors that use non-microsecond format. 
Now connectors work with both formats | -| 0.5.5 | 2022-02-18 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Updated timestamp transformation with microseconds | -| 0.5.4 | 2022-02-11 | [10251](https://github.com/airbytehq/airbyte/issues/10251) | bug Source MySQL CDC: sync failed when has Zero-date value in mandatory column | -| 0.5.2 | 2021-12-14 | [6425](https://github.com/airbytehq/airbyte/issues/6425) | MySQL CDC sync fails because starting binlog position not found in DB | -| 0.5.1 | 2021-12-13 | [8582](https://github.com/airbytehq/airbyte/pull/8582) | Update connector fields title/description | -| 0.5.0 | 2021-12-11 | [7970](https://github.com/airbytehq/airbyte/pull/7970) | Support all MySQL types | -| 0.4.13 | 2021-12-03 | [8335](https://github.com/airbytehq/airbyte/pull/8335) | Source-MySql: do not check cdc required param binlog_row_image for standard replication | -| 0.4.12 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | -| 0.4.11 | 2021-11-19 | [8047](https://github.com/airbytehq/airbyte/pull/8047) | Source MySQL: transform binary data base64 format | -| 0.4.10 | 2021-11-15 | [7820](https://github.com/airbytehq/airbyte/pull/7820) | Added basic performance test | -| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside java's `Short` data type capability | -| 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text | -| 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps | -| 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection | -| 0.4.5 | 2021-09-17 | [6146](https://github.com/airbytehq/airbyte/pull/6146) | Added option to connect to DB via SSH | -| 0.4.1 | 2021-07-23 | [4956](https://github.com/airbytehq/airbyte/pull/4956) | Fix log link | -| 0.3.7 | 2021-06-09 | [3179](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support | -| 0.3.6 | 2021-06-09 | [3966](https://github.com/airbytehq/airbyte/pull/3966) | Fix excessive logging for CDC method | -| 0.3.5 | 2021-06-07 | [3890](https://github.com/airbytehq/airbyte/pull/3890) | Fix CDC handle tinyint\(1\) and boolean types | -| 0.3.4 | 2021-06-04 | [3846](https://github.com/airbytehq/airbyte/pull/3846) | Fix max integer value failure | -| 0.3.3 | 2021-06-02 | [3789](https://github.com/airbytehq/airbyte/pull/3789) | MySQL CDC poll wait 5 minutes when not received a single record | -| 0.3.2 | 2021-06-01 | [3757](https://github.com/airbytehq/airbyte/pull/3757) | MySQL CDC poll 5s to 5 min | -| 0.3.1 | 2021-06-01 | [3505](https://github.com/airbytehq/airbyte/pull/3505) | Implemented MySQL CDC | -| 0.3.0 | 2021-04-21 | [2990](https://github.com/airbytehq/airbyte/pull/2990) | Support namespaces | -| 0.2.5 | 2021-04-15 | [2899](https://github.com/airbytehq/airbyte/pull/2899) | Fix bug in tests | -| 0.2.4 | 2021-03-28 | [2600](https://github.com/airbytehq/airbyte/pull/2600) | Add NCHAR and NVCHAR support to DB and cursor type casting | -| 0.2.3 | 2021-03-26 | [2611](https://github.com/airbytehq/airbyte/pull/2611) | Add an optional `jdbc_url_params` in parameters | -| 0.2.2 | 2021-03-26 | [2460](https://github.com/airbytehq/airbyte/pull/2460) | Destination supports destination sync mode | 
-| 0.2.1 | 2021-03-18 | [2488](https://github.com/airbytehq/airbyte/pull/2488) | Sources support primary keys | -| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties | -| 0.1.10 | 2021-02-02 | [1887](https://github.com/airbytehq/airbyte/pull/1887) | Migrate AbstractJdbcSource to use iterators | -| 0.1.9 | 2021-01-25 | [1746](https://github.com/airbytehq/airbyte/pull/1746) | Fix NPE in State Decorator | -| 0.1.8 | 2021-01-19 | [1724](https://github.com/airbytehq/airbyte/pull/1724) | Fix JdbcSource handling of tables with same names in different schemas | -| 0.1.7 | 2021-01-14 | [1655](https://github.com/airbytehq/airbyte/pull/1655) | Fix JdbcSource OOM | -| 0.1.6 | 2021-01-08 | [1307](https://github.com/airbytehq/airbyte/pull/1307) | Migrate Postgres and MySQL to use new JdbcSource | -| 0.1.5 | 2020-12-11 | [1267](https://github.com/airbytehq/airbyte/pull/1267) | Support incremental sync | -| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 3.1.8 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. | +| 3.1.7 | 2023-11-08 | [32125](https://github.com/airbytehq/airbyte/pull/32125) | fix compilation warnings | +| 3.1.6 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | +| 3.1.5 | 2023-10-31 | [32024](https://github.com/airbytehq/airbyte/pull/32024) | Upgrade to Debezium version 2.4.0. | +| 3.1.4 | 2023-10-30 | [31960](https://github.com/airbytehq/airbyte/pull/31960) | Adopt java CDK version 0.2.0. 
| +| 3.1.3 | 2023-10-11 | [31322](https://github.com/airbytehq/airbyte/pull/31322) | Correct pevious release | +| 3.1.2 | 2023-09-29 | [30806](https://github.com/airbytehq/airbyte/pull/30806) | Cap log line length to 32KB to prevent loss of records | +| 3.1.1 | 2023-09-26 | [30744](https://github.com/airbytehq/airbyte/pull/30744) | Update MySQL JDBC connection configs to keep default auto-commit behavior | +| 3.1.0 | 2023-09-21 | [30270](https://github.com/airbytehq/airbyte/pull/30270) | Enhanced Standard Sync with initial load via Primary Key with a switch to cursor for incremental syncs | +| 3.0.9 | 2023-09-20 | [30620](https://github.com/airbytehq/airbyte/pull/30620) | Airbyte Certified MySQL Source connector | +| 3.0.8 | 2023-09-14 | [30333](https://github.com/airbytehq/airbyte/pull/30333) | CDC : Update the correct timezone parameter passed to Debezium to `database.connectionTimezone` | +| 3.0.7 | 2023-09-13 | [30375](https://github.com/airbytehq/airbyte/pull/30375) | Fix a bug causing a failure when DB views are included in sync | +| 3.0.6 | 2023-09-12 | [30308](https://github.com/airbytehq/airbyte/pull/30308) | CDC : Enable compression of schema history blob in state | +| 3.0.5 | 2023-09-12 | [30289](https://github.com/airbytehq/airbyte/pull/30289) | CDC : Introduce logic for compression of schema history blob in state | +| 3.0.4 | 2023-09-06 | [30213](https://github.com/airbytehq/airbyte/pull/30213) | CDC : Checkpointable initial snapshot | +| 3.0.3 | 2023-08-31 | [29821](https://github.com/airbytehq/airbyte/pull/29821) | Set replication_method display_type to radio | +| 3.0.2 | 2023-08-30 | [30015](https://github.com/airbytehq/airbyte/pull/30015) | Logging : Log storage engines associated with tables in the sync | +| 3.0.1 | 2023-08-21 | [29308](https://github.com/airbytehq/airbyte/pull/29308) | CDC: Enable frequent state emissions during incremental runs | +| 3.0.0 | 2023-08-08 | [28756](https://github.com/airbytehq/airbyte/pull/28756) | CDC: Set a default cursor | +| 2.1.2 | 2023-08-08 | [29220](https://github.com/airbytehq/airbyte/pull/29220) | Add indicator that CDC is the recommended update method | +| 2.1.1 | 2023-07-31 | [28882](https://github.com/airbytehq/airbyte/pull/28882) | Improve replication method labels and descriptions | +| 2.1.0 | 2023-06-26 | [27737](https://github.com/airbytehq/airbyte/pull/27737) | License Update: Elv2 | +| 2.0.25 | 2023-06-20 | [27212](https://github.com/airbytehq/airbyte/pull/27212) | Fix silent exception swallowing in StreamingJdbcDatabase | +| 2.0.24 | 2023-05-25 | [26473](https://github.com/airbytehq/airbyte/pull/26473) | CDC : Limit queue size | +| 2.0.23 | 2023-05-24 | [25586](https://github.com/airbytehq/airbyte/pull/25586) | No need to base64 encode strings on databases sorted with binary collation | +| 2.0.22 | 2023-05-22 | [25859](https://github.com/airbytehq/airbyte/pull/25859) | Allow adding sessionVariables JDBC parameters | +| 2.0.21 | 2023-05-10 | [25460](https://github.com/airbytehq/airbyte/pull/25460) | Handle a decimal number with 0 decimal points as an integer | +| 2.0.20 | 2023-05-01 | [25740](https://github.com/airbytehq/airbyte/pull/25740) | Disable index logging | +| 2.0.19 | 2023-04-26 | [25401](https://github.com/airbytehq/airbyte/pull/25401) | CDC : Upgrade Debezium to version 2.2.0 | +| 2.0.18 | 2023-04-19 | [25345](https://github.com/airbytehq/airbyte/pull/25345) | Logging : Log database indexes per stream | +| 2.0.17 | 2023-04-19 | [24582](https://github.com/airbytehq/airbyte/pull/24582) | CDC : refactor for 
performance improvement | +| 2.0.16 | 2023-04-17 | [25220](https://github.com/airbytehq/airbyte/pull/25220) | Logging changes : Log additional metadata & clean up noisy logs | +| 2.0.15 | 2023-04-12 | [25131](https://github.com/airbytehq/airbyte/pull/25131) | Make Client Certificate and Client Key always show | +| 2.0.14 | 2023-04-11 | [24656](https://github.com/airbytehq/airbyte/pull/24656) | CDC minor refactor | +| 2.0.13 | 2023-04-06 | [24820](https://github.com/airbytehq/airbyte/pull/24820) | Fix data loss bug during an initial failed non-CDC incremental sync | +| 2.0.12 | 2023-04-04 | [24833](https://github.com/airbytehq/airbyte/pull/24833) | Fix Debezium retry policy configuration | +| 2.0.11 | 2023-03-28 | [24166](https://github.com/airbytehq/airbyte/pull/24166) | Fix InterruptedException bug during Debezium shutdown | +| 2.0.10 | 2023-03-27 | [24529](https://github.com/airbytehq/airbyte/pull/24373) | Preparing the connector for CDC checkpointing | +| 2.0.9 | 2023-03-24 | [24529](https://github.com/airbytehq/airbyte/pull/24529) | Set SSL Mode to required on strict-encrypt variant | +| 2.0.8 | 2023-03-22 | [20760](https://github.com/airbytehq/airbyte/pull/20760) | Removed redundant date-time datatypes formatting | +| 2.0.7 | 2023-03-21 | [24207](https://github.com/airbytehq/airbyte/pull/24207) | Fix incorrect schema change warning in CDC mode | +| 2.0.6 | 2023-03-21 | [23984](https://github.com/airbytehq/airbyte/pull/23984) | Support CDC heartbeats | +| 2.0.5 | 2023-03-21 | [24147](https://github.com/airbytehq/airbyte/pull/24275) | Fix error with CDC checkpointing | +| 2.0.4 | 2023-03-20 | [24147](https://github.com/airbytehq/airbyte/pull/24147) | Support different table structure during "DESCRIBE" query | +| 2.0.3 | 2023-03-15 | [24082](https://github.com/airbytehq/airbyte/pull/24082) | Fixed NPE during cursor values validation | +| 2.0.2 | 2023-03-14 | [23908](https://github.com/airbytehq/airbyte/pull/23908) | Log warning on null cursor values | +| 2.0.1 | 2023-03-10 | [23939](https://github.com/airbytehq/airbyte/pull/23939) | For network isolation, source connector accepts a list of hosts it is allowed to connect | +| 2.0.0 | 2023-03-06 | [23112](https://github.com/airbytehq/airbyte/pull/23112) | Upgrade Debezium version to 2.1.2 | +| 1.0.21 | 2023-01-25 | [20939](https://github.com/airbytehq/airbyte/pull/20939) | Adjust batch selection memory limits databases. | +| 1.0.20 | 2023-01-24 | [20593](https://github.com/airbytehq/airbyte/pull/20593) | Handle ssh time out exception | +| 1.0.19 | 2022-12-14 | [20436](https://github.com/airbytehq/airbyte/pull/20346) | Consolidate date/time values mapping for JDBC sources | +| 1.0.18 | 2022-12-14 | [20378](https://github.com/airbytehq/airbyte/pull/20378) | Improve descriptions | +| 1.0.17 | 2022-12-13 | [20289](https://github.com/airbytehq/airbyte/pull/20289) | Mark unknown column exception as config error | +| 1.0.16 | 2022-12-12 | [18959](https://github.com/airbytehq/airbyte/pull/18959) | CDC : Don't timeout if snapshot is not complete. | +| 1.0.15 | 2022-12-06 | [20000](https://github.com/airbytehq/airbyte/pull/20000) | Add check and better messaging when user does not have permission to access binary log in CDC mode | +| 1.0.14 | 2022-11-22 | [19514](https://github.com/airbytehq/airbyte/pull/19514) | Adjust batch selection memory limits databases. 
|
+| 1.0.13 | 2022-11-14 | [18956](https://github.com/airbytehq/airbyte/pull/18956) | Clean up Tinyint Unsigned data type identification |
+| 1.0.12 | 2022-11-07 | [19025](https://github.com/airbytehq/airbyte/pull/19025) | Stop enforcing SSL if SSL mode is disabled |
+| 1.0.11 | 2022-11-03 | [18851](https://github.com/airbytehq/airbyte/pull/18851) | Fix bug with unencrypted CDC connections |
+| 1.0.10 | 2022-11-02 | [18619](https://github.com/airbytehq/airbyte/pull/18619) | Fix bug with handling Tinyint(1) Unsigned values as boolean |
+| 1.0.9 | 2022-10-31 | [18538](https://github.com/airbytehq/airbyte/pull/18538) | Encode database name |
+| 1.0.8 | 2022-10-25 | [18383](https://github.com/airbytehq/airbyte/pull/18383) | Better SSH error handling + messages |
+| 1.0.7 | 2022-10-21 | [18263](https://github.com/airbytehq/airbyte/pull/18263) | Fixes bug introduced in [15833](https://github.com/airbytehq/airbyte/pull/15833) and adds better error messaging for SSH tunnel in Destinations |
+| 1.0.6 | 2022-10-19 | [18087](https://github.com/airbytehq/airbyte/pull/18087) | Better error messaging for configuration errors (SSH configs, choosing an invalid cursor) |
+| 1.0.5 | 2022-10-17 | [18041](https://github.com/airbytehq/airbyte/pull/18041) | Fixes bug introduced 2022-09-12 with SshTunnel; handles iterator exception properly |
+| | 2022-10-13 | [15535](https://github.com/airbytehq/airbyte/pull/16238) | Update incremental query to avoid missing data when new data is inserted at the same time as a sync starts under non-CDC incremental mode |
+| 1.0.4 | 2022-10-11 | [17815](https://github.com/airbytehq/airbyte/pull/17815) | Expose setting server timezone for CDC syncs |
+| 1.0.3 | 2022-10-07 | [17236](https://github.com/airbytehq/airbyte/pull/17236) | Fix large table issue by using fetch size |
+| 1.0.2 | 2022-10-03 | [17170](https://github.com/airbytehq/airbyte/pull/17170) | Make initial CDC waiting time configurable |
+| 1.0.1 | 2022-10-01 | [17459](https://github.com/airbytehq/airbyte/pull/17459) | Upgrade debezium version to 1.9.6 from 1.9.2 |
+| 1.0.0 | 2022-09-27 | [17164](https://github.com/airbytehq/airbyte/pull/17164) | Certify MySQL Source as Beta |
+| 0.6.15 | 2022-09-27 | [17299](https://github.com/airbytehq/airbyte/pull/17299) | Improve error handling for strict-encrypt MySQL source |
+| 0.6.14 | 2022-09-26 | [16954](https://github.com/airbytehq/airbyte/pull/16954) | Implement support for snapshot of new tables in CDC mode |
+| 0.6.13 | 2022-09-14 | [15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage |
+| 0.6.12 | 2022-09-13 | [16657](https://github.com/airbytehq/airbyte/pull/16657) | Improve CDC record queueing performance |
+| 0.6.11 | 2022-09-08 | [16202](https://github.com/airbytehq/airbyte/pull/16202) | Adds error messaging factory to UI |
+| 0.6.10 | 2022-09-08 | [16007](https://github.com/airbytehq/airbyte/pull/16007) | Implement per stream state support. |
+| 0.6.9 | 2022-09-03 | [16216](https://github.com/airbytehq/airbyte/pull/16216) | Standardize spec for CDC replication. See upgrade instructions [above](#upgrading-from-0.6.8-and-older-versions-to-0.6.9-and-later-versions). |
+| 0.6.8 | 2022-09-01 | [16259](https://github.com/airbytehq/airbyte/pull/16259) | Emit state messages more frequently |
+| 0.6.7 | 2022-08-30 | [16114](https://github.com/airbytehq/airbyte/pull/16114) | Prevent traffic from going over an unsecured channel in the strict-encryption version of source MySQL |
+| 0.6.6 | 2022-08-25 | [15993](https://github.com/airbytehq/airbyte/pull/15993) | Improved support for connecting over SSL |
+| 0.6.5 | 2022-08-25 | [15917](https://github.com/airbytehq/airbyte/pull/15917) | Fix temporal data type default value bug |
+| 0.6.4 | 2022-08-18 | [14356](https://github.com/airbytehq/airbyte/pull/14356) | DB Sources: only show that a table can sync incrementally if at least one column can be used as a cursor field |
+| 0.6.3 | 2022-08-12 | [15044](https://github.com/airbytehq/airbyte/pull/15044) | Added the ability to connect using different SSL modes and SSL certificates |
+| 0.6.2 | 2022-08-11 | [15538](https://github.com/airbytehq/airbyte/pull/15538) | Allow additional properties in db stream state |
+| 0.6.1 | 2022-08-02 | [14801](https://github.com/airbytehq/airbyte/pull/14801) | Fix multiple log bindings |
+| 0.6.0 | 2022-07-26 | [14362](https://github.com/airbytehq/airbyte/pull/14362) | Integral columns are now discovered as int64 fields. |
+| 0.5.17 | 2022-07-22 | [14714](https://github.com/airbytehq/airbyte/pull/14714) | Clarified error message when an invalid cursor column is selected |
+| 0.5.16 | 2022-07-14 | [14574](https://github.com/airbytehq/airbyte/pull/14574) | Removed additionalProperties:false from JDBC source connectors |
+| 0.5.15 | 2022-06-23 | [14077](https://github.com/airbytehq/airbyte/pull/14077) | Use the new state management |
+| 0.5.13 | 2022-06-21 | [13945](https://github.com/airbytehq/airbyte/pull/13945) | Aligned datatype test |
+| 0.5.12 | 2022-06-17 | [13864](https://github.com/airbytehq/airbyte/pull/13864) | Updated stacktrace format for any trace message errors |
+| 0.5.11 | 2022-05-03 | [12544](https://github.com/airbytehq/airbyte/pull/12544) | Prevent source from hanging under certain circumstances by adding a watcher for orphaned threads. |
+| 0.5.10 | 2022-04-29 | [12480](https://github.com/airbytehq/airbyte/pull/12480) | Query tables with adaptive fetch size to optimize JDBC memory consumption |
+| 0.5.9 | 2022-04-06 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Bump mina-sshd from 2.7.0 to 2.8.0 |
+| 0.5.6 | 2022-02-21 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Fixed cursor for old connectors that use non-microsecond format. Now connectors work with both formats |
+| 0.5.5 | 2022-02-18 | [10242](https://github.com/airbytehq/airbyte/pull/10242) | Updated timestamp transformation with microseconds |
+| 0.5.4 | 2022-02-11 | [10251](https://github.com/airbytehq/airbyte/issues/10251) | Bug: Source MySQL CDC sync failed when a mandatory column had a zero-date value |
+| 0.5.2 | 2021-12-14 | [6425](https://github.com/airbytehq/airbyte/issues/6425) | MySQL CDC sync fails because starting binlog position not found in DB |
+| 0.5.1 | 2021-12-13 | [8582](https://github.com/airbytehq/airbyte/pull/8582) | Update connector fields title/description |
+| 0.5.0 | 2021-12-11 | [7970](https://github.com/airbytehq/airbyte/pull/7970) | Support all MySQL types |
+| 0.4.13 | 2021-12-03 | [8335](https://github.com/airbytehq/airbyte/pull/8335) | Source MySQL: do not check the CDC-required param binlog_row_image for standard replication |
+| 0.4.12 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling of "\n" in SSH key |
+| 0.4.11 | 2021-11-19 | [8047](https://github.com/airbytehq/airbyte/pull/8047) | Source MySQL: transform binary data to base64 format |
+| 0.4.10 | 2021-11-15 | [7820](https://github.com/airbytehq/airbyte/pull/7820) | Added basic performance test |
+| 0.4.9 | 2021-11-02 | [7559](https://github.com/airbytehq/airbyte/pull/7559) | Correctly process large unsigned short integer values which may fall outside Java's `Short` data type capability |
+| 0.4.8 | 2021-09-16 | [6093](https://github.com/airbytehq/airbyte/pull/6093) | Improve reliability of processing various data types like decimals, dates, datetime, binary, and text |
+| 0.4.7 | 2021-09-30 | [6585](https://github.com/airbytehq/airbyte/pull/6585) | Improved SSH Tunnel key generation steps |
+| 0.4.6 | 2021-09-29 | [6510](https://github.com/airbytehq/airbyte/pull/6510) | Support SSL connection |
+| 0.4.5 | 2021-09-17 | [6146](https://github.com/airbytehq/airbyte/pull/6146) | Added option to connect to DB via SSH |
+| 0.4.1 | 2021-07-23 | [4956](https://github.com/airbytehq/airbyte/pull/4956) | Fix log link |
+| 0.3.7 | 2021-06-09 | [3179](https://github.com/airbytehq/airbyte/pull/3973) | Add AIRBYTE_ENTRYPOINT for Kubernetes support |
+| 0.3.6 | 2021-06-09 | [3966](https://github.com/airbytehq/airbyte/pull/3966) | Fix excessive logging for CDC method |
+| 0.3.5 | 2021-06-07 | [3890](https://github.com/airbytehq/airbyte/pull/3890) | Fix CDC handling of tinyint\(1\) and boolean types |
+| 0.3.4 | 2021-06-04 | [3846](https://github.com/airbytehq/airbyte/pull/3846) | Fix max integer value failure |
+| 0.3.3 | 2021-06-02 | [3789](https://github.com/airbytehq/airbyte/pull/3789) | MySQL CDC: poll waits 5 minutes when no record has been received |
+| 0.3.2 | 2021-06-01 | [3757](https://github.com/airbytehq/airbyte/pull/3757) | MySQL CDC: poll changed from 5s to 5 min |
+| 0.3.1 | 2021-06-01 | [3505](https://github.com/airbytehq/airbyte/pull/3505) | Implemented MySQL CDC |
+| 0.3.0 | 2021-04-21 | [2990](https://github.com/airbytehq/airbyte/pull/2990) | Support namespaces |
+| 0.2.5 | 2021-04-15 | [2899](https://github.com/airbytehq/airbyte/pull/2899) | Fix bug in tests |
+| 0.2.4 | 2021-03-28 | [2600](https://github.com/airbytehq/airbyte/pull/2600) | Add NCHAR and NVCHAR support to DB and cursor type casting |
+| 0.2.3 | 2021-03-26 | [2611](https://github.com/airbytehq/airbyte/pull/2611) | Add an optional `jdbc_url_params` to parameters |
+| 0.2.2 | 2021-03-26 | [2460](https://github.com/airbytehq/airbyte/pull/2460) | Destination supports destination sync mode |
+| 0.2.1 | 2021-03-18 | [2488](https://github.com/airbytehq/airbyte/pull/2488) | Sources support primary keys |
+| 0.2.0 | 2021-03-09 | [2238](https://github.com/airbytehq/airbyte/pull/2238) | Protocol allows future/unknown properties |
+| 0.1.10 | 2021-02-02 | [1887](https://github.com/airbytehq/airbyte/pull/1887) | Migrate AbstractJdbcSource to use iterators |
+| 0.1.9 | 2021-01-25 | [1746](https://github.com/airbytehq/airbyte/pull/1746) | Fix NPE in State Decorator |
+| 0.1.8 | 2021-01-19 | [1724](https://github.com/airbytehq/airbyte/pull/1724) | Fix JdbcSource handling of tables with same names in different schemas |
+| 0.1.7 | 2021-01-14 | [1655](https://github.com/airbytehq/airbyte/pull/1655) | Fix JdbcSource OOM |
+| 0.1.6 | 2021-01-08 | [1307](https://github.com/airbytehq/airbyte/pull/1307) | Migrate Postgres and MySQL to use new JdbcSource |
+| 0.1.5 | 2020-12-11 | [1267](https://github.com/airbytehq/airbyte/pull/1267) | Support incremental sync |
+| 0.1.4 | 2020-11-30 | [1046](https://github.com/airbytehq/airbyte/pull/1046) | Add connectors using an index YAML file |
diff --git a/docs/integrations/sources/postgres.md b/docs/integrations/sources/postgres.md
index 2d31f7286bb0..e9d7a5928d3b 100644
--- a/docs/integrations/sources/postgres.md
+++ b/docs/integrations/sources/postgres.md
@@ -291,8 +291,9 @@ According to Postgres [documentation](https://www.postgresql.org/docs/14/datatyp
 | Version | Date | Pull Request | Subject |
 |---------|------------|----------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| 3.2.21 | 2023-11-07 | [31856](https://github.com/airbytehq/airbyte/pull/31856) | handle date/timestamp infinity values properly |
-| 3.2.20 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. |
+| 3.2.22 | 2023-11-22 | [32656](https://github.com/airbytehq/airbyte/pull/32656) | Adopt java CDK version 0.5.0. |
+| 3.2.21 | 2023-11-07 | [31856](https://github.com/airbytehq/airbyte/pull/31856) | handle date/timestamp infinity values properly |
+| 3.2.20 | 2023-11-06 | [32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. |
 | 3.2.19 | 2023-11-03 | [32050](https://github.com/airbytehq/airbyte/pull/32050) | Adopt java CDK version 0.4.0. |
 | 3.2.18 | 2023-11-01 | [29038](https://github.com/airbytehq/airbyte/pull/29038) | Fix typo (s/Airbtye/Airbyte/) |
 | 3.2.17 | 2023-11-01 | [32068](https://github.com/airbytehq/airbyte/pull/32068) | Bump Debezium 2.2.0Final -> 2.4.0Final |