OZ-402: Jobs refactored to support configuring source databases + Switch to using MiniCluster (#13)

* OZ-402: Refactor Streaming, Batch and Export jobs to support using arbitrary databases, and switch to using MiniCluster
* Rename BatchParquetExport class
* Fix Connect MySQL boolean handling
* README enhanced with ChatGPT.
* Add slf4j for logging
* Update README
* Update README
* Update README
* Remove log pushed in error

Co-authored-by: Dimitri R <dimitri@mekomsolutions.com>
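For background, Flink's MiniCluster runs the whole JobManager/TaskManager topology inside a single JVM, so jobs can execute without a standalone cluster. The following is a minimal sketch of standing one up (an editor's illustration of the Flink API, not code from this commit; the class name and slot counts are assumptions):

import org.apache.flink.runtime.minicluster.MiniCluster;
import org.apache.flink.runtime.minicluster.MiniClusterConfiguration;

public class MiniClusterSketch { // hypothetical class name, not from this repo
    public static void main(String[] args) throws Exception {
        // One embedded TaskManager with two task slots is enough for local runs.
        MiniClusterConfiguration config = new MiniClusterConfiguration.Builder()
                .setNumTaskManagers(1)
                .setNumSlotsPerTaskManager(2)
                .build();
        try (MiniCluster cluster = new MiniCluster(config)) {
            cluster.start();
            // A compiled JobGraph would be submitted here, e.g.
            // cluster.executeJobBlocking(jobGraph);
        }
    }
}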
Showing 20 changed files with 782 additions and 240 deletions.
@@ -1 +1,2 @@
 parquet
+*.ser
@@ -0,0 +1,57 @@
# Configuration of JDBC catalogs, which map to actual databases and allow direct reads and writes without temporary tables
jdbcCatalogs:
  # Name of the catalog
  - name: analytics
    # Name of the default database in the catalog
    defaultDatabase: '${ANALYTICS_DB_NAME:-analytics}'
    # Database username
    username: '${ANALYTICS_DB_USER:-analytics}'
    # Database password
    password: '${ANALYTICS_DB_PASSWORD:-analytics}'
    # JDBC database URL
    baseUrl: 'jdbc:postgresql://${ANALYTICS_DB_HOST:-localhost}:${ANALYTICS_DB_PORT:-5432}'
    driver: postgresql
# Configuration for Kafka data streams used for streaming analytics
kafkaStreams:
  # Topic prefix generated by Kafka Connect
  - topicPrefix: openmrs.openmrs
    # Path to the table definitions for temporary source tables
    tableDefinitionsPath: '${ANALYTICS_SOURCE_TABLES_PATH:-/analytics/source-tables}/openmrs'
    # Kafka bootstrap servers
    bootstrapServers: '${ANALYTICS_KAFKA_URL:-localhost:9092}'
  - topicPrefix: odoo.public
    tableDefinitionsPath: '${ANALYTICS_SOURCE_TABLES_PATH:-/analytics/source-tables}/odoo'
    bootstrapServers: '${ANALYTICS_KAFKA_URL:-localhost:9092}'
# Configuration for JDBC data sources used for batch analytics
jdbcSources:
  # Database URL for the source database
  - databaseUrl: jdbc:mysql://${OPENMRS_DB_HOST:-localhost}:${OPENMRS_DB_PORT:-3306}/${OPENMRS_DB_NAME:-openmrs}?sslmode=disable
    # Username for the source database
    username: '${OPENMRS_DB_USER:-openmrs}'
    # Password for the source database
    password: '${OPENMRS_DB_PASSWORD:-openmrs}'
    tableDefinitionsPath: '${ANALYTICS_SOURCE_TABLES_PATH:-/analytics/source-tables}/openmrs'
  - databaseUrl: jdbc:postgresql://${ODOO_DB_HOST:-localhost}:${ODOO_DB_PORT:-5432}/${ODOO_DB_NAME:-odoo}?sslmode=disable
    username: '${ODOO_DB_USER:-odoo}'
    password: '${ODOO_DB_PASSWORD:-odoo}'
    tableDefinitionsPath: '${ANALYTICS_SOURCE_TABLES_PATH:-/analytics/source-tables}/odoo'
# Configuration for JDBC data sinks
jdbcSinks:
  # Name of the JDBC catalog to write into. The catalog must be defined in the jdbcCatalogs section
  - jdbcCatalog: analytics
    # Name of the database. The database must be defined in the jdbcCatalog above
    databaseName: analytics
    # The path to the queries to use for flattening data
    queryPath: '${ANALYTICS_QUERIES_PATH:-/analytics/queries}'
# Configuration for file-based data sinks
fileSinks:
  # Path to the definitions for temporary destination tables
  - destinationTableDefinitionsPath: '${EXPORT_DESTINATION_TABLES_PATH:-/export/destination-tables}'
    # The path for the queries to use for export
    queryPath: '${EXPORT_SOURCE_QUERIES_PATH:-/export/queries}'
    # The path to use for the exported output
    exportOutputPath: '${EXPORT_OUTPUT_PATH:-/tmp/parquet}'
    # The tag to use for the exported output. This is used to create a subdirectory in the exportOutputPath
    exportOutPutTag: '${EXPORT_OUTPUT_TAG:-location1}'
    # The format for export files. Currently only parquet and csv are supported
    format: parquet
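Note how every value above uses a Bash-style ${VAR:-default} placeholder: the named environment variable wins when set, otherwise the default after ':-' applies, so on an unconfigured machine baseUrl resolves to jdbc:postgresql://localhost:5432. Below is a minimal sketch of that substitution in Java, assuming placeholders are resolved against the process environment before the YAML is parsed (an illustration of the semantics only; EnvSubstitutionSketch and resolve are hypothetical names, not this repository's loader):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class EnvSubstitutionSketch { // hypothetical helper, not from this repo
    // Matches ${NAME} or ${NAME:-default}
    private static final Pattern PLACEHOLDER =
            Pattern.compile("\\$\\{([A-Za-z_][A-Za-z0-9_]*)(?::-([^}]*))?\\}");

    static String resolve(String raw) {
        Matcher m = PLACEHOLDER.matcher(raw);
        StringBuffer out = new StringBuffer();
        while (m.find()) {
            String value = System.getenv(m.group(1));
            if (value == null) {
                // Fall back to the ":-" default, or empty if none was given
                value = m.group(2) == null ? "" : m.group(2);
            }
            m.appendReplacement(out, Matcher.quoteReplacement(value));
        }
        m.appendTail(out);
        return out.toString();
    }

    public static void main(String[] args) {
        // With ANALYTICS_DB_HOST and ANALYTICS_DB_PORT unset, the defaults apply:
        System.out.println(resolve(
                "jdbc:postgresql://${ANALYTICS_DB_HOST:-localhost}:${ANALYTICS_DB_PORT:-5432}"));
        // -> jdbc:postgresql://localhost:5432
    }
}

Exporting, say, ANALYTICS_DB_HOST=db.example.com before launching a job would then change the resolved URL without editing the file.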