From c17839b339f3386c1a36002cc1b82ed370c058c7 Mon Sep 17 00:00:00 2001 From: agl29 Date: Wed, 1 Nov 2023 13:58:13 +0530 Subject: [PATCH] [Importer] Adding an option to copy and not just move files --- .../libs/indexer/src/indexer/indexers/sql.py | 6 ++++- .../indexer/src/indexer/indexers/sql_tests.py | 24 +++++++++---------- .../src/indexer/templates/importer.mako | 10 ++++++++ 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/desktop/libs/indexer/src/indexer/indexers/sql.py b/desktop/libs/indexer/src/indexer/indexers/sql.py index 7af5554c937..5064a14fdc6 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql.py @@ -85,6 +85,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco source_path = source['path'] load_data = destination['importData'] isIceberg = destination['isIceberg'] + copyFile = destination['copyFile'] external = not destination['useDefaultLocation'] external_path = destination['nonDefaultLocation'] @@ -166,7 +167,10 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco # If dir not just the file, create data dir and move file there. Make sure it's unique. external_path = external_path + '/%s%s_table' % (external_file_name, str(uuid.uuid4())) self.fs.mkdir(external_path) - self.fs.rename(source_path, external_path) + if copyFile: + self.fs.copy(source_path, external_path) + else: + self.fs.rename(source_path, external_path) elif load_data: # We'll use load data command parent_path = self.fs.parent_path(source_path) stats = self.fs.stats(parent_path) diff --git a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py index e5d885773d4..fcfbfc13f54 100644 --- a/desktop/libs/indexer/src/indexer/indexers/sql_tests.py +++ b/desktop/libs/indexer/src/indexer/indexers/sql_tests.py @@ -296,7 +296,7 @@ def test_generate_create_text_table_with_data_partition(): u'outputFormats': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}], u'customMapDelimiter': u'\\003', u'showProperties': False, u'useDefaultLocation': True, u'description': u'', u'primaryKeyObjects': [], u'customFieldDelimiter': u',', u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, - u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, + u'copyFile': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'outputFormat': u'table', u'nonDefaultLocation': u'/user/romain/customer_stats.csv', u'name': u'default.customer_stats', u'tableFormat': u'text', 'ouputFormat': u'table', @@ -371,7 +371,7 @@ def test_generate_create_kudu_table_with_data(): u'description': u'Big Data', u'primaryKeyObjects': [{u'operations': [], u'comment': u'', u'name': u'id', u'level': 0, u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False, u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'customFieldDelimiter': u',', - u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'databaseName': u'default', + u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'copyFile': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [u'id'], u'outputFormat': u'table', u'nonDefaultLocation': u'/user/admin/index_data.csv', u'name': u'index_data', @@ -527,9 +527,9 @@ def test_generate_create_parquet_table(): '''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},''' '''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",''' '''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,''' - '''"isIceberg":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,''' - '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",''' - '''"customRegexp":""}''' + '''"isIceberg":false,"copyFile":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",''' + '''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",''' + '''"customMapDelimiter":"\\\\003","customRegexp":""}''' ) path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} @@ -619,9 +619,9 @@ def test_generate_create_iceberg_table(): '''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},''' '''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",''' '''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,''' - '''"isIceberg":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,''' - '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",''' - '''"customRegexp":""}''' + '''"isIceberg":true,"copyFile":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",''' + '''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",''' + '''"customMapDelimiter":"\\\\003","customRegexp":""}''' ) path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} @@ -715,7 +715,7 @@ def test_generate_create_orc_table_transactional(): '''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],''' '''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,''' '''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",''' - '''"customRegexp":"","isIceberg":false}''' + '''"customRegexp":"","isIceberg":false,"copyFile":false}''' ) path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} @@ -781,7 +781,7 @@ def test_generate_create_empty_kudu_table(): '''"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys": ["acct_client"],"primaryKeyObjects":[],"importData":false,''' '''"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":false,"useCustomDelimiters":''' '''false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003","customRegexp":"",''' - '''"isIceberg":false}''' + '''"isIceberg":false,"copyFile":false}''' ) path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']} @@ -899,8 +899,8 @@ def test_create_ddl_with_nonascii(): u'rdbmsSplitByColumn': [], u'existingTargetUrl': u'', u'channelSinkTypes': [{u'name': u'This topic', u'value': u'kafka'}, {u'name': u'Solr', u'value': u'solr'}, {u'name': u'HDFS', u'value': u'hdfs'}], u'defaultName': u'default.renamed_chinese_cities_gb2312', - u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'databaseName': u'default', - u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': + u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'copyFile': False, u'databaseName': + u'default', u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'indexerConfigSet': u'', u'sqoopJobLibPaths': [{u'path': u''}], u'outputFormat': u'table', diff --git a/desktop/libs/indexer/src/indexer/templates/importer.mako b/desktop/libs/indexer/src/indexer/templates/importer.mako index 17e015390c8..87a386e8598 100644 --- a/desktop/libs/indexer/src/indexer/templates/importer.mako +++ b/desktop/libs/indexer/src/indexer/templates/importer.mako @@ -690,6 +690,12 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode } +
+ +
+