Skip to content

Commit

Permalink
[Importer] Adding an option to copy and not just move files
Browse files: browse the repository at this point in the history
  • Loading branch information
agl29 committed Nov 1, 2023
1 parent f076fe4 commit c17839b
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
6 changes: 5 additions & 1 deletion desktop/libs/indexer/src/indexer/indexers/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
source_path = source['path']
load_data = destination['importData']
isIceberg = destination['isIceberg']
copyFile = destination['copyFile']

external = not destination['useDefaultLocation']
external_path = destination['nonDefaultLocation']
Expand Down Expand Up @@ -166,7 +167,10 @@ def create_table_from_a_file(self, source, destination, start_time=-1, file_enco
# If a dir and not just the file, create a data dir and copy or move the file there (depending on copyFile). Make sure it's unique.
external_path = external_path + '/%s%s_table' % (external_file_name, str(uuid.uuid4()))
self.fs.mkdir(external_path)
self.fs.rename(source_path, external_path)
if copyFile:
self.fs.copy(source_path, external_path)
else:
self.fs.rename(source_path, external_path)
elif load_data: # We'll use load data command
parent_path = self.fs.parent_path(source_path)
stats = self.fs.stats(parent_path)
Expand Down
24 changes: 12 additions & 12 deletions desktop/libs/indexer/src/indexer/indexers/sql_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def test_generate_create_text_table_with_data_partition():
u'outputFormats': [{u'name': u'Table', u'value': u'table'}, {u'name': u'Solr index', u'value': u'index'}],
u'customMapDelimiter': u'\\003', u'showProperties': False, u'useDefaultLocation': True, u'description': u'',
u'primaryKeyObjects': [], u'customFieldDelimiter': u',', u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False,
u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1,
u'copyFile': False, u'databaseName': u'default', u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1,
u'name': u'VALUES', u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [],
u'outputFormat': u'table', u'nonDefaultLocation': u'/user/romain/customer_stats.csv', u'name': u'default.customer_stats',
u'tableFormat': u'text', 'ouputFormat': u'table',
Expand Down Expand Up @@ -371,7 +371,7 @@ def test_generate_create_kudu_table_with_data():
u'description': u'Big Data', u'primaryKeyObjects': [{u'operations': [], u'comment': u'', u'name': u'id', u'level': 0,
u'keyType': u'string', u'required': False, u'nested': [], u'isPartition': False, u'length': 100, u'multiValued': False,
u'unique': False, u'type': u'string', u'showProperties': False, u'keep': True}], u'customFieldDelimiter': u',',
u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'databaseName': u'default',
u'existingTargetUrl': u'', u'importData': True, u'isIceberg': False, u'copyFile': False, u'databaseName': u'default',
u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN': {u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES',
u'include_lower_val': u'<=', u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [u'id'],
u'outputFormat': u'table', u'nonDefaultLocation': u'/user/admin/index_data.csv', u'name': u'index_data',
Expand Down Expand Up @@ -527,9 +527,9 @@ def test_generate_create_parquet_table():
'''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},'''
'''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",'''
'''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,'''
'''"isIceberg":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":""}'''
'''"isIceberg":false,"copyFile":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",'''
'''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",'''
'''"customMapDelimiter":"\\\\003","customRegexp":""}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -619,9 +619,9 @@ def test_generate_create_iceberg_table():
'''"text","name":"Text"},{"value":"parquet","name":"Parquet"},{"value":"kudu","name":"Kudu"},{"value":"csv","name":"Csv"},'''
'''{"value":"avro","name":"Avro"},{"value":"json","name":"Json"},{"value":"regexp","name":"Regexp"},{"value":"orc",'''
'''"name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],"importData":true,'''
'''"isIceberg":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":""}'''
'''"isIceberg":true,"copyFile":false,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv",'''
'''"hasHeader":true,"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002",'''
'''"customMapDelimiter":"\\\\003","customRegexp":""}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -715,7 +715,7 @@ def test_generate_create_orc_table_transactional():
'''{"value":"orc","name":"ORC"}],"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys":[],"primaryKeyObjects":[],'''
'''"importData":true,"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":true,'''
'''"useCustomDelimiters":false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003",'''
'''"customRegexp":"","isIceberg":false}'''
'''"customRegexp":"","isIceberg":false,"copyFile":false}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -781,7 +781,7 @@ def test_generate_create_empty_kudu_table():
'''"partitionColumns":[],"kuduPartitionColumns":[],"primaryKeys": ["acct_client"],"primaryKeyObjects":[],"importData":false,'''
'''"useDefaultLocation":true,"nonDefaultLocation":"/user/hue/data/query-hive-360.csv","hasHeader":false,"useCustomDelimiters":'''
'''false,"customFieldDelimiter":",","customCollectionDelimiter":"\\\\002","customMapDelimiter":"\\\\003","customRegexp":"",'''
'''"isIceberg":false}'''
'''"isIceberg":false,"copyFile":false}'''
)

path = {'isDir': False, 'split': ('/user/hue/data', 'query-hive-360.csv'), 'listdir': ['/user/hue/data']}
Expand Down Expand Up @@ -899,8 +899,8 @@ def test_create_ddl_with_nonascii():
u'rdbmsSplitByColumn': [], u'existingTargetUrl': u'', u'channelSinkTypes':
[{u'name': u'This topic', u'value': u'kafka'}, {u'name': u'Solr', u'value': u'solr'},
{u'name': u'HDFS', u'value': u'hdfs'}], u'defaultName': u'default.renamed_chinese_cities_gb2312',
u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'databaseName': u'default',
u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN':
u'isTransactionalUpdateEnabled': False, u'importData': True, u'isIceberg': False, u'copyFile': False, u'databaseName':
u'default', u'indexerRunJob': False, u'indexerReplicationFactor': 1, u'KUDU_DEFAULT_RANGE_PARTITION_COLUMN':
{u'include_upper_val': u'<=', u'upper_val': 1, u'name': u'VALUES', u'include_lower_val': u'<=',
u'lower_val': 0, u'values': [{u'value': u''}]}, u'primaryKeys': [], u'indexerConfigSet': u'',
u'sqoopJobLibPaths': [{u'path': u''}], u'outputFormat': u'table',
Expand Down
10 changes: 10 additions & 0 deletions desktop/libs/indexer/src/indexer/templates/importer.mako
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,12 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
</label>
</div>
<div class="control-group" data-bind="visible: !useDefaultLocation() && !isTransactional() && $root.createWizard.source.inputFormat() === 'file'">
<label class="checkbox inline-block">
<input data-hue-analytics="importer:copyFile-checkbox-interaction" type="checkbox" data-bind="checked: copyFile"> ${_('Copy file')}
</label>
</div>
<div class="control-group">
<label><div>${ _('Description') }</div>
<input type="text" class="form-control input-xxlarge" data-bind="value: description, valueUpdate: 'afterkeydown'" placeholder="${ _('Description') }">
Expand Down Expand Up @@ -2622,6 +2628,7 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
})
self.useDefaultLocation = ko.observable(true);
self.useDefaultLocation.subscribe(function(val) {
self.copyFile(false);
window.hueAnalytics.log('importer', 'default-location/' + val);
})
self.nonDefaultLocation = ko.observable('');
Expand All @@ -2633,6 +2640,7 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
self.isTransactionalVisible = ko.observable((vm.sourceType == 'impala' && isTransactionalVisibleImpala) || (vm.sourceType == 'hive' && isTransactionalVisibleHive));
self.isTransactional = ko.observable(self.isTransactionalVisible());
self.isTransactional.subscribe(function(val) {
self.copyFile(false);
window.hueAnalytics.log('importer', 'is-transactional/' + val);
})
self.isInsertOnly = ko.observable(true); // Impala doesn't have yet full support.
Expand All @@ -2659,6 +2667,8 @@ ${ commonheader(_("Importer"), "indexer", user, request, "60px") | n,unicode }
window.hueAnalytics.log('importer', 'is-iceberg/' + val);
});
self.copyFile = ko.observable(false);
self.hasHeader = ko.observable(false);
self.useCustomDelimiters = ko.observable(false);
Expand Down

0 comments on commit c17839b

Please sign in to comment.