-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathft.py
53 lines (42 loc) · 2.13 KB
/
ft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import glob
import importlib
import os
from pathlib import Path
import click
from datapackage import Package
# https://github.com/frictionlessdata/datapackage-py/issues/273
# Is it https://github.com/openknowledge-archive/datapackage-storage-py ??
from helpers import package_to_sqlite, sqlpackage_to_disk, natural_sort
# Names of the two optional Python modules that the CLI tries to import from
# the transformation directory (imported as "<transform_dir>.<module name>").
# - The preprocessing module must expose `process(package)`; its return value
#   replaces the input package before the data is loaded into SQLite.
# - The postprocessing module must expose `postprocess_database(connection)`;
#   it is called with the open database connection after the SQL files ran.
PREPROCESSING_MODULE = "preprocessing"
POSTPROCESSING_MODULE = "postprocessing"
def _import_optional_module(transform_dir_path: str, module_name: str):
    """Import an optional hook module living in the transformation directory.

    The directory path is converted to a dotted package name from its path
    components, so ``transforms``, ``transforms/`` and ``./transforms`` all
    resolve to the same package, and ``a/b`` resolves to ``a.b``.

    Returns the imported module, or ``None`` when the module (or one of its
    parent packages) cannot be found.

    Raises ``ModuleNotFoundError`` when the hook module exists but something
    *it* imports is missing: that is a real error in the user's module and
    must not be reported as "module not found, skipping".
    """
    # Path.parts drops trailing separators and a leading "./"; an empty parts
    # tuple (path ".") simply yields the bare module name.
    qualified_name = ".".join([*Path(transform_dir_path).parts, module_name])

    try:
        return importlib.import_module(qualified_name)
    except ModuleNotFoundError as err:
        missing = err.name
        # Only treat the error as "hook not provided" when the missing module
        # is the hook itself or one of its parent packages.
        hook_is_missing = missing is not None and (
            qualified_name == missing or qualified_name.startswith(f"{missing}.")
        )
        if not hook_is_missing:
            raise
        return None


@click.command()
@click.argument('input_package_path', type=click.Path(exists=True))  # TODO: accept URL in addition to local directory?
@click.argument('transform_dir_path', type=click.Path(exists=True))
@click.argument('output_package_path', type=click.Path(exists=False))
def cli(input_package_path: str, transform_dir_path: str, output_package_path: str):
    """Reshape a (frictionless) data package."""
    # NOTE: the docstring above is shown verbatim by click as the --help text,
    # so developer documentation lives in comments instead.
    #
    # Pipeline:
    #   1. open the data package found at `input_package_path`;
    #   2. run the optional preprocessing module of `transform_dir_path`;
    #   3. load the package into SQLite;
    #   4. execute every `*.sql` file of `transform_dir_path`, in natural order;
    #   5. run the optional postprocessing module on the open connection;
    #   6. save the resulting database as a data package at `output_package_path`.
    click.echo("Opening the source data package...")
    input_package = Package(input_package_path)

    preprocessing = _import_optional_module(transform_dir_path, PREPROCESSING_MODULE)
    if preprocessing is None:
        click.echo("No preprocessing module found, skipping")
    else:
        click.echo("Data preprocessing...")
        input_package = preprocessing.process(input_package)

    click.echo("Load into SQLite...")
    engine = package_to_sqlite(input_package)

    with engine.connect() as con:
        # Escape the directory part so that glob metacharacters in its name
        # (e.g. "data[1]") are matched literally; only "*.sql" is a pattern.
        sql_pattern = os.path.join(glob.escape(transform_dir_path), '*.sql')
        for transformation_file in natural_sort(glob.glob(sql_pattern)):
            click.echo(f"Processing transformation: {transformation_file}")
            con.execute(Path(transformation_file).read_text())

        # Postprocessing needs the live connection, so it stays inside the
        # `with` block and runs after all SQL transformations.
        postprocessing = _import_optional_module(transform_dir_path, POSTPROCESSING_MODULE)
        if postprocessing is None:
            click.echo("No postprocessing module found, skipping")
        else:
            click.echo("Data postprocessing...")
            postprocessing.postprocess_database(con)

    click.echo(f"Saving the transformed data to {output_package_path}...")
    package_from_sql = Package(storage='sql', engine=engine)
    sqlpackage_to_disk(package_from_sql, output_package_path)

    click.echo("Done.")