diff --git a/datasets/thelook_ecommerce/pipelines/_images/run_thelook_kub/fake.py b/datasets/thelook_ecommerce/pipelines/_images/run_thelook_kub/fake.py index dd3993448..d81e1db45 100644 --- a/datasets/thelook_ecommerce/pipelines/_images/run_thelook_kub/fake.py +++ b/datasets/thelook_ecommerce/pipelines/_images/run_thelook_kub/fake.py @@ -129,6 +129,8 @@ def generate_locations() -> typing.List[str]: SECONDS_IN_MINUTE = 60 MINUTES_IN_HOUR = 60 MINUTES_IN_DAY = 1440 +MIN_AGE = 12 +MAX_AGE = 71 products = generate_products() logging.info("generating products helper dict") @@ -393,6 +395,7 @@ class Users(DataUtil): first_name: str = dataclasses.field(init=False) last_name: str = dataclasses.field(init=False) email: str = dataclasses.field(init=False) + age: int = dataclasses.field(init=False) gender: str = dataclasses.field(init=False) state: str = dataclasses.field(init=False) street_address: str = dataclasses.field(init=False) @@ -428,6 +431,7 @@ def __post_init__(self): self.latitude = address.latitude self.longitude = address.longitude self.email = f"{self.first_name.lower()}{self.last_name.lower()}@{fake.safe_domain_name()}" + self.age = random.randrange(MIN_AGE, MAX_AGE) # weight newer users/orders choice = random.choices([0, 1], weights=[0.975, 0.025])[0] if choice == 0: @@ -446,7 +450,7 @@ def __post_init__(self): orders.append(dataclasses.asdict(Order(user=self))) def __str__(self): - return f"{self.id}, {self.first_name}, {self.last_name}, {self.email}, {self.gender}, {self.state}, {self.street_address}, {self.postal_code}, {self.city}, {self.traffic_source}, {self.created_at}" + return f"{self.id}, {self.first_name}, {self.last_name}, {self.email}, {self.age}, {self.gender}, {self.state}, {self.street_address}, {self.postal_code}, {self.city}, {self.traffic_source}, {self.created_at}" @dataclasses.dataclass diff --git a/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/pipeline.yaml b/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/pipeline.yaml index d684cb9de..44ab226c8 100644 --- a/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/pipeline.yaml +++ b/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/pipeline.yaml @@ -417,6 +417,9 @@ dag: - name: "email" type: "STRING" mode: "NULLABLE" + - name: "age" + type: "INTEGER" + mode: "NULLABLE" - name: "gender" type: "STRING" mode: "NULLABLE" diff --git a/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/thelook_ecommerce_dag.py b/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/thelook_ecommerce_dag.py index a8e5ea4ea..2f73b590a 100644 --- a/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/thelook_ecommerce_dag.py +++ b/datasets/thelook_ecommerce/pipelines/thelook_ecommerce/thelook_ecommerce_dag.py @@ -187,6 +187,7 @@ {"name": "first_name", "type": "STRING", "mode": "NULLABLE"}, {"name": "last_name", "type": "STRING", "mode": "NULLABLE"}, {"name": "email", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, {"name": "gender", "type": "STRING", "mode": "NULLABLE"}, {"name": "state", "type": "STRING", "mode": "NULLABLE"}, {"name": "street_address", "type": "STRING", "mode": "NULLABLE"},