Skip to content

Commit

Permalink
feat: Add aggregate MVs for site wide stats
Browse files Browse the repository at this point in the history
  • Loading branch information
bmtcril committed Apr 29, 2024
1 parent a57a2f5 commit 2fa70fd
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 143 deletions.
15 changes: 15 additions & 0 deletions models/instance/fact_instance_actors.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{#
    Hourly site-wide actor activity.

    Each row keeps a uniqCombined aggregate state of the actor ids seen in
    that hour. Read it back with uniqCombinedMerge(actors_cnt) to obtain the
    distinct-actor count for any set of hours.
#}
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("AggregatingMergeTree()"),
        order_by="(emission_hour)",
        partition_by="(toYYYYMM(emission_hour))",
    )
}}

select
    date_trunc('hour', emission_time) as emission_hour,
    uniqCombinedState(actor_id) as actors_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
15 changes: 15 additions & 0 deletions models/instance/fact_instance_courses.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{#
    Hourly site-wide course activity.

    Each row keeps a uniqCombined aggregate state of the course ids seen in
    that hour. Read it back with uniqCombinedMerge(courses_cnt) to obtain the
    distinct-course count for any set of hours.
#}
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("AggregatingMergeTree()"),
        order_by="(emission_hour)",
        partition_by="(toYYYYMM(emission_hour))",
    )
}}

select
    date_trunc('hour', emission_time) as emission_hour,
    uniqCombinedState(course_id) as courses_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
29 changes: 29 additions & 0 deletions models/instance/fact_instance_enrollments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{#
    Hourly site-wide enrollment activity: one row per
    (hour, course name, enrollment mode, enrollment status) holding the number
    of enrollment events seen for that combination.

    The sorting key must list every grouping column. SummingMergeTree
    collapses rows that share a sorting key, summing the numeric columns and
    keeping an arbitrary value for the others, so a key of emission_hour alone
    would fold all courses, modes and statuses of an hour into a single row.
#}
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("SummingMergeTree()"),
        order_by="(emission_hour, course_name, enrollment_mode, enrollment_status)",
        partition_by="(toYYYYMM(emission_hour))",
    )
}}

with
    enrollments as (
        select
            emission_time,
            course_key,
            enrollment_mode,
            -- the status is the last '/'-separated segment of the verb id
            splitByString('/', verb_id)[-1] as enrollment_status
        from {{ ref("enrollment_events") }}
    )

select
    date_trunc('hour', enrollments.emission_time) as emission_hour,
    courses.course_name as course_name,
    enrollments.enrollment_mode as enrollment_mode,
    enrollments.enrollment_status as enrollment_status,
    count() as course_enrollment_mode_status_cnt
from enrollments
-- inner join: enrollment events with no matching course_names row are dropped
inner join
    {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key
group by emission_hour, course_name, enrollment_mode, enrollment_status
15 changes: 15 additions & 0 deletions models/instance/fact_instance_events.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{#
    Hourly site-wide xAPI event activity.

    Each row keeps a uniqCombined aggregate state of the event ids seen in
    that hour. Read it back with uniqCombinedMerge(events_cnt) to obtain the
    distinct-event count for any set of hours.
#}
{{
    config(
        materialized="materialized_view",
        schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"),
        engine=get_engine("AggregatingMergeTree()"),
        order_by="(emission_hour)",
        partition_by="(toYYYYMM(emission_hour))",
    )
}}

select
    date_trunc('hour', emission_time) as emission_hour,
    uniqCombinedState(event_id) as events_cnt
from {{ ref("xapi_events_all_parsed") }}
group by emission_hour
54 changes: 54 additions & 0 deletions models/instance/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
version: 2

# Column documentation for the site-wide ("instance") aggregate materialized views.
# emission_hour is produced with date_trunc('hour', emission_time) in every model,
# i.e. it is the start of the hour, not the nearest hour.

models:
  - name: fact_instance_courses
    description: "A materialized view summarizing site-wide course activity"
    columns:
      - name: emission_hour
        data_type: datetime(64)
        description: "Hour of the summary (event emission time truncated to the start of the hour)"
      - name: courses_cnt
        # NOTE(review): the model stores uniqCombinedState(course_id), an aggregate
        # state rather than a plain int - confirm the intended data_type.
        data_type: int
        description: "uniqCombined aggregate state of the xAPI courses active in the given hour; use uniqCombinedMerge to get the distinct count"

  - name: fact_instance_events
    description: "A materialized view summarizing site-wide xAPI event activity"
    columns:
      - name: emission_hour
        data_type: datetime(64)
        description: "Hour of the summary (event emission time truncated to the start of the hour)"
      - name: events_cnt
        # NOTE(review): the model stores uniqCombinedState(event_id), an aggregate
        # state rather than a plain int - confirm the intended data_type.
        data_type: int
        description: "uniqCombined aggregate state of the xAPI events that occurred in the given hour; use uniqCombinedMerge to get the distinct count"

  - name: fact_instance_actors
    description: "A materialized view summarizing site-wide user activity"
    columns:
      - name: emission_hour
        data_type: datetime(64)
        description: "Hour of the summary (event emission time truncated to the start of the hour)"
      - name: actors_cnt
        # NOTE(review): the model stores uniqCombinedState(actor_id), an aggregate
        # state rather than a plain int - confirm the intended data_type.
        data_type: int
        description: "uniqCombined aggregate state of the xAPI actors active in the given hour; use uniqCombinedMerge to get the distinct count"

  - name: fact_instance_enrollments
    description: "A materialized view for summarizing site-wide enrollment activity"
    columns:
      - name: emission_hour
        data_type: datetime(64)
        description: "Hour of the summary (event emission time truncated to the start of the hour)"
      - name: course_name
        data_type: String
        description: "The name of the course"
      - name: enrollment_mode
        data_type: String
        description: "The name of the enrollment mode (ex: audit, honor)"
      - name: enrollment_status
        data_type: String
        description: "The type of enrollment event (ex: registered, unregistered)"
        tests:
          - accepted_values:
              values: [ "registered", "unregistered" ]
      - name: course_enrollment_mode_status_cnt
        data_type: int
        description: "The number of enrollment events for this course, enrollment mode and enrollment status that occurred in the given hour"
18 changes: 1 addition & 17 deletions models/users/dim_user_pii.sql
Original file line number Diff line number Diff line change
@@ -1,17 +1 @@
-- User PII columns from user_pii plus a derived age.
select
    external_user_id,
    external_id_type,
    username,
    name,
    email,
    language,
    year_of_birth,
    gender,
    level_of_education,
    country,
    -- age is the current year minus year_of_birth; it is NULL when
    -- toInt32OrZero(year_of_birth) yields 0
    if(
        toInt32OrZero(year_of_birth) = 0,
        NULL,
        toYear(now()) - toInt32OrZero(year_of_birth)
    ) as age
from {{ ref("user_pii") }} as user_pii
-- Identifying user columns only: external id, username, name and email.
select
    external_user_id,
    username,
    name,
    email
from {{ ref("user_pii") }} as user_pii
69 changes: 0 additions & 69 deletions models/users/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ models:
- name: external_user_id
data_type: String
description: "The external user id"
- name: external_id_type
data_type: String
description: "The external id type"
- name: username
data_type: String
description: "The username of the user"
Expand All @@ -23,61 +20,13 @@ models:
- name: email
data_type: String
description: "The email of the user"
- name: meta
data_type: String
description: "The meta information of the user"
- name: courseware
data_type: String
description: "The courseware information of the user"
- name: language
data_type: String
description: "The language of the user. This field is deprecated and will be removed in the future."
- name: location
data_type: String
description: "The location of the user"
- name: year_of_birth
data_type: Int
description: "The year of birth of the user"
- name: gender
data_type: String
description: "The gender of the user"
- name: level_of_education
data_type: String
description: "The level of education of the user"
- name: mailing_address
data_type: String
description: "The mailing address of the user"
- name: city
data_type: String
description: "The city of the user"
- name: country
data_type: String
description: "The country of the user"
- name: state
data_type: String
description: "The state where the user resides"
- name: goals
data_type: String
description: "The goals of the user"
- name: bio
data_type: String
description: "The biography of the user"
- name: profile_image_uploaded_at
data_type: DateTime
description: "The time the profile image was uploaded"
- name: phone_number
data_type: String
description: "The phone number of the user"

- name: dim_user_pii
description: "A denormalized table of user PII information"
columns:
- name: external_user_id
data_type: String
description: "The external user id"
- name: external_id_type
data_type: String
description: "The external id type"
- name: username
data_type: String
description: "The username"
Expand All @@ -87,24 +36,6 @@ models:
- name: email
data_type: String
description: "The email of the user"
- name: language
data_type: String
description: "The language of the user"
- name: year_of_birth
data_type: Int
description: "The year of birth of the user"
- name: gender
data_type: String
description: "The gender of the user"
- name: level_of_education
data_type: String
description: "The level of education of the user"
- name: country
data_type: String
description: "The country of the user"
- name: age
data_type: Int
description: "The age of the user"

- name: dim_at_risk_learners
description: "A record for each learner determined to be at risk of dropping out of a given course"
Expand Down
59 changes: 2 additions & 57 deletions models/users/user_pii.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,9 @@
fields=[
("user_id", "Int32"),
("external_user_id", "UUID"),
("external_id_type", "String"),
("username", "String"),
("name", "String"),
("email", "String"),
("meta", "String"),
("courseware", "String"),
("language", "String"),
("location", "String"),
("year_of_birth", "String"),
("gender", "String"),
("level_of_education", "String"),
("mailing_address", "String"),
("city", "String"),
("country", "String"),
("state", "String"),
("goals", "String"),
("bio", "String"),
("profile_image_uploaded_at", "String"),
("phone_number", "String"),
],
primary_key="(user_id, external_user_id)",
layout="COMPLEX_KEY_SPARSE_HASHED()",
Expand All @@ -41,51 +25,12 @@ with
user_id,
name,
email,
meta,
courseware,
language,
location,
year_of_birth,
gender,
level_of_education,
mailing_address,
city,
country,
state,
goals,
bio,
profile_image_uploaded_at,
phone_number,
ROW_NUMBER() over (
partition by user_id order by (id, time_last_dumped) DESC
) as rn
from {{ source("event_sink", "user_profile") }}
)
select
mrup.user_id as user_id,
external_user_id,
external_id_type,
username,
name,
email,
meta,
courseware,
language,
location,
year_of_birth,
gender,
level_of_education,
mailing_address,
city,
country,
state,
goals,
bio,
profile_image_uploaded_at,
phone_number
select mrup.user_id as user_id, external_user_id, username, name, email
from {{ source("event_sink", "external_id") }} ex
left outer join
most_recent_user_profile mrup
on mrup.user_id = ex.user_id
and (ex.external_id_type = 'xapi' or ex.external_id_type is NULL)
left outer join most_recent_user_profile mrup on mrup.user_id = ex.user_id
where mrup.rn = 1

0 comments on commit 2fa70fd

Please sign in to comment.