From 6fa3c0f79dfb74584346cb42e0032adee564cfe5 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Tue, 16 Apr 2024 14:48:17 -0500 Subject: [PATCH 1/2] feat: join user_pii data with reports fix: use outer join for user pii join fix: use alias for user pii data --- models/completion/fact_completions.sql | 8 +++- models/completion/schema.yml | 9 +++++ models/enrollment/fact_enrollments.sql | 7 +++- models/enrollment/schema.yml | 9 +++++ models/forum/fact_forum_interactions.sql | 7 +++- models/forum/schema.yml | 9 +++++ models/grading/fact_grades.sql | 7 +++- models/grading/fact_student_status.sql | 7 +++- models/grading/schema.yml | 18 +++++++++ models/navigation/fact_navigation.sql | 7 +++- .../navigation/fact_navigation_completion.sql | 23 +++++++----- models/navigation/fact_navigation_dropoff.sql | 7 +++- models/navigation/schema.yml | 27 ++++++++++++++ .../problems/fact_learner_problem_summary.sql | 12 +++++- models/problems/fact_problem_engagement.sql | 25 ++++++++----- models/problems/fact_problem_responses.sql | 8 ++++ .../fact_problem_responses_extended.sql | 5 +++ models/problems/schema.yml | 37 +++++++++++++++++++ models/video/fact_transcript_usage.sql | 7 +++- models/video/fact_video_engagement.sql | 10 ++++- models/video/fact_video_plays.sql | 7 +++- models/video/schema.yml | 27 ++++++++++++++ 22 files changed, 251 insertions(+), 32 deletions(-) diff --git a/models/completion/fact_completions.sql b/models/completion/fact_completions.sql index 87f14f98..2e733305 100644 --- a/models/completion/fact_completions.sql +++ b/models/completion/fact_completions.sql @@ -48,8 +48,14 @@ select when scaled_progress >= 0.1 and scaled_progress < 0.2 then '10-19%' else '0-9%' - end as completion_bucket + end as completion_bucket, + users.username as username, + users.name as name, + users.email as email from completions join {{ ref("course_names") }} courses on completions.course_key = courses.course_key left join {{ ref("course_block_names") }} blocks on completions.entity_id = blocks.location +left outer join + {{ ref("dim_user_pii") }} users + on toUUID(completions.actor_id) = users.external_user_id diff --git a/models/completion/schema.yml b/models/completion/schema.yml index cf60bb88..ade06b83 100644 --- a/models/completion/schema.yml +++ b/models/completion/schema.yml @@ -38,6 +38,15 @@ models: - name: completion_bucket description: "A displayable value of progress sorted into 10% buckets. Useful for grouping progress together to show high-level learner performance" data_type: String + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: completion_events description: "A materialized view for xAPI events related to course completions" diff --git a/models/enrollment/fact_enrollments.sql b/models/enrollment/fact_enrollments.sql index e0e39af2..cdfb9268 100644 --- a/models/enrollment/fact_enrollments.sql +++ b/models/enrollment/fact_enrollments.sql @@ -18,6 +18,11 @@ select courses.course_run as course_run, enrollments.actor_id as actor_id, enrollments.enrollment_mode as enrollment_mode, - enrollments.enrollment_status as enrollment_status + enrollments.enrollment_status as enrollment_status, + users.username as username, + users.name as name, + users.email as email from enrollments join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/enrollment/schema.yml b/models/enrollment/schema.yml index fa24f8bd..5fa029e1 100644 --- a/models/enrollment/schema.yml +++ b/models/enrollment/schema.yml @@ -31,6 +31,15 @@ models: - accepted_values: values: ["registered", "unregistered"] data_type: String + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: enrollment_events description: "A materialized view for xAPI events related to course enrollment" diff --git a/models/forum/fact_forum_interactions.sql b/models/forum/fact_forum_interactions.sql index 18a05a9d..3da949aa 100644 --- a/models/forum/fact_forum_interactions.sql +++ b/models/forum/fact_forum_interactions.sql @@ -7,6 +7,11 @@ select courses.course_run as course_run, forum.object_id as object_id, forum.actor_id as actor_id, - forum.verb_id as verb_id + forum.verb_id as verb_id, + users.username as username, + users.name as name, + users.email as email from {{ ref("forum_events") }} forum join {{ ref("course_names") }} courses on (forum.course_key = courses.course_key) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/forum/schema.yml b/models/forum/schema.yml index 682a9479..2abbaa84 100644 --- a/models/forum/schema.yml +++ b/models/forum/schema.yml @@ -31,6 +31,15 @@ models: - name: verb_id data_type: LowCardinality(String) description: "The xAPI verb identifier" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: forum_events description: "One record per forum event" diff --git a/models/grading/fact_grades.sql b/models/grading/fact_grades.sql index e8351f0a..8ce028b7 100644 --- a/models/grading/fact_grades.sql +++ b/models/grading/fact_grades.sql @@ -41,7 +41,12 @@ select grades.grade_type as grade_type, grades.actor_id as actor_id, grades.scaled_score as scaled_score, - {{ get_bucket("scaled_score") }} as grade_bucket + {{ get_bucket("scaled_score") }} as grade_bucket, + users.username as username, + users.name as name, + users.email as email from grades join {{ ref("course_names") }} courses on grades.course_key = courses.course_key left join {{ ref("course_block_names") }} blocks on grades.entity_id = blocks.location +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/grading/fact_student_status.sql b/models/grading/fact_student_status.sql index f95aba75..29eebeb7 100644 --- a/models/grading/fact_student_status.sql +++ b/models/grading/fact_student_status.sql @@ -8,7 +8,10 @@ select enrollment_mode, enrollment_status, course_grade as course_grade, - {{ get_bucket("course_grade") }} as grade_bucket + {{ get_bucket("course_grade") }} as grade_bucket, + users.username as username, + users.name as name, + users.email as email from {{ ref("fact_enrollment_status") }} fes left join {{ ref("fact_learner_course_status") }} lg @@ -24,3 +27,5 @@ join {{ ref("course_names") }} courses on fes.org = courses.org and fes.course_key = courses.course_key +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/grading/schema.yml b/models/grading/schema.yml index a9e1cb09..c11984a6 100644 --- a/models/grading/schema.yml +++ b/models/grading/schema.yml @@ -43,6 +43,15 @@ models: - name: grade_bucket description: "A displayable value of grades sorted into 10% buckets. Useful for grouping grades together to show high-level learner performance" data_type: String + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: fact_learner_course_grade description: "One record per learner per course for the most recent grade" @@ -149,3 +158,12 @@ models: - name: grade_bucket data_type: string description: "A displayable value of grades sorted into 10% buckets. Useful for grouping grades together to show high-level learner performance" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" diff --git a/models/navigation/fact_navigation.sql b/models/navigation/fact_navigation.sql index b92c4c6a..a0c04893 100644 --- a/models/navigation/fact_navigation.sql +++ b/models/navigation/fact_navigation.sql @@ -10,7 +10,10 @@ select blocks.display_name_with_location as block_name_with_location, navigation.object_type as object_type, navigation.starting_position as starting_position, - navigation.ending_point as ending_point + navigation.ending_point as ending_point, + users.username as username, + users.name as name, + users.email as email from {{ ref("navigation_events") }} navigation join {{ ref("dim_course_blocks") }} blocks @@ -18,3 +21,5 @@ join navigation.course_key = blocks.course_key and navigation.block_id = blocks.block_id ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/navigation/fact_navigation_completion.sql b/models/navigation/fact_navigation_completion.sql index 32b06a5b..7e7e4362 100644 --- a/models/navigation/fact_navigation_completion.sql +++ b/models/navigation/fact_navigation_completion.sql @@ -15,15 +15,18 @@ with ) select - visits.visited_on, - visits.org, - visits.course_key, - visits.course_run, - pages.section_with_name, - pages.subsection_with_name, - pages.page_count, - visits.actor_id, - visits.block_id + visits.visited_on as visited_on, + visits.org as org, + visits.course_key as course_key, + visits.course_run as course_run, + pages.section_with_name as section_with_name, + pages.subsection_with_name as subsection_with_name, + pages.page_count as page_count, + visits.actor_id as actor_id, + visits.block_id as block_id, + users.username as username, + users.name as name, + users.email as email from visited_subsection_pages visits join {{ ref("int_pages_per_subsection") }} pages @@ -33,3 +36,5 @@ join and visits.section_number = pages.section_number and visits.subsection_number = pages.subsection_number ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/navigation/fact_navigation_dropoff.sql b/models/navigation/fact_navigation_dropoff.sql index ffb89935..12979d9a 100644 --- a/models/navigation/fact_navigation_dropoff.sql +++ b/models/navigation/fact_navigation_dropoff.sql @@ -64,7 +64,10 @@ select page_views.rollup_name as rollup_name, blocks.display_name_with_location as block_name, page_views.actor_id as actor_id, - page_views.total_views as total_views + page_views.total_views as total_views, + users.username as username, + users.name as name, + users.email as email from page_views join blocks @@ -73,3 +76,5 @@ join and page_views.course_key = blocks.course_key and page_views.hierarchy_location = blocks.hierarchy_location ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/navigation/schema.yml b/models/navigation/schema.yml index 6a463670..a0773294 100644 --- a/models/navigation/schema.yml +++ b/models/navigation/schema.yml @@ -74,6 +74,15 @@ models: - name: ending_point data_type: string description: "The tab in the unit navigation bar that the learner selected to navigate to" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: fact_navigation_dropoff description: "A view for analyzing the number of page visits per learner per section and subsection" @@ -102,6 +111,15 @@ models: - name: total_views data_type: uint64 description: "The total number of times a learner viewed pages in this section or subsection on a given day" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: fact_navigation_completion description: "A view for analyzing how many pages a learner has visited in a section or subsection" @@ -136,6 +154,15 @@ models: - name: block_id data_type: string description: "The ID of the specific page visited" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: int_pages_per_subsection description: "A view for analyzing the number of pages in each subsection" diff --git a/models/problems/fact_learner_problem_summary.sql b/models/problems/fact_learner_problem_summary.sql index 4e5557c1..001fa7f1 100644 --- a/models/problems/fact_learner_problem_summary.sql +++ b/models/problems/fact_learner_problem_summary.sql @@ -49,8 +49,13 @@ select coalesce(any(success), false) as success, coalesce(any(attempts), 0) as attempts, sum(num_hints_displayed) as num_hints_displayed, - sum(num_answers_displayed) as num_answers_displayed + sum(num_answers_displayed) as num_answers_displayed, + users.username as username, + users.name as name, + users.email as email from results_with_hints +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id group by org, course_key, @@ -59,4 +64,7 @@ group by problem_id, problem_name, problem_name_with_location, - actor_id + actor_id, + username, + name, + email diff --git a/models/problems/fact_problem_engagement.sql b/models/problems/fact_problem_engagement.sql index fde14d90..1c5ed3b4 100644 --- a/models/problems/fact_problem_engagement.sql +++ b/models/problems/fact_problem_engagement.sql @@ -15,16 +15,19 @@ with ) select - attempts.attempted_on, - attempts.org, - attempts.course_key, - attempts.course_run, - problems.section_with_name, - problems.subsection_with_name, - problems.item_count, - attempts.actor_id, - attempts.problem_id, - attempts.graded + attempts.attempted_on as attempted_on, + attempts.org as org, + attempts.course_key as course_key, + attempts.course_run as course_run, + problems.section_with_name as section_with_name, + problems.subsection_with_name as subsection_with_name, + problems.item_count as item_count, + attempts.actor_id as actor_id, + attempts.problem_id as problem_id, + attempts.graded as graded, + users.username as username, + users.name as name, + users.email as email from attempted_subsection_problems attempts join {{ ref("int_problems_per_subsection") }} problems @@ -34,3 +37,5 @@ join and attempts.section_number = problems.section_number and attempts.subsection_number = problems.subsection_number ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/problems/fact_problem_responses.sql b/models/problems/fact_problem_responses.sql index a69a8249..3ae445c4 100644 --- a/models/problems/fact_problem_responses.sql +++ b/models/problems/fact_problem_responses.sql @@ -31,6 +31,9 @@ select responses.success as success, responses.attempts as attempts, responses.interaction_type as interaction_type + users.username as username, + users.name as name, + users.email as email from responses join {{ ref("dim_course_blocks") }} blocks @@ -38,6 +41,8 @@ join responses.course_key = blocks.course_key and responses.problem_id = blocks.block_id ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id group by -- multi-part questions include an extra record for the response to the first -- part of the question. this group by clause eliminates the duplicate record @@ -56,3 +61,6 @@ group by attempts, graded, interaction_type + username, + name, + email diff --git a/models/problems/fact_problem_responses_extended.sql b/models/problems/fact_problem_responses_extended.sql index d2fa6a05..70ca5d55 100644 --- a/models/problems/fact_problem_responses_extended.sql +++ b/models/problems/fact_problem_responses_extended.sql @@ -25,6 +25,9 @@ select results.attempts as attempts, results.graded as graded, results.interaction_type as interaction_type + users.username as username, + users.name as name, + users.email as email from problem_results results join {{ ref("int_problems_per_subsection") }} problems @@ -34,3 +37,5 @@ join and results.section_number = problems.section_number and results.subsection_number = problems.subsection_number ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/problems/schema.yml b/models/problems/schema.yml index abe8c5dc..740e291f 100644 --- a/models/problems/schema.yml +++ b/models/problems/schema.yml @@ -44,6 +44,15 @@ models: - name: num_answers_displayed description: "The number of times a learner requested the answers for the problem" data_type: UInt64 + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: fact_problem_responses description: "One record for each submitted response to a problem" @@ -96,6 +105,15 @@ models: - name: interaction_type data_type: string description: "The type of interaction - e.g. multiple choice" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: int_problem_hints description: "Internal table for problem hints" @@ -281,6 +299,16 @@ models: - name: graded data_type: bool description: "Whether the block is graded" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" + - name: fact_problem_responses_extended description: "int_problem_results with section and subsection names" @@ -336,3 +364,12 @@ models: - name: interaction_type data_type: string description: "The type of interaction" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" diff --git a/models/video/fact_transcript_usage.sql b/models/video/fact_transcript_usage.sql index a7dcbba9..97cf894f 100644 --- a/models/video/fact_transcript_usage.sql +++ b/models/video/fact_transcript_usage.sql @@ -7,7 +7,10 @@ select transcripts.video_id as video_id, blocks.block_name as video_name, blocks.display_name_with_location as video_name_with_location, - transcripts.actor_id as actor_id + transcripts.actor_id as actor_id, + users.username as username, + users.name as name, + users.email as email from {{ ref("video_transcript_events") }} transcripts join {{ ref("dim_course_blocks") }} blocks @@ -15,4 +18,6 @@ join transcripts.course_key = blocks.course_key and transcripts.video_id = blocks.block_id ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id where transcripts.cc_enabled diff --git a/models/video/fact_video_engagement.sql b/models/video/fact_video_engagement.sql index da06e39f..823b173f 100644 --- a/models/video/fact_video_engagement.sql +++ b/models/video/fact_video_engagement.sql @@ -9,7 +9,10 @@ with as subsection_number, graded, actor_id, - video_id + video_id, + username, + name, + email from {{ ref("fact_video_plays") }} ) @@ -22,7 +25,10 @@ select videos.item_count, views.actor_id, views.video_id, - views.graded + views.graded, + views.username as username, + views.name as name, + views.email as email from viewed_subsection_videos views join {{ ref("int_videos_per_subsection") }} videos diff --git a/models/video/fact_video_plays.sql b/models/video/fact_video_plays.sql index 814f8c55..242efb96 100644 --- a/models/video/fact_video_plays.sql +++ b/models/video/fact_video_plays.sql @@ -30,8 +30,13 @@ select video_position, video_duration, {{ get_bucket("video_position/video_duration") }} as visualization_bucket, - plays.actor_id as actor_id + plays.actor_id as actor_id, + users.username as username, + users.name as name, + users.email as email from plays join {{ ref("dim_course_blocks") }} blocks on (plays.course_key = blocks.course_key and plays.video_id = blocks.block_id) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id diff --git a/models/video/schema.yml b/models/video/schema.yml index 5297a3cb..8558524d 100644 --- a/models/video/schema.yml +++ b/models/video/schema.yml @@ -46,6 +46,15 @@ models: - name: visualization_bucket data_type: String description: "The percentile bucket for the video play event" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: fact_transcript_usage description: "One record for each time a transcript or closed caption was enabled" @@ -77,6 +86,15 @@ models: - name: actor_id data_type: String description: "The xAPI actor identifier" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" - name: video_playback_events description: "Events related to video playback" @@ -192,3 +210,12 @@ models: - name: graded data_type: Boolean description: "Whether the block is graded" + - name: username + data_type: String + description: "The username of the learner" + - name: name + data_type: String + description: "The full name of the learner" + - name: email + data_type: String + description: "The email address of the learner" From 9c96262978485adb838606c7c3732366f52b8b41 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Wed, 17 Apr 2024 15:56:53 -0500 Subject: [PATCH 2/2] fix: add course_name for fact_navigation_completion model (cherry picked from commit e0feb205624fe7f0ca8dc41981a2427c1b8c43a1) --- models/navigation/fact_navigation_completion.sql | 2 ++ models/navigation/schema.yml | 4 ++-- models/problems/fact_problem_responses.sql | 4 ++-- models/problems/fact_problem_responses_extended.sql | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/models/navigation/fact_navigation_completion.sql b/models/navigation/fact_navigation_completion.sql index 7e7e4362..b5060b79 100644 --- a/models/navigation/fact_navigation_completion.sql +++ b/models/navigation/fact_navigation_completion.sql @@ -5,6 +5,7 @@ with date(emission_time) as visited_on, org, course_key, + course_name, course_run, {{ section_from_display("block_name_with_location") }} as section_number, {{ subsection_from_display("block_name_with_location") }} @@ -18,6 +19,7 @@ select visits.visited_on as visited_on, visits.org as org, visits.course_key as course_key, + visits.course_name as course_name, visits.course_run as course_run, pages.section_with_name as section_with_name, pages.subsection_with_name as subsection_with_name, diff --git a/models/navigation/schema.yml b/models/navigation/schema.yml index a0773294..8527bd0f 100644 --- a/models/navigation/schema.yml +++ b/models/navigation/schema.yml @@ -133,9 +133,9 @@ models: - name: course_key data_type: string description: "The course identifier" - - name: course_run + - name: course_name data_type: string - description: "The course run for the course" + description: "The course name" - name: course_run data_type: string description: "The course run for the course" diff --git a/models/problems/fact_problem_responses.sql b/models/problems/fact_problem_responses.sql index 3ae445c4..da125fba 100644 --- a/models/problems/fact_problem_responses.sql +++ b/models/problems/fact_problem_responses.sql @@ -30,7 +30,7 @@ select responses.responses as responses, responses.success as success, responses.attempts as attempts, - responses.interaction_type as interaction_type + responses.interaction_type as interaction_type, users.username as username, users.name as name, users.email as email @@ -60,7 +60,7 @@ group by success, attempts, graded, - interaction_type + interaction_type, username, name, email diff --git a/models/problems/fact_problem_responses_extended.sql b/models/problems/fact_problem_responses_extended.sql index 70ca5d55..15aad4fb 100644 --- a/models/problems/fact_problem_responses_extended.sql +++ b/models/problems/fact_problem_responses_extended.sql @@ -24,7 +24,7 @@ select results.success as success, results.attempts as attempts, results.graded as graded, - results.interaction_type as interaction_type + results.interaction_type as interaction_type, users.username as username, users.name as name, users.email as email