From b4dc92190ff557297b0d0defc33d86ec3ae40af7 Mon Sep 17 00:00:00 2001 From: andrey-canon Date: Tue, 29 Oct 2024 16:24:17 -0500 Subject: [PATCH 1/5] fix: parse external_id to string instead of actor id to uuid and allow searches by email --- models/enrollment/fact_enrollments.sql | 4 +++- models/grading/fact_student_status.sql | 4 +++- models/navigation/fact_navigation.sql | 4 +++- models/navigation/fact_pageview_engagement.sql | 4 +++- models/problems/fact_problem_responses.sql | 4 +++- models/video/fact_video_engagement.sql | 4 +++- models/video/fact_video_plays.sql | 4 +++- 7 files changed, 21 insertions(+), 7 deletions(-) diff --git a/models/enrollment/fact_enrollments.sql b/models/enrollment/fact_enrollments.sql index cdfb9268..43c64f8a 100644 --- a/models/enrollment/fact_enrollments.sql +++ b/models/enrollment/fact_enrollments.sql @@ -25,4 +25,6 @@ select from enrollments join {{ ref("course_names") }} courses on enrollments.course_key = courses.course_key left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (actor_id like 'mailto:%' and SUBSTRING(actor_id, 8) = users.email) + or actor_id = toString(users.external_user_id) diff --git a/models/grading/fact_student_status.sql b/models/grading/fact_student_status.sql index 4ffc7871..5153df29 100644 --- a/models/grading/fact_student_status.sql +++ b/models/grading/fact_student_status.sql @@ -29,4 +29,6 @@ join on fes.org = courses.org and fes.course_key = courses.course_key left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (actor_id like 'mailto:%' and SUBSTRING(actor_id, 8) = users.email) + or actor_id = toString(users.external_user_id) diff --git a/models/navigation/fact_navigation.sql b/models/navigation/fact_navigation.sql index 12ece559..3b3415a0 100644 --- a/models/navigation/fact_navigation.sql +++ b/models/navigation/fact_navigation.sql @@ 
-23,4 +23,6 @@ join and navigation.block_id = blocks.block_id ) left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (actor_id like 'mailto:%' and SUBSTRING(actor_id, 8) = users.email) + or actor_id = toString(users.external_user_id) diff --git a/models/navigation/fact_pageview_engagement.sql b/models/navigation/fact_pageview_engagement.sql index 62103a6d..e5371fe7 100644 --- a/models/navigation/fact_pageview_engagement.sql +++ b/models/navigation/fact_pageview_engagement.sql @@ -47,4 +47,6 @@ join and pv.block_id = course_blocks.block_id ) left outer join - {{ ref("dim_user_pii") }} users on toUUID(pv.actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (pv.actor_id like 'mailto:%' and SUBSTRING(pv.actor_id, 8) = users.email) + or pv.actor_id = toString(users.external_user_id) diff --git a/models/problems/fact_problem_responses.sql b/models/problems/fact_problem_responses.sql index 0c3855d8..4a909025 100644 --- a/models/problems/fact_problem_responses.sql +++ b/models/problems/fact_problem_responses.sql @@ -44,7 +44,9 @@ join and responses.problem_id = blocks.block_id ) left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (actor_id like 'mailto:%' and SUBSTRING(actor_id, 8) = users.email) + or actor_id = toString(users.external_user_id) group by -- multi-part questions include an extra record for the response to the first -- part of the question. 
this group by clause eliminates the duplicate record diff --git a/models/video/fact_video_engagement.sql b/models/video/fact_video_engagement.sql index 3a3b2a02..cd61dbe3 100644 --- a/models/video/fact_video_engagement.sql +++ b/models/video/fact_video_engagement.sql @@ -46,4 +46,6 @@ join and ve.block_id = course_blocks.block_id ) left outer join - {{ ref("dim_user_pii") }} users on toUUID(ve.actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (ve.actor_id like 'mailto:%' and SUBSTRING(ve.actor_id, 8) = users.email) + or ve.actor_id = toString(users.external_user_id) diff --git a/models/video/fact_video_plays.sql b/models/video/fact_video_plays.sql index c998079a..5ab4bd8e 100644 --- a/models/video/fact_video_plays.sql +++ b/models/video/fact_video_plays.sql @@ -42,4 +42,6 @@ join {{ ref("dim_course_blocks_extended") }} blocks on (plays.course_key = blocks.course_key and plays.video_id = blocks.block_id) left outer join - {{ ref("dim_user_pii") }} users on toUUID(actor_id) = users.external_user_id + {{ ref("dim_user_pii") }} users + on (actor_id like 'mailto:%' and SUBSTRING(actor_id, 8) = users.email) + or actor_id = toString(users.external_user_id) From cee1b6577fff6c5f24baeb38252c467d245d4d98 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 31 Oct 2024 10:28:30 -0500 Subject: [PATCH 2/5] fix: invert join side to use user_profiles as base table --- models/users/user_pii.sql | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/models/users/user_pii.sql b/models/users/user_pii.sql index a9ce7147..c36ade43 100644 --- a/models/users/user_pii.sql +++ b/models/users/user_pii.sql @@ -25,9 +25,19 @@ with from {{ source("event_sink", "user_profile") }} group by user_id ) -select ex.user_id as user_id, ex.external_user_id, ex.username, up.name, up.email -from {{ source("event_sink", "external_id") }} ex -left outer join most_recent_user_profile mrup on mrup.user_id = ex.user_id +select + ex.user_id as 
user_id, + if( + empty(ex.external_user_id), + concat('mailto:', email), + ex.external_user_id::String + ), + ex.username, + up.name, + up.email +from most_recent_user_profile mrup +left outer join + {{ source("event_sink", "external_id") }} ex on mrup.user_id = ex.user_id left outer join {{ source("event_sink", "user_profile") }} up on up.user_id = mrup.user_id From a11e9816150ea415a6611a567c6abb306e2f0ab6 Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 31 Oct 2024 10:30:10 -0500 Subject: [PATCH 3/5] fix: invert join side to use user_profiles as base table --- models/users/user_pii.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/models/users/user_pii.sql b/models/users/user_pii.sql index c36ade43..1c9a5dbb 100644 --- a/models/users/user_pii.sql +++ b/models/users/user_pii.sql @@ -4,7 +4,7 @@ schema=env_var("ASPECTS_EVENT_SINK_DATABASE", "event_sink"), fields=[ ("user_id", "Int32"), - ("external_user_id", "UUID"), + ("external_user_id", "String"), ("username", "String"), ("name", "String"), ("email", "String"), From e6c234459124776cd32afae9b86dbb5e050b640d Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 31 Oct 2024 10:52:48 -0500 Subject: [PATCH 4/5] fix: invert join side to use user_profiles as base table --- models/users/user_pii.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/users/user_pii.sql b/models/users/user_pii.sql index 1c9a5dbb..8746a4c3 100644 --- a/models/users/user_pii.sql +++ b/models/users/user_pii.sql @@ -31,8 +31,8 @@ select empty(ex.external_user_id), concat('mailto:', email), ex.external_user_id::String - ), - ex.username, + ) as external_user_id, + up.username, up.name, up.email from most_recent_user_profile mrup From 69ff4c18cdaa0a1a5b6e3f81eef050e1ddd0503a Mon Sep 17 00:00:00 2001 From: Cristhian Garcia Date: Thu, 31 Oct 2024 13:58:16 -0500 Subject: [PATCH 5/5] fix: use alias for column names --- models/users/user_pii.sql | 6 +++--- 1 file changed, 3 
insertions(+), 3 deletions(-) diff --git a/models/users/user_pii.sql b/models/users/user_pii.sql index 8746a4c3..2999f8b5 100644 --- a/models/users/user_pii.sql +++ b/models/users/user_pii.sql @@ -32,9 +32,9 @@ select concat('mailto:', email), ex.external_user_id::String ) as external_user_id, - up.username, - up.name, - up.email + up.username as username, + up.name as name, + up.email as email from most_recent_user_profile mrup left outer join {{ source("event_sink", "external_id") }} ex on mrup.user_id = ex.user_id