diff --git a/models/video/fact_video_engagement.sql b/models/video/fact_video_engagement.sql index 29163292..3a3b2a02 100644 --- a/models/video/fact_video_engagement.sql +++ b/models/video/fact_video_engagement.sql @@ -1,112 +1,49 @@ with - subsection_counts as ( + subsection_engagement as ( select org, course_key, - course_run, - section_with_name, - subsection_with_name, - actor_id, - item_count, - count(distinct video_id) as videos_viewed, - case - when videos_viewed = 0 - then 'No videos viewed yet' - when videos_viewed = item_count - then 'All videos viewed' - else 'At least one video viewed' - end as engagement_level, - username, - name, - email - from {{ ref("fact_video_engagement_per_subsection") }} - group by - org, - course_key, - course_run, - section_with_name, - subsection_with_name, + 'subsection' as content_level, actor_id, - item_count, - username, - name, - email + subsection_block_id as block_id, + engagement_level as section_subsection_video_engagement + from {{ ref("subsection_video_engagement") }} ), - section_counts as ( + section_engagement as ( select org, course_key, - course_run, - section_with_name, - '' as subsection_with_name, - actor_id, - sum(item_count) as item_count, - sum(videos_viewed) as videos_viewed, - case - when videos_viewed = 0 - then 'No videos viewed yet' - when videos_viewed = item_count - then 'All videos viewed' - else 'At least one video viewed' - end as engagement_level, - username, - name, - email - from subsection_counts - group by - org, - course_key, - course_run, - section_with_name, - subsection_with_name, + 'section' as content_level, actor_id, - username, - name, - email + section_block_id as block_id, + engagement_level as section_subsection_video_engagement + from {{ ref("section_video_engagement") }} ), - all_counts as ( - - select - org, - course_key, - course_run, - section_with_name as section_with_name, - subsection_with_name as subsection_with_name, - subsection_with_name as section_subsection_name, - 'subsection' as content_level, - actor_id as actor_id, - engagement_level as section_subsection_video_engagement, - username, - name, - email - from subsection_counts + video_engagement as ( + select * + from subsection_engagement union all - select - org, - course_key, - course_run, - section_with_name as section_with_name, - subsection_with_name as subsection_with_name, - section_with_name as section_subsection_name, - 'section' as content_level, - actor_id as actor_id, - engagement_level as section_subsection_video_engagement, - username, - name, - email - from section_counts + select * + from section_engagement ) select - ac.org as org, - ac.course_key as course_key, - ac.course_run as course_run, - ac.section_with_name as section_with_name, - ac.subsection_with_name as subsection_with_name, - ac.section_subsection_name as section_subsection_name, - ac.content_level as content_level, - ac.actor_id as actor_id, - ac.section_subsection_video_engagement as section_subsection_video_engagement, - ac.username as username, - ac.name as name, - ac.email as email -from all_counts ac + ve.org as org, + ve.course_key as course_key, + course_blocks.course_run as course_run, + course_blocks.display_name_with_location as section_subsection_name, + ve.content_level as content_level, + ve.actor_id as actor_id, + ve.section_subsection_video_engagement as section_subsection_video_engagement, + users.username as username, + users.name as name, + users.email as email +from video_engagement ve +join + {{ ref("dim_course_blocks") }} course_blocks + on ( + ve.org = course_blocks.org + and ve.course_key = course_blocks.course_key + and ve.block_id = course_blocks.block_id + ) +left outer join + {{ ref("dim_user_pii") }} users on toUUID(ve.actor_id) = users.external_user_id diff --git a/models/video/fact_video_engagement_per_subsection.sql b/models/video/fact_video_engagement_per_subsection.sql deleted file mode 100644 index 32e050aa..00000000 --- a/models/video/fact_video_engagement_per_subsection.sql +++ /dev/null @@ -1,43 +0,0 @@ -with - viewed_subsection_videos as ( - select distinct - date(emission_time) as viewed_on, - org, - course_key, - course_run, - {{ section_from_display("video_name_with_location") }} as section_number, - {{ subsection_from_display("video_name_with_location") }} - as subsection_number, - graded, - actor_id, - video_id, - username, - name, - email - from {{ ref("fact_video_plays") }} - ) - -select - views.viewed_on, - views.org, - views.course_key, - views.course_run, - videos.section_with_name, - videos.subsection_with_name, - videos.course_order, - videos.item_count, - views.actor_id, - views.video_id, - views.graded, - views.username as username, - views.name as name, - views.email as email -from viewed_subsection_videos views -join - {{ ref("int_videos_per_subsection") }} videos - on ( - views.org = videos.org - and views.course_key = videos.course_key - and views.section_number = videos.section_number - and views.subsection_number = videos.subsection_number - ) diff --git a/models/video/schema.yml b/models/video/schema.yml index e9fa7740..5b1d9c04 100644 --- a/models/video/schema.yml +++ b/models/video/schema.yml @@ -201,52 +201,6 @@ models: data_type: string description: "The unique identifier for the section block" - - name: fact_video_engagement_per_subsection - description: "A dataset with one record representing a video viewed by a learner and the section and subsection that video belongs to" - columns: - - name: viewed_on - data_type: date - description: "The date on which the video was viewed" - - name: org - data_type: string - description: "The organization that the video belongs to" - - name: course_key - data_type: string - description: "The course key for the course" - - name: course_run - data_type: String - description: "The course run for the course" - - name: section_with_name - data_type: string - description: "The name of the section this subsection belongs to, with section_number prepended" - - name: subsection_with_name - data_type: string - description: "The name of the subsection, with section_number prepended" - - name: item_count - data_type: uint64 - description: "The number of videos in this subsection" - - name: actor_id - data_type: string - description: "The xAPI actor identifier" - - name: video_id - data_type: string - description: "The xAPI object identifier" - - name: graded - data_type: Boolean - description: "Whether the block is graded" - - name: username - data_type: String - description: "The username of the learner" - - name: name - data_type: String - description: "The full name of the learner" - - name: email - data_type: String - description: "The email address of the learner" - - name: course_order - data_type: Int32 - description: "The sort order of this block in the course across all course blocks" - - name: fact_video_engagement description: "A dataset with one record representing the engagement of a learner with a video" columns: @@ -353,3 +307,90 @@ models: - name: email data_type: string description: "The email address of the learner" + + - name: section_video_engagement + description: "A record per course per section per learner with their engagement level" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: section_block_id + data_type: string + description: "The unique identifier for the section block" + - name: engagement_level + data_type: string + description: "The engagement level of the learner with the section" + tests: + - accepted_values: + values: + - 'No videos viewed yet' + - 'All videos viewed' + - 'At least one video viewed' + + - name: subsection_video_engagement + description: "A record per course per subsection per learner with their engagement level" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: subsection_block_id + data_type: string + description: "The unique identifier for the subsection block" + - name: engagement_level + data_type: string + description: "The engagement level of the learner with the subsection" + tests: + - accepted_values: + values: + - 'No videos viewed yet' + - 'All videos viewed' + - 'At least one video viewed' + + - name: watched_video_segments + description: "A dataset with one record per learner per video segment watched in a course" + columns: + - name: org + data_type: string + description: "The organization that the course belongs to" + - name: course_key + data_type: string + description: "The course key for the course" + - name: actor_id + data_type: string + description: "The xAPI actor identifier" + - name: video_id + data_type: string + description: "The xAPI object identifier" + - name: start_position + data_type: int64 + description: "The start position of the segment" + - name: end_position + data_type: int64 + description: "The end position of the segment" + - name: started_at + data_type: datetime + description: "The time the video segment was started" + - name: ended_at + data_type: datetime + description: "The time the video segment was ended" + - name: emission_time + data_type: datetime + description: "The time the event was emitted" + - name: end_type + data_type: string + description: "The type of event that ended the video segment" + - name: video_duration + data_type: int64 + description: "The duration of the video in seconds" diff --git a/models/video/section_video_engagement.sql b/models/video/section_video_engagement.sql new file mode 100644 index 00000000..9fd22ef9 --- /dev/null +++ b/models/video/section_video_engagement.sql @@ -0,0 +1,90 @@ +{{ + config( + materialized="materialized_view", + schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"), + engine=get_engine("ReplacingMergeTree()"), + primary_key="(org, course_key)", + order_by="(org, course_key, section_block_id, actor_id)", + ) +}} + +with + viewed_subsection_videos as ( + select distinct + date(emission_time) as viewed_on, + org, + course_key, + {{ section_from_display("video_name_with_location") }} as section_number, + {{ subsection_from_display("video_name_with_location") }} + as subsection_number, + actor_id, + video_id + from {{ ref("fact_video_plays") }} + ), + fact_video_engagement_per_subsection as ( + select + views.org as org, + views.course_key as course_key, + videos.section_with_name as section_with_name, + videos.subsection_with_name as subsection_with_name, + videos.item_count as item_count, + views.actor_id as actor_id, + views.video_id as video_id, + videos.section_block_id as section_block_id + from viewed_subsection_videos views + join + {{ ref("int_videos_per_subsection") }} videos + on ( + views.org = videos.org + and views.course_key = videos.course_key + and views.section_number = videos.section_number + and views.subsection_number = videos.subsection_number + ) + ), + subsection_counts as ( + select + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + count(distinct video_id) as videos_viewed, + case + when videos_viewed = 0 + then 'No videos viewed yet' + when videos_viewed = item_count + then 'All videos viewed' + else 'At least one video viewed' + end as engagement_level, + section_block_id + from fact_video_engagement_per_subsection + group by + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + section_block_id + ), + section_counts as ( + select + org, + course_key, + actor_id, + sum(item_count) as item_count, + sum(videos_viewed) as videos_viewed, + case + when videos_viewed = 0 + then 'No videos viewed yet' + when videos_viewed = item_count + then 'All videos viewed' + else 'At least one video viewed' + end as engagement_level, + section_block_id + from subsection_counts + group by org, course_key, section_block_id, actor_id + ) +select org, course_key, actor_id, section_block_id, engagement_level +from section_counts diff --git a/models/video/subsection_video_engagement.sql b/models/video/subsection_video_engagement.sql new file mode 100644 index 00000000..d7c78ef4 --- /dev/null +++ b/models/video/subsection_video_engagement.sql @@ -0,0 +1,72 @@ +{{ + config( + materialized="materialized_view", + schema=env_var("ASPECTS_XAPI_DATABASE", "xapi"), + engine=get_engine("ReplacingMergeTree()"), + primary_key="(org, course_key)", + order_by="(org, course_key, subsection_block_id, actor_id)", + ) +}} + +with + viewed_subsection_videos as ( + select distinct + date(emission_time) as viewed_on, + org, + course_key, + {{ section_from_display("video_name_with_location") }} as section_number, + {{ subsection_from_display("video_name_with_location") }} + as subsection_number, + actor_id, + video_id + from {{ ref("fact_video_plays") }} + ), + fact_video_engagement_per_subsection as ( + select + views.org as org, + views.course_key as course_key, + videos.section_with_name as section_with_name, + videos.subsection_with_name as subsection_with_name, + videos.item_count as item_count, + views.actor_id as actor_id, + views.video_id as video_id, + videos.subsection_block_id as subsection_block_id + from viewed_subsection_videos views + join + {{ ref("int_videos_per_subsection") }} videos + on ( + views.org = videos.org + and views.course_key = videos.course_key + and views.section_number = videos.section_number + and views.subsection_number = videos.subsection_number + ) + ), + subsection_counts as ( + select + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + count(distinct video_id) as videos_viewed, + case + when videos_viewed = 0 + then 'No videos viewed yet' + when videos_viewed = item_count + then 'All videos viewed' + else 'At least one video viewed' + end as engagement_level, + subsection_block_id + from fact_video_engagement_per_subsection + group by + org, + course_key, + section_with_name, + subsection_with_name, + actor_id, + item_count, + subsection_block_id + ) +select org, course_key, actor_id, subsection_block_id, engagement_level +from subsection_counts