-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovid_dataExplore.sql
144 lines (120 loc) · 5.55 KB
/
covid_dataExplore.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
-- /*
-- Covid 19 Data Exploration
-- Skills used: Joins, CTE's, Temp Tables creation, Windows Functions, Aggregate Functions, Creating Views.
-- */
-- select * from covid_deaths
-- select * from covid_vaccinations
-- Death percentage calculation
SELECT LOCATION, DATE, TOTAL_CASES, TOTAL_DEATHS, (TOTAL_DEATHS/TOTAL_CASES) * 100 AS DEATH_PERCENTAGE
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
ORDER BY 1,2
-- Death percentage calculation (filter by location)
SELECT LOCATION, DATE, TOTAL_CASES, TOTAL_DEATHS, (TOTAL_DEATHS/TOTAL_CASES) * 100 AS DEATH_PERCENTAGE
FROM COVID_DEATHS
WHERE LOCATION = "India"
ORDER BY 1,2
-- calculate Percentage of population affected by covid
SELECT LOCATION, DATE, TOTAL_CASES, POPULATION, (TOTAL_CASES/POPULATION) * 100 AS INFECTED_POPULATION_PERCENTAGE
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
-- and location = "India"
ORDER BY 1,2
-- Find countries with the Highest infection rate compared to population ***
SELECT LOCATION, POPULATION, MAX(TOTAL_CASES) AS HIGHEST_TOTALCASE_COUNT, MAX((TOTAL_CASES/POPULATION)) * 100 AS HIGHEST_INFECTED_POPULATION_PERCENTAGE
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
GROUP BY LOCATION,POPULATION
ORDER BY HIGHEST_INFECTED_POPULATION_PERCENTAGE DESC
-- Find countries with the Highest mortality rate compared to population ****
SELECT LOCATION, POPULATION, MAX(CAST(TOTAL_DEATHS AS UNSIGNED)) AS HIGHEST_TOTALDEATH_COUNT, (MAX(CAST(TOTAL_DEATHS AS UNSIGNED))/POPULATION) * 100 AS HIGHEST_DEATH_PERCENTAGE
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
GROUP BY LOCATION,POPULATION
ORDER BY HIGHEST_DEATH_PERCENTAGE DESC
-- Countries with Highest Death Count ******
SELECT LOCATION, MAX(CAST(TOTAL_DEATHS AS UNSIGNED)) AS TOTALDEATHCOUNT
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
-- and location = 'India'
GROUP BY LOCATION
ORDER BY TOTALDEATHCOUNT DESC
-- Continents with highest death COUNT **
SELECT CONTINENT, MAX(CAST(TOTAL_DEATHS AS UNSIGNED)) AS HIGHEST_DEATHCOUNT
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
GROUP BY CONTINENT
ORDER BY HIGHEST_DEATHCOUNT DESC
-- Global data analysis (filter by per day)
SELECT DATE, SUM(NEW_CASES) AS SUM_TOTAL_CASES, SUM(NEW_DEATHS) AS SUM_TOTAL_DEATHS, SUM(NEW_DEATHS)/SUM(NEW_CASES) *100 AS TOTAL_DEATH_PERCENT
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
GROUP BY DATE
ORDER BY TOTAL_DEATH_PERCENT DESC
-- Global data analysis: total death percentage globally upto date 01/25/2022
SELECT SUM(NEW_CASES) AS SUM_TOTAL_CASES, SUM(NEW_DEATHS) AS SUM_TOTAL_DEATHS, SUM(NEW_DEATHS)/SUM(NEW_CASES) *100 AS TOTAL_DEATH_PERCENT
FROM COVID_DEATHS
WHERE CONTINENT IS NOT NULL
-- -----------------------------------
-- ********Joins******
-- total population vs vaccinations
SELECT DEATH.CONTINENT, DEATH.LOCATION, DEATH.DATE, DEATH.POPULATION, VACC.NEW_VACCINATIONS
FROM COVID_DEATHS DEATH
JOIN COVID_VACCINATIONS VACC
ON DEATH.LOCATION = VACC.LOCATION
AND DEATH.DATE = VACC.DATE
WHERE DEATH.CONTINENT IS NOT NULL
ORDER BY 1,2
-- total population vs vaccinations : calculate rolling sum of population that has received covid vaccination
SELECT DEATH.CONTINENT, DEATH.LOCATION, DEATH.DATE, DEATH.POPULATION, VACC.NEW_VACCINATIONS,
SUM(VACC.NEW_VACCINATIONS) OVER (PARTITION BY DEATH.LOCATION ORDER BY DEATH.LOCATION,DEATH.DATE) AS TOTALPOPULATION_VACCINATED
FROM COVID_DEATHS DEATH
JOIN COVID_VACCINATIONS VACC
ON DEATH.LOCATION = VACC.LOCATION
AND DEATH.DATE = VACC.DATE
WHERE DEATH.CONTINENT IS NOT NULL
ORDER BY 2,3
-- using CTE to perform calculation on partition by in previous query: calculate percentage of population that has received covid vaccination
WITH VACCINATED_POP_PERCENT (CONTINENT, LOCATION, DATE, POPULATION, NEW_VACCINATIONS, TOTALPOPULATION_VACCINATED)
AS
(SELECT DEATH.CONTINENT, DEATH.LOCATION, DEATH.DATE, DEATH.POPULATION, VACC.NEW_VACCINATIONS,
SUM(VACC.NEW_VACCINATIONS) OVER (PARTITION BY DEATH.LOCATION ORDER BY DEATH.LOCATION,DEATH.DATE) AS TOTALPOPULATION_VACCINATED
FROM COVID_DEATHS DEATH
JOIN COVID_VACCINATIONS VACC
ON DEATH.LOCATION = VACC.LOCATION
AND DEATH.DATE = VACC.DATE
WHERE DEATH.CONTINENT IS NOT NULL
)
SELECT *, (TOTALPOPULATION_VACCINATED/POPULATION)*100 AS VACCINATED_POP
FROM VACCINATED_POP_PERCENT
--- using temp table to perform calculation on partition by in previous query: calculate percentage of population that has received covid vaccination
DROP TABLE IF EXISTS VACCINATED_POPULATION_PERCET
CREATE TABLE VACCINATED_POPULATION_PERCET (
CONTINENT TEXT,
LOCATION TEXT,
PER DATETIME,
POPULATION BIGINT,
NEW_VACCINATION BIGINT,
TOTALPOPULATION_VACCINATED BIGINT);
INSERT INTO VACCINATED_POPULATION_PERCET
SELECT DEATH.CONTINENT, DEATH.LOCATION, DEATH.DATE, DEATH.POPULATION, VACC.NEW_VACCINATIONS,
SUM(VACC.NEW_VACCINATIONS) OVER (PARTITION BY DEATH.LOCATION ORDER BY DEATH.LOCATION,DEATH.DATE) AS TOTALPOPULATION_VACCINATED
FROM COVID_DEATHS DEATH
JOIN COVID_VACCINATIONS VACC
ON DEATH.LOCATION = VACC.LOCATION
AND DEATH.DATE = VACC.DATE
--where death.continent is not null
--order by 2,3
SELECT *, (TOTALPOPULATION_VACCINATED/POPULATION)*100 AS VACCINATED_POP
FROM VACCINATED_POPULATION_PERCET
-- **** Views *******
-- creating views to store data
CREATE VIEW VACCINATED_POPULATION_PERCENT AS
SELECT DEATH.CONTINENT, DEATH.LOCATION, DEATH.DATE, DEATH.POPULATION, VACC.NEW_VACCINATIONS,
SUM(VACC.NEW_VACCINATIONS) OVER (PARTITION BY DEATH.LOCATION ORDER BY DEATH.LOCATION,DEATH.DATE) AS TOTALPOPULATION_VACCINATED
FROM COVID_DEATHS DEATH
JOIN COVID_VACCINATIONS VACC
ON DEATH.LOCATION = VACC.LOCATION
AND DEATH.DATE = VACC.DATE
WHERE DEATH.CONTINENT IS NOT NULL
--order by 2,3