-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDS_job queries.sql
136 lines (120 loc) · 3.96 KB
/
DS_job queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
-- Let's check the data set
SELECT *
FROM ds_job
LIMIT 10;
--Removing the (glassdoor) from Salary_Estimate
SELECT Salary_Estimate,
SUBSTRING_INDEX(Salary_Estimate,' ', 1) salary_estimate
FROM ds_job;
-- Updating the table with new salary estimate
UPDATE ds_job
SET Salary_Estimate = SUBSTRING_INDEX(Salary_Estimate,' ', 1);
SELECT *
FROM ds_job;
-- Removing rating from the `Company_Name`
SELECT `Company_Name`,
SUBSTRING_INDEX(`Company_Name`, '
', 1) company_name
FROM ds_job;
-- Counting charactor length and comparing with the the Comapny_name column
WITH subtring AS (SELECT `Company_Name`,
SUBSTRING_INDEX(`Company_Name`, '
', 1) company
FROM ds_job)
SELECT D.`Company_Name`,
LENGTH(D.`Company_Name`) total_D_length,
S.company,
LENGTH(S.company) total_length
FROM ds_job D
JOIN subtring S
ON D.`Company_Name` = S.`Company_Name`;
-- Updating the column Company_Name
UPDATE ds_job
SET `Company_Name` = SUBSTRING_INDEX(`Company_Name`, '
', 1);
SELECT *
FROM ds_job;
-- Removing the 'Company' in `Type_of_ownership`
SELECT `Type_of_ownership`,
SUBSTRING_INDEX(`Type_of_ownership`, '- ', -1) ownership
FROM ds_job;
UPDATE ds_job
SET `Type_of_ownership` = SUBSTRING_INDEX(`Type_of_ownership`, '- ', -1);
-- See the new table
SELECT *
FROM ds_job;
-- Counting '-1' in each column these columns
SELECT (SELECT COUNT(Rating) FROM ds_job WHERE Rating = -1) rating,
(SELECT COUNT(`Headquarters`) FROM ds_job WHERE `Headquarters`= -1) headquarters,
(SELECT COUNT(Size) FROM ds_job WHERE `Size` = -1) size,
(SELECT COUNT(Type_of_ownership) FROM ds_job WHERE Type_of_ownership = -1) type_of_ownership,
(SELECT COUNT(Industry) FROM ds_job WHERE Industry = -1) industry,
(SELECT COUNT(Sector) FROM ds_job WHERE Sector = -1) sector,
(SELECT COUNT(Revenue) FROM ds_job WHERE Revenue = -1) revenue
FROM ds_job
LIMIT 1;
-- Now, removing the '-1' values with 'Unknown' and 'Unknown / Non-Applicable' with Unknown in Revenue column
UPDATE ds_job
SET `Rating` = REPLACE(Rating, '-1', '0')
WHERE `Rating` = -1;
UPDATE ds_job
SET `Headquarters` = REPLACE(`Headquarters`, '-1', 'Unknown')
WHERE `Headquarters` = '-1';
UPDATE ds_job
SET `Size` = REPLACE(`Size`, '-1', 'Unknown')
WHERE `Size` = '-1';
UPDATE ds_job
SET `Type_of_ownership` = REPLACE(`Type_of_ownership`, '-1', 'Unknown')
WHERE `Type_of_ownership` = '-1';
UPDATE ds_job
SET `Industry` = REPLACE(`Industry`, '-1', 'Unknown')
WHERE `Industry` = '-1';
UPDATE ds_job
SET `Sector` = REPLACE(`Sector`, '-1', 'Unknown')
WHERE `Sector` = '-1';
UPDATE ds_job
SET `Revenue` = REPLACE(`Revenue`, '-1', 'Unknown')
WHERE `Revenue` = '-1';
UPDATE ds_job
SET Revenue = REPLACE(`Revenue`, 'Unknown / Non-Applicable', 'Uknown')
WHERE `Revenue` = 'Unknown / Non-Applicable';
-- There was a spelling mistake while changing the Revenue column value 'Unknown / Non-Applicable' to 'Unkown'
-- Now, we're going to change 'Uknown' to 'Unknown'
SET Revenue = REPLACE(`Revenue`, 'Uknown', 'Unknown')
WHERE `Revenue` = 'Uknown';
-- Deleting unuse columns Index_, Founded and Competitors
ALTER TABLE ds_job
DROP COLUMN index_,
DROP COLUMN Founded,
DROP COLUMN Competitors;
--let's look at the cleaned dataset
SELECT *
FROM ds_job
LIMIT 10;
-- let's analyise it a little bit about job description
WITH skill AS (SELECT `Job_Description`,
CASE
WHEN `Job_Description` LIKE "%Python%"
THEN '1'
ELSE '0'
END AS Python,
CASE
WHEN `Job_Description` LIKE "%Excel%"
THEN '1'
ELSE '0'
END AS Excel,
CASE
WHEN `Job_Description` LIKE "%AWS%"
THEN '1'
ELSE '0'
END AS AWS,
CASE
WHEN `Job_Description` LIKE "%SQL%"
THEN '1'
ELSE '0'
END AS 'SQL'
FROM ds_job)
SELECT *
FROM ds_job d
JOIN skill s
ON d.`Job_Description` = s.`Job_Description`;