-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproject.sql
More file actions
175 lines (117 loc) · 3.07 KB
/
project.sql
File metadata and controls
175 lines (117 loc) · 3.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
--Data cleaning
select *
from layoff_staging;
--created duplicated table 'layoff_staging' where we work on the data
create table layoff_staging
select *
from layoffs;
-- Main steps for data clenaing
-- Removing Duplicates
-- Standardizing values
-- Remove null and blank values and alter the table
-- duplicate remove
-- create cte to find the duplicate values and add row_num column to find repeated data
with duplicate_cte as
(
select *,
row_number () over (partition by company, location, industry, total_laid_off, percentage_laid_off,
`date`, stage, country, funds_raised_millions) as row_num
from layoff_staging
)
select *
from duplicate_cte
where company = 'casper';
select *
from layoff_staging;
-- create table 'layoff_staging2' to remove duplicates by using row_num column
CREATE TABLE `layoff_staging2` (
`company` text,
`location` text,
`industry` text,
`total_laid_off` int DEFAULT NULL,
`percentage_laid_off` text,
`date` text,
`stage` text,
`country` text,
`funds_raised_millions` int DEFAULT NULL,
`row_num` int
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci;
select *
from layoff_staging2;
-- inserting row_num values with whole data into 'layoff_staging2' table
insert into layoff_staging2
select *,
row_number () over (partition by company, location, industry, total_laid_off, percentage_laid_off,
`date`, stage, country, funds_raised_millions) as row_num
from layoff_staging;
--find the data where row_num values greater than 1 due to which find common values
select *
from layoff_staging2
where row_num >1 ;
select *
from layoff_staging2;
--deleting row_num values greter then 1
delete
from layoff_staging2
where row_num >1 ;
-- Standardizing
select *
from layoff_staging2 ;
select distinct company
from layoff_staging2
order by 1;
update layoff_staging2
set company = trim(company);
select distinct industry
from layoff_staging2
order by 1;
update layoff_staging2
set industry = null
where industry = '';
SELECT distinct industry
FROM layoff_staging2
WHERE industry LIKE 'crypto%';
update layoff_staging2
set industry = 'Crypto'
where industry like 'Crypto%';
select distinct country
from layoff_staging2
order by 1;
update layoff_staging2
set country = 'United Sates'
where country like 'united states%';
select `date`
from layoff_staging2;
select `date`,
str_to_date(`date`, '%m/%d/%Y')
from layoff_staging2;
update layoff_staging2
set `date` = str_to_date(`date`, '%m/%d/%Y');
select *
from layoff_staging2;
alter table layoff_staging2
modify column `date` date;
-- REMOVE NULL AND BLANK AND ALTER TABLE
SELECT *
FROM layoff_staging2;
SELECT distinct industry
FROM layoff_staging2
ORDER BY 1;
SELECT *
FROM layoff_staging2
WHERE industry IS NULL
ORDER BY industry;
UPDATE layoff_staging2 t1
JOIN layoff_staging2 t2
ON t1.company = t2.company
SET t1.industry = t2.industry
WHERE t1.industry IS NULL
AND t2.industry IS NOT NULL;
DELETE
FROM layoff_staging2
WHERE total_laid_off IS NULL
AND percentage_laid_off IS NULL;
ALTER TABLE layoff_staging2
DROP COLUMN row_num;
SELECT *
FROM layoff_staging2;