-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQLQuery1.sql
More file actions
213 lines (187 loc) · 5.47 KB
/
Copy pathSQLQuery1.sql
File metadata and controls
213 lines (187 loc) · 5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
USE OULAD;
GO
CREATE TABLE FlightsRaw (
FL_DATE DATE,
AIRLINE VARCHAR(100),
AIRLINE_DOT VARCHAR(10),
AIRLINE_CODE VARCHAR(10),
DOT_CODE VARCHAR(10),
FL_NUMBER VARCHAR(10),
ORIGIN VARCHAR(10),
ORIGIN_CITY VARCHAR(100),
DEST VARCHAR(10),
DEST_CITY VARCHAR(100),
CRS_DEP_TIME VARCHAR(10),
DEP_TIME VARCHAR(10),
DEP_DELAY FLOAT,
TAXI_OUT FLOAT,
WHEELS_OFF VARCHAR(10),
WHEELS_ON VARCHAR(10),
TAXI_IN FLOAT,
CRS_ARR_TIME VARCHAR(10),
ARR_TIME VARCHAR(10),
ARR_DELAY FLOAT,
CANCELLED BIT,
CANCELLATION_CODE VARCHAR(10),
DIVERTED BIT,
CRS_ELAPSED_TIME FLOAT,
ELAPSED_TIME FLOAT,
AIR_TIME FLOAT,
DISTANCE FLOAT,
DELAY_DUE_CARRIER FLOAT,
DELAY_DUE_WEATHER FLOAT,
DELAY_DUE_NAS FLOAT,
DELAY_DUE_SECURITY FLOAT,
DELAY_DUE_LATE_AIRCRAFT FLOAT
)
EXEC sp_configure 'show advanced options', 1;
RECONFIGURE;
EXEC sp_configure 'Ad Hoc Distributed Queries', 1;
RECONFIGURE;
EXEC sp_configure 'xp_cmdshell', 1;
RECONFIGURE;
DROP TABLE IF EXISTS FlightsRaw;
GO
CREATE TABLE FlightsRaw (
FL_DATE VARCHAR(MAX),
AIRLINE VARCHAR(MAX),
AIRLINE_DOT VARCHAR(MAX),
AIRLINE_CODE VARCHAR(MAX),
DOT_CODE VARCHAR(MAX),
FL_NUMBER VARCHAR(MAX),
ORIGIN VARCHAR(MAX),
ORIGIN_CITY VARCHAR(MAX),
DEST VARCHAR(MAX),
DEST_CITY VARCHAR(MAX),
CRS_DEP_TIME VARCHAR(MAX),
DEP_TIME VARCHAR(MAX),
DEP_DELAY VARCHAR(MAX),
TAXI_OUT VARCHAR(MAX),
WHEELS_OFF VARCHAR(MAX),
WHEELS_ON VARCHAR(MAX),
TAXI_IN VARCHAR(MAX),
CRS_ARR_TIME VARCHAR(MAX),
ARR_TIME VARCHAR(MAX),
ARR_DELAY VARCHAR(MAX),
CANCELLED VARCHAR(MAX),
CANCELLATION_CODE VARCHAR(MAX),
DIVERTED VARCHAR(MAX),
CRS_ELAPSED_TIME VARCHAR(MAX),
ELAPSED_TIME VARCHAR(MAX),
AIR_TIME VARCHAR(MAX),
DISTANCE VARCHAR(MAX),
DELAY_DUE_CARRIER VARCHAR(MAX),
DELAY_DUE_WEATHER VARCHAR(MAX),
DELAY_DUE_NAS VARCHAR(MAX),
DELAY_DUE_SECURITY VARCHAR(MAX),
DELAY_DUE_LATE_AIRCRAFT VARCHAR(MAX)
);
BULK INSERT FlightsRaw
FROM 'C:\Users\dell\Downloads\flights_sample_3m.csv'
WITH (
FIRSTROW = 2,
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n',
CODEPAGE = '65001',
TABLOCK
);
select *
from
FlightsRaw
-- Update all textual columns
UPDATE FlightsRaw
SET
AIRLINE = LTRIM(RTRIM(AIRLINE)),
AIRLINE_DOT = LTRIM(RTRIM(AIRLINE_DOT)),
AIRLINE_CODE = LTRIM(RTRIM(AIRLINE_CODE)),
DOT_CODE = LTRIM(RTRIM(DOT_CODE)),
ORIGIN = LTRIM(RTRIM(ORIGIN)),
ORIGIN_CITY = LTRIM(RTRIM(ORIGIN_CITY)),
DEST = LTRIM(RTRIM(DEST)),
DEST_CITY = LTRIM(RTRIM(DEST_CITY)),
CANCELLATION_CODE = LTRIM(RTRIM(CANCELLATION_CODE));
select top 10 FL_DATE
from
FlightsRaw
UPDATE FlightsRaw
SET ORIGIN_CITY =
LTRIM(RTRIM(REPLACE(REPLACE(ORIGIN_CITY, '"', ''), '""', '')));
UPDATE FlightsRaw
SET DEST = LTRIM(RTRIM(REPLACE(REPLACE(DEST, '"', ''), '""', '')))
WHERE DEST LIKE '"%' OR DEST LIKE '%"';
UPDATE FlightsRaw
SET CRS_DEP_TIME = LTRIM(RTRIM(REPLACE(REPLACE(CRS_DEP_TIME, '"', ''), '""', '')))
WHERE CRS_DEP_TIME LIKE '"%' OR CRS_DEP_TIME LIKE '%"';
UPDATE FlightsRaw
SET DEP_TIME = LTRIM(RTRIM(REPLACE(REPLACE(DEP_TIME, '"', ''), '""', '')))
WHERE DEP_TIME LIKE '"%' OR DEP_TIME LIKE '%"';
ALTER TABLE FlightsRaw DROP COLUMN AIRLINE_DOT;
ALTER TABLE FlightsRaw DROP COLUMN DOT_CODE;
ALTER TABLE FlightsRaw DROP COLUMN DIVERTED;
ALTER TABLE FlightsRaw DROP COLUMN ELAPSED_TIME;
ALTER TABLE FlightsRaw DROP COLUMN CRS_ELAPSED_TIME;
WITH CTE_Duplicates AS (
SELECT *,
ROW_NUMBER() OVER (
PARTITION BY FL_DATE, AIRLINE, FL_NUMBER, ORIGIN, DEST, DEP_DELAY, TAXI_OUT
ORDER BY (SELECT NULL)
) AS rn
FROM FlightsRaw
)
DELETE FROM CTE_Duplicates
WHERE rn > 1;
--Query: Flights with delay greater than 15 minutes
SELECT FL_DATE, AIRLINE, FL_NUMBER, DEP_DELAY, ARR_DELAY
FROM FlightsRaw
WHERE ARR_DELAY > 15
ORDER BY ARR_DELAY DESC;
SELECT Count(AIRLINE)
FROM FlightsRaw
WHERE ARR_DELAY > 15
ORDER BY ARR_DELAY DESC;
--Query: Average delay by airline
SELECT AIRLINE,
AVG(TRY_CAST(DEP_DELAY AS FLOAT)) AS avg_departure_delay
FROM FlightsRaw
WHERE ISNUMERIC(DEP_DELAY) = 1
GROUP BY AIRLINE;
--Query: Top 10 origin-destination pairs
SELECT ORIGIN, DEST, COUNT(*) AS total_flights
FROM FlightsRaw
GROUP BY ORIGIN, DEST
ORDER BY total_flights DESC
OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY;
--Delay by departure hour
SELECT
LEFT(RIGHT('0000' + CAST(CRS_DEP_TIME AS VARCHAR(4)), 4), 2) AS dep_hour,
COUNT(*) AS flights,
AVG(TRY_CAST(DEP_DELAY AS FLOAT)) AS avg_delay
FROM FlightsRaw
WHERE ISNUMERIC(CRS_DEP_TIME) = 1 AND ISNUMERIC(DEP_DELAY) = 1
GROUP BY LEFT(RIGHT('0000' + CAST(CRS_DEP_TIME AS VARCHAR(4)), 4), 2)
ORDER BY dep_hour;
--Cancellation reasons distribution
SELECT CANCELLATION_CODE, COUNT(*) AS cancelled_count
FROM FlightsRaw
WHERE CANCELLED = 1
GROUP BY CANCELLATION_CODE;
--Flights with min and max air time
SELECT TOP 5 *
FROM FlightsRaw
WHERE AIR_TIME IS NOT NULL
ORDER BY AIR_TIME DESC;
SELECT TOP 5 *
FROM FlightsRaw
WHERE AIR_TIME IS NOT NULL
ORDER BY AIR_TIME ASC;
--Flights per day
SELECT FL_DATE, COUNT(*) AS flight_count
FROM FlightsRaw
GROUP BY FL_DATE
ORDER BY FL_DATE;
--Flights delayed due to weather
SELECT FL_DATE, AIRLINE, ORIGIN, DEST,
DELAY_DUE_WEATHER
FROM FlightsRaw
WHERE DELAY_DUE_WEATHER > 0
ORDER BY DELAY_DUE_WEATHER DESC;