Q1

Q1

Q2

Q2

之后执行

wget https://github.com/DataTalksClub/nyc-tlc-data/releases/download/green/green_tripdata_2019-10.csv.gz wget https://github.com/DataTalksClub/nyc-tlc-data/releases/download/misc/taxi_zone_lookup.csv

下载数据

image-20250117164137006

Q3

image-20250117181036231

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
SUM(CASE WHEN trip_distance <= 1 THEN 1 ELSE 0 END) AS up_to_1_mile,
SUM(CASE WHEN trip_distance > 1 AND trip_distance <= 3 THEN 1 ELSE 0 END) AS between_1_and_3_miles,
SUM(CASE WHEN trip_distance > 3 AND trip_distance <= 7 THEN 1 ELSE 0 END) AS between_3_and_7_miles,
SUM(CASE WHEN trip_distance > 7 AND trip_distance <= 10 THEN 1 ELSE 0 END) AS between_7_and_10_miles,
SUM(CASE WHEN trip_distance > 10 THEN 1 ELSE 0 END) AS over_10_miles
FROM
green_tripdata
WHERE
lpep_pickup_datetime >= '2019-10-01' AND lpep_pickup_datetime < '2019-11-01'
AND
lpep_dropoff_datetime >= '2019-10-01' AND lpep_dropoff_datetime < '2019-11-01';

Q4

image-20250117181534535

也可以用limit 1

1
2
3
4
5
6
7
8
9
10
11
12
SELECT
CAST(lpep_pickup_datetime AS DATE) AS pickup_day,
MAX(trip_distance) as longest_trip
FROM
green_tripdata
WHERE
lpep_pickup_datetime >= '2019-10-01' AND lpep_pickup_datetime < '2019-11-01'
GROUP BY
CAST(lpep_pickup_datetime AS DATE)
ORDER BY
longest_trip DESC
LIMIT 1;

Q5

image-20250117213546136

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
SELECT 
z."Zone" AS "pickup_zone",
SUM(g."total_amount") AS "total_amount"
FROM
Green_tripdata g,
taxi_zones z
WHERE
g."PULocationID" = z."LocationID" AND
g."lpep_pickup_datetime" >= '2019-10-18' AND
g."lpep_pickup_datetime" < '2019-10-19'
GROUP BY
z."Zone"
HAVING
SUM(g."total_amount") > 13000
ORDER BY
SUM(g."total_amount") DESC;

Q6

image-20250117215822205

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
SELECT 
dz."Zone" AS "dropoff_zone",
MAX(g."tip_amount") AS "max_tip_amount"
FROM
Green_tripdata g
JOIN
taxi_zones pz ON g."PULocationID" = pz."LocationID"
JOIN
taxi_zones dz ON g."DOLocationID" = dz."LocationID"
WHERE
g."lpep_pickup_datetime" >= '2019-10-01' AND
g."lpep_pickup_datetime" < '2019-10-31' AND
pz."Zone" = 'East Harlem North'
GROUP BY
dz."Zone"
ORDER BY
MAX(g."tip_amount") DESC
LIMIT 1;

Q7

image-20250117233324310

image-20250117233402410