More Classes with the Flight Dataset
This example is from TDM 102 Project 12 Spring 2024.
These examples depend on the dataset:
- /anvil/projects/tdm/data/flights/2014.csv
Learn more about the dataset here.
You need to use 2 cores for your Jupyter Lab session for this example.
You can use |
1a. In the previous project, you created a class named Flight, which contains attributes for the flight number, origin airport ID, destination airport ID, departure time, arrival time, departure delay, and arrival delay. Now let us use this class as a base class. Create a new subclass called ScheduledFlight. Add 2 more attributes to this new subclass: CRSDepTime and CRSArrTime.
class Flight:
    """One flight, described by its identifiers, actual times, and delays.

    Every constructor argument is stored unchanged on the instance under
    the same name.
    """

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
    ):
        """Store the values describing a single flight.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Departure time.
            arr_time: Arrival time.
            dep_delay: Departure delay.
            arr_delay: Arrival delay.
        """
        self.flight_number = flight_number
        self.origin_airport_id = origin_airport_id
        self.dest_airport_id = dest_airport_id
        self.dep_time = dep_time
        self.arr_time = arr_time
        self.dep_delay = dep_delay
        self.arr_delay = arr_delay


class ScheduledFlight(Flight):
    """A Flight that also carries its CRSDepTime and CRSArrTime values."""

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
        crs_dep_time,
        crs_arr_time,
    ):
        """Initialise the Flight part, then store the two CRS times.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Departure time.
            arr_time: Arrival time.
            dep_delay: Departure delay.
            arr_delay: Arrival delay.
            crs_dep_time: Value of the CRSDepTime column.
            crs_arr_time: Value of the CRSArrTime column.
        """
        super().__init__(
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
        )
        self.crs_dep_time = crs_dep_time
        self.crs_arr_time = crs_arr_time
1b. Add a method called is_ontime() to the class, which returns a boolean value that indicates if the flight departs on time and arrives on time.
class Flight:
    """One flight, described by its identifiers, actual times, and delays.

    Every constructor argument is stored unchanged on the instance under
    the same name.
    """

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
    ):
        """Store the values describing a single flight.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Departure time.
            arr_time: Arrival time.
            dep_delay: Departure delay.
            arr_delay: Arrival delay.
        """
        self.flight_number = flight_number
        self.origin_airport_id = origin_airport_id
        self.dest_airport_id = dest_airport_id
        self.dep_time = dep_time
        self.arr_time = arr_time
        self.dep_delay = dep_delay
        self.arr_delay = arr_delay


class ScheduledFlight(Flight):
    """A Flight that also carries its CRSDepTime and CRSArrTime values."""

    def __init__(
        self,
        flight_number,
        origin_airport_id,
        dest_airport_id,
        dep_time,
        arr_time,
        dep_delay,
        arr_delay,
        crs_dep_time,
        crs_arr_time,
    ):
        """Initialise the Flight part, then store the two CRS times.

        Args:
            flight_number: Flight number.
            origin_airport_id: Identifier of the origin airport.
            dest_airport_id: Identifier of the destination airport.
            dep_time: Departure time.
            arr_time: Arrival time.
            dep_delay: Departure delay.
            arr_delay: Arrival delay.
            crs_dep_time: Value of the CRSDepTime column.
            crs_arr_time: Value of the CRSArrTime column.
        """
        super().__init__(
            flight_number,
            origin_airport_id,
            dest_airport_id,
            dep_time,
            arr_time,
            dep_delay,
            arr_delay,
        )
        self.crs_dep_time = crs_dep_time
        self.crs_arr_time = crs_arr_time

    def is_ontime(self) -> bool:
        """Return True when neither the departure nor the arrival was late.

        A flight counts as on time only if both delays are less than or
        equal to zero. A missing (NaN) delay compares false against zero,
        so such a flight is reported as not on time.
        """
        # bool() turns a NumPy boolean (produced when the delays are NumPy
        # scalars) into a plain Python bool.
        return bool(self.dep_delay <= 0 and self.arr_delay <= 0)
2a. Create a DataFrame named myDF, to store data from the 2014.csv data set. It suffices to import (only) the columns listed below, and to (only) read in the first 100 rows. Although we provide the columns_to_read, please make (and use) a dictionary of col_types like we did in Question 1 of Project 10.
# dtype to use for every column of the flights CSV when it is read with
# pandas. Columns that can be empty are float64 (numeric) or object (text),
# because an int64 column cannot hold a missing value.
col_types = {
    "Year": "int64",
    "Quarter": "int64",
    "Month": "int64",
    "DayofMonth": "int64",
    "DayOfWeek": "int64",
    "FlightDate": "object",
    "Reporting_Airline": "object",
    "DOT_ID_Reporting_Airline": "int64",
    "IATA_CODE_Reporting_Airline": "object",
    "Tail_Number": "object",
    "Flight_Number_Reporting_Airline": "int64",
    "OriginAirportID": "int64",
    "OriginAirportSeqID": "int64",
    "OriginCityMarketID": "int64",
    "Origin": "object",
    "OriginCityName": "object",
    "OriginState": "object",
    "OriginStateFips": "int64",
    "OriginStateName": "object",
    "OriginWac": "int64",
    "DestAirportID": "int64",
    "DestAirportSeqID": "int64",
    "DestCityMarketID": "int64",
    "Dest": "object",
    "DestCityName": "object",
    "DestState": "object",
    "DestStateFips": "int64",
    "DestStateName": "object",
    "DestWac": "int64",
    "CRSDepTime": "int64",
    "DepTime": "float64",
    "DepDelay": "float64",
    "DepDelayMinutes": "float64",
    "DepDel15": "float64",
    "DepartureDelayGroups": "float64",
    "DepTimeBlk": "object",
    "TaxiOut": "float64",
    "WheelsOff": "float64",
    "WheelsOn": "float64",
    "TaxiIn": "float64",
    "CRSArrTime": "int64",
    "ArrTime": "float64",
    "ArrDelay": "float64",
    "ArrDelayMinutes": "float64",
    "ArrDel15": "float64",
    "ArrivalDelayGroups": "float64",
    "ArrTimeBlk": "object",
    "Cancelled": "float64",
    "CancellationCode": "object",
    "Diverted": "float64",
    "CRSElapsedTime": "float64",
    "ActualElapsedTime": "float64",
    "AirTime": "float64",
    "Flights": "float64",
    "Distance": "float64",
    "DistanceGroup": "int64",
    "CarrierDelay": "float64",
    "WeatherDelay": "float64",
    "NASDelay": "float64",
    "SecurityDelay": "float64",
    "LateAircraftDelay": "float64",
    "FirstDepTime": "float64",
    "TotalAddGTime": "float64",
    "LongestAddGTime": "float64",
    "DivAirportLandings": "int64",
    "DivReachedDest": "float64",
    "DivActualElapsedTime": "float64",
    "DivArrDelay": "float64",
    "DivDistance": "float64",
    # For each diversion slot 1-5, the Airport and TailNum columns are text
    # (airport code, aircraft tail number), so they are read as object in
    # every slot, matching Div1. A float64 dtype would make read_csv fail
    # on the first row where one of them is filled in.
    "Div1Airport": "object",
    "Div1AirportID": "float64",
    "Div1AirportSeqID": "float64",
    "Div1WheelsOn": "float64",
    "Div1TotalGTime": "float64",
    "Div1LongestGTime": "float64",
    "Div1WheelsOff": "float64",
    "Div1TailNum": "object",
    "Div2Airport": "object",
    "Div2AirportID": "float64",
    "Div2AirportSeqID": "float64",
    "Div2WheelsOn": "float64",
    "Div2TotalGTime": "float64",
    "Div2LongestGTime": "float64",
    "Div2WheelsOff": "float64",
    "Div2TailNum": "object",
    "Div3Airport": "object",
    "Div3AirportID": "float64",
    "Div3AirportSeqID": "float64",
    "Div3WheelsOn": "float64",
    "Div3TotalGTime": "float64",
    "Div3LongestGTime": "float64",
    "Div3WheelsOff": "float64",
    "Div3TailNum": "object",
    "Div4Airport": "object",
    "Div4AirportID": "float64",
    "Div4AirportSeqID": "float64",
    "Div4WheelsOn": "float64",
    "Div4TotalGTime": "float64",
    "Div4LongestGTime": "float64",
    "Div4WheelsOff": "float64",
    "Div4TailNum": "object",
    "Div5Airport": "object",
    "Div5AirportID": "float64",
    "Div5AirportSeqID": "float64",
    "Div5WheelsOn": "float64",
    "Div5TotalGTime": "float64",
    "Div5LongestGTime": "float64",
    "Div5WheelsOff": "float64",
    "Div5TailNum": "object",
}
import pandas as pd
filepath = '/anvil/projects/tdm/data/flights/2014.csv'

# The only columns loaded from the CSV.
columns_to_read = [
    'DepDelay', 'ArrDelay', 'Flight_Number_Reporting_Airline', 'Distance',
    'CarrierDelay', 'WeatherDelay', 'CRSDepTime', 'CRSArrTime',
    'DepTime', 'ArrTime', 'Origin', 'Dest', 'AirTime'
]

# dtype for each column in columns_to_read.
# DepTime and ArrTime are float64, not int64: an int64 column cannot hold a
# missing value, so read_csv raises as soon as one of them is empty in a row.
# float64 also matches the dtype given to these two columns in the full
# col_types dictionary.
col_types = {
    'DepDelay': 'float64', 'ArrDelay': 'float64', 'Flight_Number_Reporting_Airline': 'int64',
    'Distance': 'float64', 'CarrierDelay': 'float64', 'WeatherDelay': 'float64',
    'CRSDepTime': 'int64', 'CRSArrTime': 'int64', 'DepTime': 'float64', 'ArrTime': 'float64',
    'Origin': 'object', 'Dest': 'object', 'AirTime': 'float64'
}

# Read only the first 100 rows of the selected columns.
myDF = pd.read_csv(filepath, usecols=columns_to_read, nrows=100, dtype=col_types)
# One ScheduledFlight per row of myDF. The Origin and Dest columns fill the
# origin_airport_id and dest_airport_id attributes.
scheduled_flights = [
    ScheduledFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _, record in myDF.iterrows()
]
2b. Load the data from myDF into the ScheduledFlight class instances. (When you are finished, you should have a list of 100 ScheduledFlight instances.)
# Turn every row of myDF into a ScheduledFlight. The column names are listed
# in the same order as the ScheduledFlight constructor's parameters, so the
# values can be unpacked positionally.
scheduled_flights = [
    ScheduledFlight(*(
        record[column]
        for column in (
            'Flight_Number_Reporting_Airline', 'Origin', 'Dest',
            'DepTime', 'ArrTime', 'DepDelay', 'ArrDelay',
            'CRSDepTime', 'CRSArrTime',
        )
    ))
    for _, record in myDF.iterrows()
]
3a. Create an empty dictionary named ontime_count. Then use a for loop to assign values to ontime_count from the 100 ScheduledFlight objects.
# Number of on-time flights per destination, keyed by dest_airport_id.
# Destinations with no on-time flight get no entry.
ontime_count = {}
for flight in scheduled_flights:
    dest = flight.dest_airport_id
    if flight.is_ontime():
        # get(dest, 0) starts the tally at zero the first time a
        # destination is seen.
        ontime_count[dest] = ontime_count.get(dest, 0) + 1
3b. Calculate the total number of flights that were on time, for each destination airport.
# Display the per-destination on-time tallies built in 3a.
print(ontime_count)
{'ICT': 1, 'DFW': 12, 'TPA': 11, 'OGG': 7, 'SJC': 3, 'KOA': 4, 'SMF': 1, 'SEA': 7, 'PDX': 3, 'OAK': 2, 'HNL': 6, 'ANC': 2, 'LIH': 2, 'SAN': 2, 'BLI': 1, 'DCA': 2}
4a. Add a method called is_delayed() to the class that indicates if the flight was delayed (either had a departure delay or an arrival delay).
class DelayedFlight(ScheduledFlight):
    """A ScheduledFlight that can report whether it was delayed.

    The constructor is inherited from ScheduledFlight unchanged, so it takes
    the same nine arguments in the same order.
    """

    def is_delayed(self) -> bool:
        """Return True when the departure or the arrival (or both) was late.

        A delay counts as late when it is greater than zero. A missing (NaN)
        delay compares false against zero and so does not count as late.
        """
        # bool() turns a NumPy boolean (produced when the delays are NumPy
        # scalars) into a plain Python bool.
        return bool(self.dep_delay > 0 or self.arr_delay > 0)
# One DelayedFlight per row of myDF. The Origin and Dest columns fill the
# origin_airport_id and dest_airport_id attributes.
delayed_flights = [
    DelayedFlight(
        flight_number=record['Flight_Number_Reporting_Airline'],
        origin_airport_id=record['Origin'],
        dest_airport_id=record['Dest'],
        dep_time=record['DepTime'],
        arr_time=record['ArrTime'],
        dep_delay=record['DepDelay'],
        arr_delay=record['ArrDelay'],
        crs_dep_time=record['CRSDepTime'],
        crs_arr_time=record['CRSArrTime'],
    )
    for _, record in myDF.iterrows()
]
4b. Calculate the total number of delayed flights, for each destination airport.
# Number of delayed flights per destination, keyed by dest_airport_id.
# Destinations with no delayed flight get no entry.
delayed_count = {}
for flight in delayed_flights:
    dest = flight.dest_airport_id
    if flight.is_delayed():
        # get(dest, 0) starts the tally at zero the first time a
        # destination is seen.
        delayed_count[dest] = delayed_count.get(dest, 0) + 1
print(delayed_count)
{'ICT': 1, 'DFW': 19, 'TPA': 9, 'SAN': 1, 'HNL': 1, 'SEA': 2, 'OGG': 1}