Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

averageDailyFlights_Matrix added #37

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions api_aerodatabox/averageDailyFlights_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
import json
import pandas as pd
from scipy.sparse import csr_matrix, save_npz
from collections import defaultdict

# Root directory that holds one sub-folder per calendar month.
base_directory = '.'

# Month names in calendar order; the position (1-12) becomes the folder prefix.
_month_names = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)

# Folder names of the form "01-January" ... "12-December".
month_folders = [
    f'{number:02d}-{name}'
    for number, name in enumerate(_month_names, start=1)
]

def update_matrix(data, origin, destination, avg_daily_flights):
    """Add a route's average daily flights to ``data[origin][destination]``.

    ``data`` is indexed first by origin and then by destination; the looked-up
    cell is incremented in place by ``float(avg_daily_flights)``. Nothing is
    returned.
    """
    row = data[origin]
    # Read the current cell before converting, then store the new total.
    row[destination] = row[destination] + float(avg_daily_flights)

# Build and save one origin x destination matrix per monthly folder.
for month_folder in month_folders:
    directory = os.path.join(base_directory, month_folder)

    # A month without a folder has nothing to aggregate; skip it instead of
    # trying to build a matrix from no data.
    if not os.path.isdir(directory):
        print(f'Skipping {month_folder}: directory not found.')
        continue

    # Nested mapping: data[origin][destination] -> summed average daily flights.
    data = defaultdict(lambda: defaultdict(float))

    # os.listdir returns entries in arbitrary order; sorting keeps the file
    # processing order (and therefore the matrix layout) reproducible.
    file_list = sorted(f for f in os.listdir(directory) if f.endswith('.json'))
    total_files = len(file_list)

    for processed, filename in enumerate(file_list, start=1):
        # The part of the file name before the first dot is used as the origin.
        origin = filename.split('.')[0]
        filepath = os.path.join(directory, filename)

        try:
            # Explicit encoding so decoding does not depend on the locale.
            with open(filepath, 'r', encoding='utf-8') as file:
                data_json = json.load(file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file {filename}")
        else:
            # `or` fallbacks tolerate keys that are present but null.
            for route in data_json.get('routes') or []:
                destination_info = route.get('destination') or {}
                destination = destination_info.get('icao')
                avg_daily_flights = route.get('averageDailyFlights')
                if destination and avg_daily_flights is not None:
                    update_matrix(data, origin, destination, avg_daily_flights)

        # Progress report every 100 files and after the final file.
        if processed % 100 == 0 or processed == total_files:
            print(f'Processed {processed}/{total_files} files in {month_folder}...')

    # With no routes collected the frame would have no columns, which
    # DataFrame.sparse.to_coo() cannot convert; skip saving in that case.
    if not data:
        print(f'No route data found for {month_folder}; nothing saved.')
        continue

    # Rows are origins, columns are destinations; missing pairs become 0.
    matrix_df = (
        pd.DataFrame.from_dict(data, orient='index')
        .fillna(0)
        .astype(pd.SparseDtype("float", 0))
    )

    # NOTE(review): only the numeric matrix is written; the ICAO labels held
    # in matrix_df.index / matrix_df.columns are not saved alongside it.
    sparse_matrix = csr_matrix(matrix_df.sparse.to_coo())
    save_npz(f'{month_folder}.npz', sparse_matrix)

    print(f'Saved matrix for {month_folder}.')

print('All files processed successfully.')
Binary file not shown.
15 changes: 15 additions & 0 deletions api_aerodatabox/case_study/aircraft_seat_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,19 @@
"Gulfstream GIII": 19,
"Bombardier Challenger 300": 9,
"Gulfstream Aerospace V": 19,
"Cessna 560 Citation Excel": 8,
"Airbus A321-200 (Sharklets)": 185,
"Bombardier Dash 8 / DHC-8": 37,
"77I": 0,
"78I": 0,
"Airbus A330-900 NEO": 287,
"Boeing 737-700 (winglets)": 128,
"Sukhoi Superjet 100": 98,
"Boeing 767-300 (winglets)": 218,
"ATR 72": 70,
"78E": 0,
"77N": 0,
"Boeing 787-10": 318,
"32E": 0,
"Airbus A321-200 (Sharklets)": 185,
}
6 changes: 3 additions & 3 deletions api_aerodatabox/case_study/data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def generate_time_intervals(start_date, end_date, delta):
current += delta

# Airports list
airports = ["DNMM", "DNAA", "KLAX", "KSFO", "YMML", "YSSY"]
airports = ["OEJN", "OERK"]

# API details
base_url = "https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport}/{start}/{end}"
Expand All @@ -31,8 +31,8 @@ def generate_time_intervals(start_date, end_date, delta):
}

# Time interval settings
start_date = datetime(2023, 5, 25)
end_date = datetime(2024, 5, 23)
start_date = datetime(2023, 6, 13)
end_date = datetime(2023, 7, 13)
time_interval = timedelta(hours=12)

# Create output directory if it doesn't exist
Expand Down
Loading