Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
AbhiFutureTech committed May 11, 2024
0 parents commit 6805cf7
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions .idea/.name

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions .idea/EDA.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

100 changes: 100 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import sns

# Load the coaster dataset from its CSV file on the local disk.
df = pd.read_csv(r"E:\projects\Chat Bot\coaster_db.csv")

# First look at the data, printed in this order: dimensions, the first
# five rows, column labels, per-column dtypes and summary statistics.
for overview in (df.shape, df.head(5), df.columns, df.dtypes, df.describe()):
    print(overview)

# Restrict the frame to the columns used by the rest of the analysis.
# (A single column could instead be removed with
#  ``df.drop(['Opening date'], axis=1)``.)
# Every other column of the CSV -- e.g. 'Length', 'Speed', 'Opening date',
# 'Type', 'Height', 'Inversions', 'G-force', 'Cost', 'Designer' -- is
# deliberately left out of the subset.
columns_to_keep = [
    'coaster_name',
    'Location',
    'Status',
    'Manufacturer',
    'year_introduced',
    'latitude',
    'longitude',
    'Type_Main',
    'opening_date_clean',
    'speed_mph',
    'height_ft',
    'Inversions_clean',
    'Gforce_clean',
]

# .copy() detaches the subset from the originally loaded frame so later
# column assignments operate on an independent object.
df = df[columns_to_keep].copy()

# Map the source column names onto one consistent Capitalised_Snake style;
# columns not listed here keep their current name.
column_renames = {
    'coaster_name': 'Coaster_Name',
    'year_introduced': 'Year_Introduced',
    'opening_date_clean': 'Opening_Date',
    'speed_mph': 'Speed_mph',
    'height_ft': 'Height_ft',
    'Inversions_clean': 'Inversions',
    'Gforce_clean': 'Gforce',
}
df = df.rename(columns=column_renames)

# Convert the opening-date column to pandas datetime values.
df['Opening_Date'] = pd.to_datetime(df['Opening_Date'])

# Data-quality checks: count of missing values per column, then any rows
# that are exact duplicates of an earlier row.
missing_per_column = df.isna().sum()
print(missing_per_column)

fully_duplicated = df.duplicated()
print(df[fully_duplicated])

# Rows whose coaster name has already appeared earlier (first five only).
repeated_name = df.duplicated(subset=['Coaster_Name'])
print(df[repeated_name].head(5))

# Look at every row of one repeated coaster to see how the entries differ.
print(df.query('Coaster_Name == "Crystal Beach Cyclone"'))

print(df.columns)

# Keep only the first row for each (name, location, opening date)
# combination and renumber the index from zero.
df = df.drop_duplicates(
    subset=['Coaster_Name', 'Location', 'Opening_Date']
).reset_index(drop=True)

# Number of coasters per introduction year, most frequent year first.
year_counts = df['Year_Introduced'].value_counts()
print(year_counts)

# Bar chart of the ten most frequent years, drawn on its own figure.
_, ax = plt.subplots()
year_counts.head(10).plot(
    kind='bar',
    ax=ax,
    title='Top 10 Years Coasters Introduced',
)
# The label setters return matplotlib Text objects; call them for their
# effect only instead of printing the object reprs to stdout.
ax.set_xlabel('Year Introduced')
ax.set_ylabel('Count')
# Display this chart before any further plotting: a later Series.plot()
# call without an explicit ``ax`` draws onto the current axes and would
# otherwise be overlaid on this bar chart.
plt.show()

# Histogram of coaster speeds in 20 bins.
# A fresh figure/axes pair is created and passed in explicitly because
# Series.plot() without ``ax`` reuses the current axes, which would draw
# the histogram on top of any chart that has not been shown yet.
_, ax = plt.subplots()
df['Speed_mph'].plot(
    kind='hist',
    bins=20,
    ax=ax,
    title='Coaster Speed (mph)',
)
# set_xlabel returns a Text object; there is nothing useful to print.
ax.set_xlabel('Speed (mph)')
plt.show()

# Number of coasters per main type.
print(df['Type_Main'].value_counts())

# Scatter plot of speed against height, one point per coaster.
df.plot(
    kind='scatter',
    x='Speed_mph',
    y='Height_ft',
    title='Coaster Speed vs. Height',
)
# plt.show() returns None, so it is called directly rather than wrapped
# in print(), which only emitted a stray "None" line.
plt.show()

# Mean coaster speed per location, ignoring the catch-all "Other" location
# and any location with fewer than 10 speed values, sorted ascending so
# the fastest location ends up at the top of the horizontal bar chart.
speed_by_location = (
    df.query('Location != "Other"')
    .groupby('Location')['Speed_mph']
    .agg(['mean', 'count'])
    .query('count >= 10')
    .sort_values('mean')['mean']
)

# Create the figure explicitly so the requested size is always applied;
# when Series.plot() reuses an existing axes its ``figsize`` has no effect.
_, ax = plt.subplots(figsize=(12, 5))
speed_by_location.plot(
    kind='barh',
    ax=ax,
    title='Average Coaster Speed by Location',
)
ax.set_xlabel('Average Coaster Speed')
# plt.show() returns None; call it directly instead of printing "None".
plt.show()

0 comments on commit 6805cf7

Please sign in to comment.