Skip to content

Commit

Permalink
updated formatting
Browse files (browse the repository at this point in the history)
  • Loading branch information
jessepisel committed Mar 30, 2023
1 parent 40ee3be commit 8cda165
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 31 deletions.
75 changes: 45 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
```

Expand Down Expand Up @@ -131,8 +132,18 @@ assert heroes_df.shape[0] == 734
assert heroes_df.shape[1] == 10

# These should be the columns
assert list(heroes_df.columns) == ['name', 'Gender', 'Eye color', 'Race',
'Hair color', 'Height', 'Publisher', 'Skin color', 'Alignment', 'Weight']
assert list(heroes_df.columns) == [
"name",
"Gender",
"Eye color",
"Race",
"Hair color",
"Height",
"Publisher",
"Skin color",
"Alignment",
"Weight",
]
```

Now you want to get familiar with the data. This step includes:
Expand Down Expand Up @@ -186,16 +197,16 @@ The following code will check if it was loaded correctly:
assert powers_df.shape == (167, 667)

# The first column should be '3-D Man'
assert powers_df.columns[0] == '3-D Man'
assert powers_df.columns[0] == "3-D Man"

# The last column should be 'Zoom'
assert powers_df.columns[-1] == 'Zoom'
assert powers_df.columns[-1] == "Zoom"

# The first index should be 'Agility'
assert powers_df.index[0] == 'Agility'
assert powers_df.index[0] == "Agility"

# The last index should be 'Omniscient'
assert powers_df.index[-1] == 'Omniscient'
assert powers_df.index[-1] == "Omniscient"
```

## 2. Perform Data Cleaning Required to Answer First Question
Expand All @@ -211,14 +222,18 @@ As you likely noted above, the `Publisher` column is missing some values. Let's

```python
# Run this cell without changes
has_publisher_sample = heroes_df[heroes_df["Publisher"].notna()].sample(5, random_state=1)
has_publisher_sample = heroes_df[heroes_df["Publisher"].notna()].sample(
5, random_state=1
)
has_publisher_sample
```


```python
# Run this cell without changes
missing_publisher_sample = heroes_df[heroes_df["Publisher"].isna()].sample(5, random_state=1)
missing_publisher_sample = heroes_df[heroes_df["Publisher"].isna()].sample(
5, random_state=1
)
missing_publisher_sample
```

Expand Down Expand Up @@ -381,7 +396,7 @@ assert heroes_and_powers_df.shape[0] == 647
# modify this test. We are checking that all of the powers are present as
# columns.)
assert [power in heroes_and_powers_df.columns for power in powers_df.index]
# (If you modified the value of heroes_df along the way, you might need to
# (If you modified the value of heroes_df along the way, you might need to
# modify this as well. We are checking that all of the attribute columns from
# heroes_df are present as columns in the joined df)
assert [attribute in heroes_and_powers_df.columns for attribute in heroes_df.columns]
Expand All @@ -395,7 +410,9 @@ Now that we have created a joined dataframe, we can aggregate the number of supe

# Note: we can use sum() with True and False values and they will
# automatically be cast to 1s and 0s
heroes_and_powers_df["Power Count"] = sum([heroes_and_powers_df[power_name] for power_name in powers_df.index])
heroes_and_powers_df["Power Count"] = sum(
[heroes_and_powers_df[power_name] for power_name in powers_df.index]
)
heroes_and_powers_df
```

Expand All @@ -410,9 +427,7 @@ Now we can plot the height vs. the count of powers:
fig, ax = plt.subplots(figsize=(16, 8))

ax.scatter(
x=heroes_and_powers_df["Height"],
y=heroes_and_powers_df["Power Count"],
alpha=0.3
x=heroes_and_powers_df["Height"], y=heroes_and_powers_df["Power Count"], alpha=0.3
)

ax.set_xlabel("Height (cm)")
Expand Down Expand Up @@ -459,11 +474,7 @@ Now we can redo that plot without those negative heights:

fig, ax = plt.subplots(figsize=(16, 8))

ax.scatter(
x=question_2_df["Height"],
y=question_2_df["Power Count"],
alpha=0.3
)
ax.scatter(x=question_2_df["Height"], y=question_2_df["Power Count"], alpha=0.3)

ax.set_xlabel("Height (cm)")
ax.set_ylabel("Number of Superpowers")
Expand All @@ -485,29 +496,31 @@ fig, ax = plt.subplots(figsize=(16, 8))
# Select subsets
question_2_male = question_2_df[question_2_df["Gender"] == "Male"]
question_2_female = question_2_df[question_2_df["Gender"] == "Female"]
question_2_other = question_2_df[(question_2_df["Gender"] != "Male") & (question_2_df["Gender"] != "Female")]
question_2_other = question_2_df[
(question_2_df["Gender"] != "Male") & (question_2_df["Gender"] != "Female")
]

# Plot data with different colors
ax.scatter(
x=question_2_male["Height"],
y=question_2_male["Power Count"],
alpha=0.5,
color="cyan",
label="Male"
label="Male",
)
ax.scatter(
x=question_2_female["Height"],
y=question_2_female["Power Count"],
alpha=0.5,
color="gray",
label="Female"
label="Female",
)
ax.scatter(
x=question_2_other["Height"],
y=question_2_other["Power Count"],
alpha=0.5,
color="yellow",
label="Other"
label="Other",
)

# Customize appearance
Expand Down Expand Up @@ -553,7 +566,11 @@ assert type(question_3_df) == pd.DataFrame
assert question_3_df.shape == (167, 3)

# Checking the column names
assert sorted(list(question_3_df.columns)) == ['DC Comics', 'Marvel Comics', 'Superpower Name']
assert sorted(list(question_3_df.columns)) == [
"DC Comics",
"Marvel Comics",
"Superpower Name",
]
```

### Answering the Question
Expand All @@ -565,7 +582,9 @@ The code below uses the dataframe you created to find and plot the most common s
# Run this cell without changes

marvel_most_common = question_3_df.drop("DC Comics", axis=1)
marvel_most_common = marvel_most_common.sort_values(by="Marvel Comics", ascending=False)[:5]
marvel_most_common = marvel_most_common.sort_values(
by="Marvel Comics", ascending=False
)[:5]
marvel_most_common
```

Expand All @@ -585,13 +604,9 @@ dc_most_common
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(15, 5))

ax1.bar(
x=marvel_most_common["Superpower Name"],
height=marvel_most_common["Marvel Comics"]
)
ax2.bar(
x=dc_most_common["Superpower Name"],
height=dc_most_common["DC Comics"]
x=marvel_most_common["Superpower Name"], height=marvel_most_common["Marvel Comics"]
)
ax2.bar(x=dc_most_common["Superpower Name"], height=dc_most_common["DC Comics"])

ax1.set_ylabel("Count of Superheroes")
ax2.set_ylabel("Count of Superheroes")
Expand Down
Loading

0 comments on commit 8cda165

Please sign in to comment.