-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExploratory_data_analysis.py
50 lines (38 loc) · 2.66 KB
/
Exploratory_data_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# import libraries
import os
import pandas as pd
from helper import count_plot
def main():
    """Run the exploratory data analysis and draw one count plot per question.

    Steps:
      1. Make sure a ``graphs`` directory exists under the current working
         directory.
      2. Load ``data/cleaned_data.csv`` into a DataFrame, using the first
         column as the index.
      3. Print a preview of the data and the DataFrame summary.
      4. For each question, print a banner, call ``count_plot`` and print a
         "Done" banner.

    ``count_plot`` is imported from the project's ``helper`` module; what it
    renders or saves is not visible from this file.
    """
    # Create directory for graphs. exist_ok=True replaces the separate
    # exists() check, which could race with another process creating it.
    graphs_dir = os.path.join(os.getcwd(), 'graphs')
    os.makedirs(graphs_dir, exist_ok=True)

    # Reading the CSV file into a pandas DataFrame. The path is built with
    # os.path.join so it resolves on POSIX systems as well as on Windows
    # (a literal backslash is only a separator on Windows).
    df = pd.read_csv(os.path.join('data', 'cleaned_data.csv'), index_col=0)
    print(df.head())
    print('------------------- Exploratory Data Analysis is just starting .... --------------------------')
    print('------------------DataFrame Infos:--------------------------------')
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line.
    df.info()

    done_banner = '------------------------------ Done ------------------------------------------'

    # One entry per question: (banner, column, plot title, extra keyword
    # arguments forwarded to count_plot).
    # NOTE: two titles differ from the earlier revision on purpose:
    #   - Q2 said "Stats" where "States" was meant;
    #   - Q6 reused the Q5 "Employment type" title for the workplace_type plot.
    questions = [
        (
            '-------------------- Q 1 - Which companies have the most job openings? ----------------------',
            'company_name',
            'Top 10 Companies by Job Openings',
            {},
        ),
        (
            '-------------------- Q 2 - What are the top 10 state where the jobs are being offered? ----------------------',
            'company_location',
            'Top 10 States by Job Openings',
            {},
        ),
        (
            '-------------------- Q 3 - Which seniority levels are in demand? ----------------------',
            'experience',
            'Distribution of Job Openings by Seniority Level',
            {'orient': 'v'},
        ),
        (
            '-------------------- Q 4 - Which industries have the highest number of job openings? ----------------------',
            'industry',
            'Top 10 Industries by job Openings',
            {},
        ),
        (
            '-------------------- Q 5 - What is the distribution of employment types (full-time, part-time, contract, etc.)? ----------------------',
            'job_type',
            'Employment type Distribution of job Openings',
            {'orient': 'v'},
        ),
        (
            '-------------------- Q 6 - What is the distribution of workplace type (Remote, On-site )? ----------------------',
            'workplace_type',
            'Workplace type Distribution of job Openings',
            {'orient': 'v'},
        ),
        (
            '-------------------- Q 7 - What is the distribution of company size of job Openings (small, medium, large )? ----------------------',
            'company_size',
            'Company size Distribution of job Openings',
            {'orient': 'v'},
        ),
    ]

    for banner, column, title, plot_kwargs in questions:
        print(banner)
        count_plot(df, column, title, **plot_kwargs)
        print(done_banner)
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()