
Commit daae41b

initial
1 parent c24ddad commit daae41b

4 files changed: +118 −18 lines changed

data_science/sales.py

Lines changed: 3 additions & 0 deletions
@@ -62,3 +62,6 @@ def greater(row):
 date_sample = sales.groupby(sales.index.month).sum().groupby(indexes).sum()
 date_sample2 = sales.groupby(sales.index.month).sum()

+#%%
+sales.loc[sales['STATUS'] == 'Shipped', 'derived'] = 'H'
+

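The new line is standard boolean-mask assignment with .loc; a minimal self-contained sketch (frame contents hypothetical):

import pandas as pd

sales = pd.DataFrame({'STATUS': ['Shipped', 'Cancelled', 'Shipped']})
# rows matching the mask get 'H'; all other rows get NaN in the new column
sales.loc[sales['STATUS'] == 'Shipped', 'derived'] = 'H'
print(sales)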
numpytutorial/numpy_tutorial.py

Lines changed: 69 additions & 2 deletions
@@ -133,6 +133,18 @@
 a = np.array([1,2,3,4])
 b = a.copy()  # creates a deep copy, so a and b are independent of each other's changes

+#%%
+# create an array with the given numbers on the diagonal
+a = np.diag([1,2,3,4,5])
+print(a)
+
+
+# extracting the diagonal of an existing array
+a = np.arange(25).reshape(5,5)
+print(a)
+print(np.diag(a))
+
+
 #%%
 # mathematics operations
 a = np.random.randint(1, 100, size=(3,6))
@@ -260,6 +272,50 @@
 print('\nHorizontal Stack')
 print(horizontal_stack)

+#%%
+
+# converting a 1d array to a 2d array
+# a transpose cannot turn a 1d array into a 2d array (transposing a 1d array is a no-op)
+# np.newaxis adds an axis at whatever position you place it
+
+a = np.arange(0,40,10)
+a = a[:, np.newaxis]  # now a 2d column vector
+print(a)
+
+a = np.arange(0,10).reshape(2,5)
+a = a[:,:, np.newaxis]
+print(a.shape)
+print(a)
+#%%
+
+# flattening: converting an nd array to a 1-dimensional array
+a = np.arange(25).reshape(5,5)
+print(a.ravel())
+#%%
+
+# sorting
+# sorting with fancy indexing
+a = np.array([4, 3, 1, 2])
+j = np.argsort(a)
+print(j)
+
+print(a[j])
+
+# sorting along an axis:
+a = np.array([[5, 4, 6], [2, 3, 2]])
+b = np.argsort(a, axis=0)  # try with axis=1
+print(b)
+#%%
+# tiling an array
+a = np.tile(np.arange(0,40,10),(3,2))
+
+print(a)
+
+# getting the transpose
+a = a.T
+print(a)
+
+
 #%%
 # working with range:
 np_range = np.arange(0,40,5)  # 0 to 35 in steps of 5
@@ -281,7 +337,8 @@
 print('\nArray 2-D')
 print(two_d)

-print('Index from row 3 till last and columns 0,2,4')
+
+print('\nIndex from row 3 till last and columns 0,2,4')
 print(two_d[3:,[0,2,4]])

@@ -306,7 +363,7 @@
 #%%
 # getting the coordinates of the maximum value
 np.random.seed(42)
-a = np.random.randint(1,1000, size=(2, 4, 5))
+a = np.random.randint(1,1000, size=(2,2, 4, 5))
 print('\nArray A')
 print(a)

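The coordinate lookup itself sits outside this hunk; a hedged sketch of one common way to do it (np.argmax on the flattened array mapped back with np.unravel_index, not necessarily the exact call the file uses):

import numpy as np

np.random.seed(42)
a = np.random.randint(1, 1000, size=(2, 2, 4, 5))
idx = np.unravel_index(np.argmax(a), a.shape)  # n-d coordinates of the maximum
print(idx, a[idx])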
@@ -367,3 +424,13 @@ def sum_num(*args):
 kwargs = {'name':1,'age':2,'system':3}
 print(len(args))

+#%%
+# cumulative histogram of a binomial sample (an empirical view of the percentiles)
+import numpy as np
+
+data = np.random.binomial(9,0.25,size=1000)
+print(np.sum(data==9))  # how many draws hit the maximum value of 9
+import matplotlib.pyplot as plt
+plt.hist(data, cumulative=True, density=True, histtype='step', bins=100)
+plt.show()
+

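If the goal is a concrete percentile value rather than the plot, numpy has a direct routine; a minimal sketch under that assumption:

import numpy as np

data = np.random.binomial(9, 0.25, size=1000)
print(np.percentile(data, 90))  # value below which ~90% of the sample falls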
pandas_help/pandas_operation_help.py

Lines changed: 23 additions & 1 deletion
@@ -49,6 +49,18 @@
 # dropping rows with any NaN: df.dropna(how='any'); how='all' removes a row only when all of its columns are null
 # thresh=<int> keeps only the rows/columns that have at least thresh non-null values

+# to drop rows based on whether a particular column is na, use
+# df.dropna(subset=[<column name>], inplace=True)
+
+#%%
+
+# to concatenate two string columns you can use:
+# data.A.str.cat(data.B, sep=' ')  # specify sep as you like
+
+#%%
+
+# to get the total null count in each column you can use data.isnull().sum()
+
 # %%

 # to get index
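The three notes above are comment-only; a hedged, runnable sketch of the calls (frame and column names hypothetical):

import pandas as pd

data = pd.DataFrame({'A': ['x', None, 'z'], 'B': ['1', '2', None]})
print(data.isnull().sum())               # null count per column
data.dropna(subset=['A'], inplace=True)  # drop rows where column A is na
print(data.A.str.cat(data.B, sep=' '))   # concatenate two string columns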
@@ -74,6 +86,7 @@
 # to get the frequency count of a column
 # value_counts is a function of Series, not of DataFrame, so it cannot be applied as data.value_counts(dropna=False)
 unique = data['Year'].value_counts(dropna=False)  # dropna=False will include na values
+# you can pass normalize=True to get proportions; the values are between 0 and 1
 print(unique)

 # %%
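For instance (hypothetical series), normalize=True turns the counts into proportions:

import pandas as pd

years = pd.Series([2000, 2000, 2004, None])
print(years.value_counts(dropna=False, normalize=True))  # proportions sum to 1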
@@ -216,6 +229,9 @@
 # data['A'] = data['A'].astype(str)
 #
 # example: if we want to convert column A of data to category --> the category datatype is memory efficient
+# https://campus.datacamp.com/courses/analyzing-police-activity-with-pandas/analyzing-the-effect-of-weather-on-policing?ex=4
+# follow the link above for more useful ways of using category
+
 # data['A'] = data['A'].astype('category')

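A small sketch of the memory saving (column contents hypothetical):

import pandas as pd

data = pd.DataFrame({'A': ['low', 'medium', 'high'] * 10000})
print(data['A'].memory_usage(deep=True))  # object dtype: one Python string per row
data['A'] = data['A'].astype('category')
print(data['A'].memory_usage(deep=True))  # far smaller: integer codes plus 3 category values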
@@ -277,7 +293,7 @@
 print(pattern3)

 # pattern = '^[A-Za-z .]*$'
-# mask = countries.str.contains(pattern)
+# mask = countries.str.contains(pattern)  # pass na=False so missing values come out as False rather than NaN

 # %%

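A minimal runnable sketch of the na=False behaviour (series contents hypothetical):

import pandas as pd

countries = pd.Series(['United States', None, 'Brazil'])
mask = countries.str.contains('^[A-Za-z .]*$', na=False)  # the missing entry becomes False
print(mask)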
@@ -613,4 +629,10 @@ def filling(series):

 # similar to add we have multiply

+#%%

+# computing a frequency table using crosstab
+cross = pd.crosstab([data.Year, data.Gender], data.Medal)
+print(cross)
+cross['Total'] = cross.sum(axis=1)
+print(cross)

requests_help/requests_help.py

Lines changed: 23 additions & 15 deletions
@@ -1,27 +1,35 @@
-#%%
+# %%
+
 # download a file using urllib.request
-from urllib.request import urlretrieve

-url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'
-urlretrieve(url, 'data_science/wine_data.csv')
+"""

-#%%
+Request basic help module

-# download file using requests
+"""

+from urllib.request import urlretrieve
 import requests
+from bs4 import BeautifulSoup
+
+URL = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-'\
+      'red.csv'
+urlretrieve(URL, 'data_science/wine_data.csv')

-url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'
-r = requests.get(url, allow_redirects=True)
+# %%
+
+# download file using requests
+
+URL = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-'\
+      'red.csv'
+r = requests.get(URL, allow_redirects=True)
 open('google.csv', 'wb').write(r.content)
-#%%
+# %%
 # requests to get html data

-import requests
-from bs4 import BeautifulSoup
-
-url = 'https://campus.datacamp.com/courses/intermediate-importing-data-in-python/importing-data-from-the-internet-1?ex=6'
-response = requests.get(url)
+URL = 'https://campus.datacamp.com/courses/intermediate-importing-data-in-python/importing'\
+      '-data-from-the-internet-1?ex=6'
+response = requests.get(URL)
 text = response.text

 # we can pretty the text using BeautifulSoup
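One robustness note on the download step: open('google.csv', 'wb').write(r.content) leaves closing the file to the garbage collector; a context-manager variant with the same behaviour would be:

import requests

URL = ('https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/'
       'winequality-red.csv')
r = requests.get(URL, allow_redirects=True)
with open('google.csv', 'wb') as f:  # the file is closed deterministically on exit
    f.write(r.content)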
@@ -44,4 +52,4 @@
 a = pretty.find_all('a')

 for link in a:
-    print(link.get('href')) # .get() method to extract the attributes of a tag
+    print(link.get('href'))  # .get() method to extract the attributes of a tag
