
Commit daae41b

initial
1 parent c24ddad commit daae41b

4 files changed: +118 −18 lines changed

data_science/sales.py

Lines changed: 3 additions & 0 deletions
@@ -62,3 +62,6 @@ def greater(row):
 date_sample = sales.groupby(sales.index.month).sum().groupby(indexes).sum()
 date_sample2 = sales.groupby(sales.index.month).sum()

+#%%
+sales.loc[sales['STATUS'] == 'Shipped', 'derived'] = 'H'
+

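The new line is standard boolean-mask assignment with .loc; a minimal self-contained sketch (frame contents hypothetical):

import pandas as pd

sales = pd.DataFrame({'STATUS': ['Shipped', 'Cancelled', 'Shipped']})
# rows matching the mask get 'H'; all other rows get NaN in the new column
sales.loc[sales['STATUS'] == 'Shipped', 'derived'] = 'H'
print(sales)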
numpytutorial/numpy_tutorial.py

Lines changed: 69 additions & 2 deletions
@@ -133,6 +133,18 @@
 a = np.array([1,2,3,4])
 b = a.copy()  # creates a deep copy, so a and b are independent of each other's changes

+#%%
+# create an array with the given numbers on the diagonal
+a = np.diag([1,2,3,4,5])
+print(a)
+
+
+# extracting the diagonal of an existing array
+a = np.arange(25).reshape(5,5)
+print(a)
+print(np.diag(a))
+
+
 #%%
 # mathematics operations
 a = np.random.randint(1, 100, size=(3,6))
@@ -260,6 +272,50 @@
 print('\nHorizontal Stack')
 print(horizontal_stack)

+#%%
+
+# converting a 1d array to a 2d array
+# a transpose cannot turn a 1d array into a 2d array (transposing a 1d array is a no-op)
+# np.newaxis adds an axis at whatever position you place it
+
+a = np.arange(0,40,10)
+a = a[:, np.newaxis]  # now a 2d column vector
+print(a)
+
+a = np.arange(0,10).reshape(2,5)
+a = a[:,:, np.newaxis]
+print(a.shape)
+print(a)
+#%%
+
+# flattening: converting an nd array to a 1-dimensional array
+a = np.arange(25).reshape(5,5)
+print(a.ravel())
+#%%
+
+# sorting
+# sorting with fancy indexing
+a = np.array([4, 3, 1, 2])
+j = np.argsort(a)
+print(j)
+
+print(a[j])
+
+# sorting along an axis:
+a = np.array([[5, 4, 6], [2, 3, 2]])
+b = np.argsort(a, axis=0)  # try with axis=1
+print(b)
+#%%
+# tiling an array
+a = np.tile(np.arange(0,40,10),(3,2))
+
+print(a)
+
+# getting the transpose
+a = a.T
+print(a)
+
+
 #%%
 # working with range:
 np_range = np.arange(0,40,5)  # 0 to 35 in steps of 5
@@ -281,7 +337,8 @@
 print('\nArray 2-D')
 print(two_d)

-print('Index from row 3 till last and columns 0,2,4')
+
+print('\nIndex from row 3 till last and columns 0,2,4')
 print(two_d[3:,[0,2,4]])

@@ -306,7 +363,7 @@
 #%%
 # getting the coordinates of the maximum value
 np.random.seed(42)
-a = np.random.randint(1,1000, size=(2, 4, 5))
+a = np.random.randint(1,1000, size=(2,2, 4, 5))
 print('\nArray A')
 print(a)

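The coordinate lookup itself sits outside this hunk; a hedged sketch of one common way to do it (np.argmax on the flattened array mapped back with np.unravel_index, not necessarily the exact call the file uses):

import numpy as np

np.random.seed(42)
a = np.random.randint(1, 1000, size=(2, 2, 4, 5))
idx = np.unravel_index(np.argmax(a), a.shape)  # n-d coordinates of the maximum
print(idx, a[idx])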
@@ -367,3 +424,13 @@ def sum_num(*args):
 kwargs = {'name':1,'age':2,'system':3}
 print(len(args))

+#%%
+# cumulative histogram of a binomial sample (an empirical view of the percentiles)
+import numpy as np
+
+data = np.random.binomial(9,0.25,size=1000)
+print(np.sum(data==9))  # how many draws hit the maximum value of 9
+import matplotlib.pyplot as plt
+plt.hist(data, cumulative=True, density=True, histtype='step', bins=100)
+plt.show()
+

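If the goal is a concrete percentile value rather than the plot, numpy has a direct routine; a minimal sketch under that assumption:

import numpy as np

data = np.random.binomial(9, 0.25, size=1000)
print(np.percentile(data, 90))  # value below which ~90% of the sample falls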
pandas_help/pandas_operation_help.py

Lines changed: 23 additions & 1 deletion
@@ -49,6 +49,18 @@
 # dropping rows with any NaN: df.dropna(how='any'); how='all' removes a row only when all of its columns are null
 # thresh=<int> keeps only the rows/columns that have at least thresh non-null values

+# to drop rows based on whether a particular column is na, use
+# df.dropna(subset=[<column name>], inplace=True)
+
+#%%
+
+# to concatenate two string columns you can use:
+# data.A.str.cat(data.B, sep=' ')  # specify sep as you like
+
+#%%
+
+# to get the total null count in each column you can use data.isnull().sum()
+
 # %%

 # to get index
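The three notes above are comment-only; a hedged, runnable sketch of the calls (frame and column names hypothetical):

import pandas as pd

data = pd.DataFrame({'A': ['x', None, 'z'], 'B': ['1', '2', None]})
print(data.isnull().sum())               # null count per column
data.dropna(subset=['A'], inplace=True)  # drop rows where column A is na
print(data.A.str.cat(data.B, sep=' '))   # concatenate two string columns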
@@ -74,6 +86,7 @@
 # to get the frequency count of a column
 # value_counts is a function of Series, not of DataFrame, so it cannot be applied as data.value_counts(dropna=False)
 unique = data['Year'].value_counts(dropna=False)  # dropna=False will include na values
+# you can pass normalize=True to get proportions; the values are between 0 and 1
 print(unique)

 # %%
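For instance (hypothetical series), normalize=True turns the counts into proportions:

import pandas as pd

years = pd.Series([2000, 2000, 2004, None])
print(years.value_counts(dropna=False, normalize=True))  # proportions sum to 1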
@@ -216,6 +229,9 @@
 # data['A'] = data['A'].astype(str)
 #
 # example: if we want to convert column A of data to category --> the category datatype is memory efficient
+# https://campus.datacamp.com/courses/analyzing-police-activity-with-pandas/analyzing-the-effect-of-weather-on-policing?ex=4
+# follow the link above for more useful ways of using category
+
 # data['A'] = data['A'].astype('category')

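A small sketch of the memory saving (column contents hypothetical):

import pandas as pd

data = pd.DataFrame({'A': ['low', 'medium', 'high'] * 10000})
print(data['A'].memory_usage(deep=True))  # object dtype: one Python string per row
data['A'] = data['A'].astype('category')
print(data['A'].memory_usage(deep=True))  # far smaller: integer codes plus 3 category values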
@@ -277,7 +293,7 @@
 print(pattern3)

 # pattern = '^[A-Za-z .]*$'
-# mask = countries.str.contains(pattern)
+# mask = countries.str.contains(pattern)  # pass na=False so missing values come out as False rather than NaN

 # %%

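A minimal runnable sketch of the na=False behaviour (series contents hypothetical):

import pandas as pd

countries = pd.Series(['United States', None, 'Brazil'])
mask = countries.str.contains('^[A-Za-z .]*$', na=False)  # the missing entry becomes False
print(mask)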
@@ -613,4 +629,10 @@ def filling(series):

 # similar to add we have multiply

+#%%

+# computing a frequency table using crosstab
+cross = pd.crosstab([data.Year, data.Gender], data.Medal)
+print(cross)
+cross['Total'] = cross.sum(axis=1)
+print(cross)

requests_help/requests_help.py

Lines changed: 23 additions & 15 deletions
@@ -1,27 +1,35 @@
-#%%
+# %%
+
 # download a file using urllib.request
-from urllib.request import urlretrieve

-url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'
-urlretrieve(url, 'data_science/wine_data.csv')
+"""

-#%%
+Request basic help module

-# download file using requests
+"""

+from urllib.request import urlretrieve
 import requests
+from bs4 import BeautifulSoup
+
+URL = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-'\
+      'red.csv'
+urlretrieve(URL, 'data_science/wine_data.csv')

-url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'
-r = requests.get(url, allow_redirects=True)
+# %%
+
+# download file using requests
+
+URL = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-'\
+      'red.csv'
+r = requests.get(URL, allow_redirects=True)
 open('google.csv', 'wb').write(r.content)
-#%%
+# %%
 # requests to get html data

-import requests
-from bs4 import BeautifulSoup
-
-url = 'https://campus.datacamp.com/courses/intermediate-importing-data-in-python/importing-data-from-the-internet-1?ex=6'
-response = requests.get(url)
+URL = 'https://campus.datacamp.com/courses/intermediate-importing-data-in-python/importing'\
+      '-data-from-the-internet-1?ex=6'
+response = requests.get(URL)
 text = response.text

 # we can pretty the text using BeautifulSoup
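One robustness note on the download step: open('google.csv', 'wb').write(r.content) leaves closing the file to the garbage collector; a context-manager variant with the same behaviour would be:

import requests

URL = ('https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/'
       'winequality-red.csv')
r = requests.get(URL, allow_redirects=True)
with open('google.csv', 'wb') as f:  # the file is closed deterministically on exit
    f.write(r.content)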
@@ -44,4 +52,4 @@
 a = pretty.find_all('a')

 for link in a:
-    print(link.get('href')) # .get() method to extract the attributes of a tag
+    print(link.get('href'))  # .get() method to extract the attributes of a tag
