Update v1.0.17rc0
tej87681088 authored Nov 29, 2023
1 parent 12fff5f commit 2df97ad
Showing 1 changed file with 12 additions and 5 deletions.
TejToolAPI/Map_Dask_API.py (12 additions, 5 deletions)
@@ -51,7 +51,11 @@ def get_data(table, tickers, columns, start, end, fin_type):
     if len(data_sets) < 1:
         return fix_col_df
 
+    temp = data_sets[['coid', 'key3','annd', 'mdate', 'no']].copy()
+    fin_date = getMRAnnd_np(temp)
     data_sets = fin_date.merge(data_sets, how = 'left', on = ['coid', 'key3','annd', 'mdate', 'no'])
+    # if 'ver' in data_sets.columns:
+    #     data_sets.drop(columns = 'ver')
 
     # parallel fin_type to columns
     keys = ['coid', 'mdate', 'no', 'annd'] + [c for c in columns if c in ['sem','fin_type', 'curr', 'fin_ind']]
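A note on the new temp = ... .copy() lines in this hunk and in the -305 hunk below: subsetting the columns and calling .copy() hands getMRAnnd_np a frame that is detached from data_sets, so the 'ver' column it assigns in place cannot write back into the parent frame or trip pandas' SettingWithCopyWarning. A minimal sketch with invented data (only the column names come from the diff):

import pandas as pd

# Invented example frame; only the column names follow the diff.
data_sets = pd.DataFrame({
    'coid': ['2330', '2330'],
    'key3': ['Q1', 'Q1'],
    'annd': ['2023-05-01', '2023-05-02'],
    'mdate': ['2023-03-31', '2023-03-31'],
    'no': ['1', '2'],
})

# .copy() detaches the subset, so the in-place assignment touches only temp.
temp = data_sets[['coid', 'key3', 'annd', 'mdate', 'no']].copy()
temp['ver'] = temp['mdate'].astype(str) + '-' + temp['no']
assert 'ver' not in data_sets.columns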
@@ -67,6 +71,7 @@ def get_data(table, tickers, columns, start, end, fin_type):
 
     # Fixed the order of the columns
     data_sets = data_sets[fix_col_df.columns]
+    data_sets = parallize_annd_process(data_sets)
 
     return data_sets

@@ -305,7 +310,8 @@ def _get_data(table, tickers, columns, start, end, fin_type):
     data_sets = data_sets.rename(columns=lower_columns)
 
     # get most recent announce date of the company
-    fin_date = getMRAnnd_np(data_sets[['coid', 'key3','annd', 'mdate', 'no']])
+    temp = data_sets[['coid', 'key3','annd', 'mdate', 'no']].copy()
+    fin_date = getMRAnnd_np(temp)
     data_sets = fin_date.merge(data_sets, how = 'left', on = ['coid', 'key3','annd', 'mdate', 'no'])
 
     # del fin_date
@@ -340,6 +346,7 @@ def _get_data(table, tickers, columns, start, end, fin_type):
 
     # Fixed the order of the columns
     data_sets = data_sets[fix_col_df.columns]
+    data_sets = parallize_annd_process(data_sets)
 
     return data_sets

@@ -377,15 +384,15 @@ def _get_data(table, tickers, columns, start, end, fin_type):
     # return data
 
 def getMRAnnd_np(data):
-    data = parallize_annd_process(data)
+    # data = parallize_annd_process(data)
     # Keep the last row when there are multiple rows with the same keys
     # The last row represents the data with the greatest mdate
     # data = data.drop_duplicates(subset=['coid', 'key3','annd'], keep='last')
     data['ver'] = data['mdate'].astype(str) + '-' + data['no']
     data = data.groupby(['coid', 'key3','annd'], as_index=False).max()
     data = data.groupby(['coid','key3']).apply(lambda x: np.fmax.accumulate(x, axis=0))
     data = parallelize_ver_process(data)
-    data.drop(columns = 'ver')
+    data = data.drop(columns = 'ver')
 
     return data
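As rewritten above, getMRAnnd_np no longer calls parallize_annd_process on entry (the callers now run it after column reordering). It (1) builds a 'ver' revision key from mdate and no, (2) keeps the newest revision per (coid, key3, annd) via groupby(...).max(), and (3) runs np.fmax.accumulate down each (coid, key3) group so later rows never regress to stale values. A simplified runnable sketch of steps 2 and 3, with invented data and the ver/parallelize_ver_process handling omitted:

import numpy as np
import pandas as pd

# Invented sample: two revisions of the 2023-05-01 announcement, then a
# later announcement whose value is missing.
df = pd.DataFrame({
    'coid': ['2330', '2330', '2330'],
    'key3': ['Q1', 'Q1', 'Q1'],
    'annd': pd.to_datetime(['2023-05-01', '2023-05-01', '2023-05-15']),
    'value': [1.0, 2.0, np.nan],
})

# Step 2: keep the most recent revision per (coid, key3, annd).
latest = df.groupby(['coid', 'key3', 'annd'], as_index=False).max()

# Step 3: np.fmax.accumulate carries the running maximum down each
# (coid, key3) group; fmax ignores NaN, so the gap is filled with the
# last known value instead of being overwritten by NaN.
rolled = latest.groupby(['coid', 'key3'])[['value']].apply(
    lambda g: pd.DataFrame(np.fmax.accumulate(g.to_numpy(), axis=0),
                           index=g.index, columns=g.columns))
print(rolled)  # value on 2023-05-15 is 2.0, not NaN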

@@ -720,4 +727,4 @@ def get_fin_data(self, table, ticker, columns, start, end , fin_type, npartition
         )
 
         return datasets
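One of the one-line fixes in this commit is easy to miss: DataFrame.drop returns a new frame unless inplace=True, so the old data.drop(columns = 'ver') in getMRAnnd_np discarded its result and left 'ver' in place. A demonstration with throwaway data:

import pandas as pd

df = pd.DataFrame({'ver': ['2023-03-31-1'], 'value': [1.0]})
df.drop(columns='ver')       # no-op at the call site: the result is discarded
df = df.drop(columns='ver')  # the fixed form re-binds the returned frame
assert 'ver' not in df.columns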

