forked from streamlit/streamlit-example
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_access.py
124 lines (105 loc) · 3.21 KB
/
data_access.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
from numpy import double
from pandas.core.frame import DataFrame
import requests
import pandas as pd
import io
import locale
from itertools import compress
import datetime
from grunddata import Grunddata
# Columns picked from the downloaded poll CSV, spelled as in that file.
# The order matters: DataAccess.hämta_data pairs this list position by
# position with `kolumner` to build the rename mapping.
orginal_kolumner = [
    # short party codes (presumably one column per party -- confirm against the CSV)
    "M", "L", "C", "KD", "S", "V", "MP", "SD",
    # columns that are renamed by `kolumner`
    "PublDate", "collectPeriodFrom", "collectPeriodTo", "house",
]
# Names used inside the app, listed in the same order as `orginal_kolumner`:
# entry i here is the new name of entry i there. The first eight names are
# unchanged; only the last four columns actually get a new name.
kolumner = [
    "M", "L", "C", "KD", "S", "V", "MP", "SD",
    "Publiceringsdatum",
    "Insamlingsdatum_fr_o_m",
    "Insamlingsdatum_t_o_m",
    "Institut",
]
# Column order of the frame returned by DataAccess.hämta_data (applied there
# with `reindex`). Same names as `kolumner`, with the publication date first.
visa_kolumner = [
    "Publiceringsdatum",
    "V", "S", "MP", "C", "L", "M", "KD", "SD",
    "Insamlingsdatum_fr_o_m",
    "Insamlingsdatum_t_o_m",
    "Institut",
]
class DataAccess:
    """Load the poll data and derive the tables used by the rest of the app.

    Every method is a ``@staticmethod``; the class only acts as a namespace.
    """

    @staticmethod
    def hämta_data(start_datum: datetime.date = None) -> pd.DataFrame:
        """Download the poll CSV and return it with the app's column names.

        Args:
            start_datum: When given, only rows published strictly after this
                date are kept. Anything ``pd.Timestamp`` accepts works
                (``datetime.date``, ``datetime.datetime``, ISO string).

        Returns:
            A DataFrame with the columns of ``visa_kolumner`` (in that order),
            sorted with the most recent publication date first.
            ``Publiceringsdatum`` is a datetime column, not the index.

        Raises:
            requests.RequestException: If the download fails, times out or
                the server answers with an HTTP error status.
            KeyError: If the CSV lacks one of ``orginal_kolumner``.
        """
        url = "https://raw.githubusercontent.com/MansMeg/SwedishPolls/master/Data/Polls.csv"
        # Without a timeout a stalled connection would block forever, and
        # without the status check an error page would be parsed as CSV.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        df = pd.read_csv(io.StringIO(response.content.decode("utf-8")))

        # Keep only the columns the app uses and give them the app's names.
        rename_dict = dict(zip(orginal_kolumner, kolumner))
        df = df[orginal_kolumner].rename(columns=rename_dict)
        df["Publiceringsdatum"] = pd.to_datetime(df["Publiceringsdatum"])
        df = df.reindex(columns=visa_kolumner)

        if start_datum is not None:
            # A datetime64 column cannot be ordered against a plain
            # datetime.date on current pandas, so convert first.
            df = df[df["Publiceringsdatum"] > pd.Timestamp(start_datum)]

        # Sort on the column instead of moving it into the index: the other
        # methods of this class select "Publiceringsdatum" by column name.
        return df.sort_values("Publiceringsdatum", ascending=False, kind="stable")

    @staticmethod
    def skapa_rullande_medel(start_datum: datetime.date, data: pd.DataFrame, w: int):
        """Return a rolling mean over the per-date averages of ``data``.

        Args:
            start_datum: Only result rows with a publication date strictly
                after this date are returned. The filter is applied after
                the rolling mean, so earlier rows still feed the windows.
            data: Frame with a datetime ``Publiceringsdatum`` column, e.g. the
                result of ``hämta_data``. It is not modified.
            w: Window size, counted in distinct publication dates.

        Returns:
            A DataFrame with ``Publiceringsdatum`` as a regular column plus
            the numeric columns of ``data``. Rows whose window is not yet
            full (fewer than ``w`` dates) contain NaN.
        """
        # One row per publication date. numeric_only drops text columns such
        # as the institute name, which cannot be averaged.
        per_datum = data.groupby("Publiceringsdatum").mean(numeric_only=True)

        # Use the newest rows only, as many as the largest multiple of w.
        n = per_datum.shape[0]
        n_even_w = n - n % w
        df_rol = per_datum.tail(n_even_w).rolling(window=w, min_periods=w).mean()

        df_rol = df_rol.reset_index()
        return df_rol[df_rol["Publiceringsdatum"] > pd.Timestamp(start_datum)]

    @staticmethod
    def ge_data_for_sista_30_dagarna(df: pd.DataFrame):
        """Return the rows of ``df`` published within the last 30 days.

        The cut-off is the current moment minus 30 days; rows must have a
        ``Publiceringsdatum`` strictly later than that.
        """
        datum_30_dagar_sedan = pd.to_datetime("today") + pd.DateOffset(-30)
        return df[df["Publiceringsdatum"] > datum_30_dagar_sedan]

    @staticmethod
    def ge_dagar_kvar_till_valet():
        """Return the number of days from now until ``Grunddata.valdag``."""
        idag = pd.to_datetime("today")
        valdag = pd.to_datetime(Grunddata.valdag)
        # `idag` carries the current time of day and `.days` rounds the
        # difference down; the + 1 presumably makes up for the partly elapsed
        # current day (assumes valdag is a date at midnight -- confirm).
        return (valdag - idag).days + 1

    @staticmethod
    def hämta_medelvärde_senaste_30_dagarna(df: pd.DataFrame):
        """Return the column means of ``Grunddata.partier`` for the last 30 days.

        ``df`` is first narrowed with ``ge_data_for_sista_30_dagarna``; the
        result is a Series indexed by the selected column names.
        """
        senaste = DataAccess.ge_data_for_sista_30_dagarna(df)
        return senaste[Grunddata.partier].mean()

    @staticmethod
    def hämta_df_för_uppslag_block():
        """Return a lookup table with the columns ``Parti`` and ``Block``.

        The columns are filled from ``Grunddata.partier`` and
        ``Grunddata.block_för_parti`` respectively.
        """
        df_uppslag_block = pd.DataFrame()
        df_uppslag_block["Parti"] = Grunddata.partier
        df_uppslag_block["Block"] = Grunddata.block_för_parti
        return df_uppslag_block