-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathetl.py
66 lines (56 loc) · 1.96 KB
/
etl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import etl
import glob, os
import numpy as np
import pandas as pd
from cv2 import imread
def eweather(weather, images):
    """Extract the weather CSV files and pass them to the transform step.

    Every ``*.csv`` file directly inside the ``weather`` directory is read
    with ``header=14`` (row 14, counted from zero, supplies the column
    names), the frames are concatenated under a fresh 0..n-1 index, and the
    combined frame is handed to ``tweather``.

    Args:
        weather: Path of the directory that contains the weather CSV files.
        images: Path of the image directory, forwarded to ``tweather``.

    Returns:
        The DataFrame produced by ``tweather``.

    Raises:
        ValueError: Raised by ``pd.concat`` when no CSV file is found.
    """
    print('Reading weather')
    # glob yields files in arbitrary order; sort them so the concatenated
    # rows come out in the same order on every run. The gap filling done
    # by tweather depends on row order.
    csv_paths = sorted(glob.glob(os.path.join(weather, '*.csv')))
    frames = [pd.read_csv(path, header=14) for path in csv_paths]
    return tweather(pd.concat(frames, ignore_index=True), images)
def tweather(weather, images):
    """Transform the raw weather frame into labelled image paths.

    Keeps only the ``Weather`` and ``Date/Time`` columns, fills missing
    ``Weather`` labels from neighbouring rows (the previous known label, or
    the next known one for gaps at the very start), drops rows that are
    still incomplete, and adds an ``Images`` column holding the path
    ``<images>/katkam-YYYYMMDDHH0000.jpg`` built from ``Date/Time``.

    Args:
        weather: DataFrame with at least ``Weather`` and ``Date/Time``
            columns; ``Date/Time`` must parse as ``%Y-%m-%d %H:%M``.
        images: Directory path (str) prefixed to every image file name.

    Returns:
        A new DataFrame with the columns ``Weather``, ``Date/Time`` and
        ``Images``. The frame passed in is not modified.
    """
    # Copy the column subset so the assignments below neither touch the
    # caller's frame nor trigger pandas' chained-assignment warning.
    weather = weather[['Weather', 'Date/Time']].copy()

    # NOTE: the raw label strings are probably not exactly the categories
    # wanted for classification; they are passed through unchanged here.
    # Forward-fill carries the last known label into each gap; the trailing
    # back-fill only affects gaps before the first known label, so no
    # placeholder value ever ends up in the column and valid labels are
    # never overwritten.
    weather['Weather'] = weather['Weather'].ffill().bfill()
    weather = weather.dropna(axis=0, how='any')

    print('Merging weather with associated images paths...')
    timestamps = pd.to_datetime(weather['Date/Time'], format='%Y-%m-%d %H:%M')
    # Only the path is stored; for efficiency the image itself is meant to
    # be read later, at training time.
    weather['Images'] = (
        images + '/katkam-' + timestamps.dt.strftime('%Y%m%d%H') + '0000.jpg'
    )
    weather = weather[pd.notnull(weather['Images'])]
    return weather
def lweather(weather, images):
    """Load entry point: run the extract step and return its result.

    Both arguments are forwarded unchanged to ``eweather``.

    Args:
        weather: Path of the directory containing the weather CSV files.
        images: Path of the directory containing the images.

    Returns:
        Whatever ``eweather(weather, images)`` returns.
    """
    loaded = eweather(weather, images)
    return loaded
def timage(date, images):
    """Placeholder for extracting the image that matches a Date/Time value.

    The intended behaviour is to load the images whose Date/Time exists and
    is not NaN/null. For now the function only prints ``date``; ``images``
    is accepted but not used, and nothing is returned.

    Args:
        date: Value echoed to standard output.
        images: Image directory path (currently unused).
    """
    # TODO: read the matching file, e.g. cv2.imread(images + '/katkam-' + ...)
    print(date)