forked from llSourcell/prepare_dataset_challenge
-
Notifications
You must be signed in to change notification settings - Fork 4
/
FileProcessor.py
74 lines (59 loc) · 2.13 KB
/
FileProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Author: WeldFire
# Created: 12/20/2016
"""
This function loads a pokemon csv file and parses out labels
and stat information for input into a machine learning algorithm
IN:
filepath - the filepath of the csv you are wanting to load and parse
OUT:
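pokemonStatLines - One list per pokemon holding its stat values (HP, Attack, Defense, Sp. Atk, Sp. Def, Speed, Generation)
pokemonTypeLabels - The digitized type 1 label of each pokemon, in the same order as pokemonStatLines
trackedTypes - The key value pair translation table used to convert types to IDs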
"""
def loadCleanData(filepath):
    """Load a pokemon CSV file and split it into stat rows and type labels.

    The first line of the file is treated as the column header and skipped.
    Every following non-empty row contributes one stat list and one label.
    An empty or header-only file yields three empty containers.

    IN:
        filepath - the filepath of the csv you are wanting to load and parse
    OUT:
        pokemonStatLines - one list per pokemon holding columns 5 through 11
            of its row (HP, Attack, Defense, Sp. Atk, Sp. Def, Speed and
            Generation in the expected dataset layout); the values are kept
            as the strings read from the file, no numeric conversion is done
        pokemonTypeLabels - the integer ID of each pokemon's type 1, in the
            same order as pokemonStatLines
        trackedTypes - dict mapping each type 1 name to its integer ID; IDs
            are handed out starting at 0 in order of first appearance
    RAISES:
        IndexError - if a non-empty data row has fewer than 12 columns
    """
    # Function-scope import so the block does not depend on file-level imports.
    import csv

    #Column 2 holds the type 1 data
    type1Column = 2
    #Columns holding the stats we deem most important in determining type
    statColumns = range(5, 12)

    #Placeholder for each type that we would like to track
    #(This will help us ID our training labels)
    trackedTypes = {}

    #Define our output variables
    pokemonStatLines = []
    pokemonTypeLabels = []

    with open(filepath) as csvFile:
        # csv.reader copes with quoted fields that a plain split(',') would
        # break apart.
        reader = csv.reader(csvFile)

        # Skip the title line; the default keeps an empty file from raising.
        next(reader, None)

        #Loop through each data line to parse and clean it
        for pokemonStats in reader:
            # Blank lines (e.g. a trailing newline at end of file) come back
            # as empty rows and carry no data.
            if not pokemonStats:
                continue

            pokemonType1 = pokemonStats[type1Column]
            if pokemonType1 not in trackedTypes:
                # The next free ID always equals the number of types seen so far.
                trackedTypes[pokemonType1] = len(trackedTypes)

            # Indexing (rather than slicing) keeps a short, malformed row loud.
            pokemonStatLines.append([pokemonStats[i] for i in statColumns])
            pokemonTypeLabels.append(trackedTypes[pokemonType1])

    return pokemonStatLines, pokemonTypeLabels, trackedTypes
"""
Provided a tracked types key value pair (KVP) it will return the type text
IN:
ID - ID you are wanting to find the type name for
trackedTypes - The KVP type map to search inside of
OUT:
type - The plain text representation of the pokemon's type, or "UNKNOWN TYPE" if the ID is not in trackedTypes
"""
def typeFromTrackedTypeID(ID, trackedTypes):
    """Return the type name that a tracked types key value pair maps to ID.

    IN:
        ID - ID you are wanting to find the type name for
        trackedTypes - dict mapping type name to type ID to search inside of
    OUT:
        The type name whose value equals ID, or the string "UNKNOWN TYPE"
        when no entry in trackedTypes has that ID
    """
    # .items() exists on both Python 2 and Python 3 dicts, whereas
    # .iteritems() is Python 2 only and raises AttributeError on Python 3.
    for typeName, typeID in trackedTypes.items():
        if ID == typeID:
            return typeName
    return "UNKNOWN TYPE"