@@ -1,6 +1,7 @@
import numpy as np
import pdb
import pickle
+import os
from adlframework.cache import Cache
from adlframework.utils import get_logger

@@ -86,8 +87,6 @@ def load(self):
        Currently, only saves data.
        To-Do: save labels too.
        '''
-        import pickle
-        import os
        dtf = self.cache_file + '_data'
        lf = self.cache_file + '_label'
        df = self.cache_file + '_dict'
@@ -113,9 +112,6 @@ def double_arr_size(self):
        self.labels = self.new_labels

class IrregularNPArrCache(Cache):
-    import tables
-    import string
-    import random
    '''
    TO-DO: Written for 1-d. Generalize to N-D.
    Reference: https://kastnerkyle.github.io/posts/using-pytables-for-larger-than-ram-data-processing/
@@ -124,6 +120,7 @@ def __init__(self, cache_file=None, compress=True):
        self.data = []
        self.labels = []
        self.id_to_index = {}
+        self.cache_file = cache_file


    ''' Necessary classes '''
@@ -147,3 +144,23 @@ def retrieve(self, id_):
        idx = self.id_to_index[id_]
        return self.data[idx], self.labels[idx]

+    def load(self):
+        '''
+        Reads data, labels, and id_to_index as a tuple from pickle.
+        '''
+        if self.cache_file is not None:
+            if os.path.exists(self.cache_file):
+                with open(self.cache_file, "rb") as f:
+                    self.data, self.labels, self.id_to_index = pickle.load(f)
+            else:
+                logger.warn('Cache file specified doesn\'t exist. Will continue...')
+
+    def save(self):
+        '''
+        Saves data, labels, and id_to_index as a tuple in pickle.
+        '''
+        if self.cache_file is not None:
+            with open(self.cache_file, "wb") as f:
+                pickle.dump((self.data, self.labels, self.id_to_index), f)
+        else:
+            logger.warn('No cache file specified. Will lose cache on exit.')
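Below is a minimal, hypothetical round-trip sketch of the new `save()`/`load()` methods. It assumes `IrregularNPArrCache` can be constructed standalone with the `__init__(cache_file=None, compress=True)` signature shown above; the import path and the direct attribute population are illustrative guesses, not part of the diff.

```python
# Hypothetical usage sketch of the new save()/load() persistence methods.
# The module path below is an assumption; adjust it to wherever
# IrregularNPArrCache actually lives in adlframework.
from adlframework.cache import IrregularNPArrCache

cache = IrregularNPArrCache(cache_file='/tmp/irregular_cache.pkl')

# Populate the attributes that save() persists; a real caller would go
# through the Cache base class's own insertion API instead.
cache.data.append([0.1, 0.2, 0.3])
cache.labels.append('a')
cache.id_to_index['sample_a'] = 0

cache.save()     # pickles (data, labels, id_to_index) to cache_file

restored = IrregularNPArrCache(cache_file='/tmp/irregular_cache.pkl')
restored.load()  # restores the tuple from cache_file
print(restored.retrieve('sample_a'))  # -> ([0.1, 0.2, 0.3], 'a')
```

Because the three structures are pickled as a single tuple, one file captures the whole cache state, and `load()` can rebuild `id_to_index` without re-reading any source data.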