9
9
10
10
import numpy as np
11
11
12
+ import matplotlib .cbook as cbook
12
13
import matplotlib .units as units
13
14
import matplotlib .ticker as ticker
14
15
22
23
def to_array(data, maxlen=100):
    """Convert *data* into a numpy array of strings.

    Parameters
    ----------
    data : scalar, string or iterable
        value(s) to convert
    maxlen : int, optional
        width of each entry when the fixed-width byte-string dtype is
        used; it is ignored when ``NP_NEW`` is true

    Returns
    -------
    numpy.ndarray
        unicode array when ``NP_NEW`` is true, otherwise a ``'|S'`` byte
        string array (or the ``convert_to_string`` fallback)
    """
    if NP_NEW:
        # six.text_type is the builtin that the ``np.unicode`` alias
        # pointed to; the alias itself was removed in NumPy 1.24.
        return np.array(data, dtype=six.text_type)
    if cbook.is_scalar_or_string(data):
        # wrap scalars so the fallback below can iterate over them
        data = [data]
    try:
        vals = np.array(data, dtype=('|S', maxlen))
    except UnicodeEncodeError:
        # this yields gibberish
        vals = np.array([convert_to_string(d) for d in data])
    return vals
31
34
@@ -36,49 +39,53 @@ def convert(value, unit, axis):
36
39
"""Uses axis.unit_data map to encode
37
40
data as floats
38
41
"""
39
- vmap = dict (axis .unit_data )
42
+ vmap = dict (zip ( axis .unit_data . seq , axis . unit_data . locs ) )
40
43
41
44
if isinstance (value , six .string_types ):
42
45
return vmap [value ]
43
46
44
47
vals = to_array (value )
45
- for lab , loc in axis . unit_data :
48
+ for lab , loc in vmap . items () :
46
49
vals [vals == lab ] = loc
47
50
48
51
return vals .astype ('float' )
49
52
50
53
@staticmethod
def axisinfo(unit, axis):
    """Build the AxisInfo (major locator and formatter) for *axis*.

    The locator is created from ``axis.unit_data.locs`` and the
    formatter from ``axis.unit_data.seq``; *unit* is not used.
    """
    categories = axis.unit_data
    locator = StrCategoryLocator(categories.locs)
    formatter = StrCategoryFormatter(categories.seq)
    return units.AxisInfo(majloc=locator, majfmt=formatter)
56
58
57
59
@staticmethod
def default_units(data, axis):
    """Record the categories found in *data* on *axis*.

    Creates ``axis.unit_data`` on first use and extends it on later
    calls.  Always returns None.
    """
    # the conversion call stack is:
    # default_units->axis_info->convert
    known = axis.unit_data
    if known is not None:
        known.update(data)
    else:
        axis.unit_data = UnitData(data)
    return None
63
68
64
69
65
70
class StrCategoryLocator(ticker.FixedLocator):
    """Fixed locator whose tick positions are the category locations."""

    def __init__(self, locs):
        # The base-class initializer is not invoked; *locs* is stored
        # exactly as passed in and no bin limit is set.
        # NOTE(review): presumably this keeps ``locs`` the same object as
        # the caller's list so later additions are seen -- confirm.
        self.locs, self.nbins = locs, None
68
74
69
75
70
76
class StrCategoryFormatter(ticker.FixedFormatter):
    """Fixed formatter whose tick labels are the category names."""

    def __init__(self, seq):
        # The base-class initializer is not invoked; *seq* is stored
        # exactly as passed in and the offset string starts out empty.
        self.seq, self.offset_string = seq, ''
73
80
74
81
75
82
def convert_to_string (value ):
76
83
"""Helper function for numpy 1.6, can be replaced with
77
84
np.array(...,dtype=unicode) for all later versions of numpy"""
78
85
79
86
if isinstance (value , six .string_types ):
80
- return value
81
- if np .isfinite (value ):
87
+ pass
88
+ elif np .isfinite (value ):
82
89
value = np .asarray (value , dtype = str )[np .newaxis ][0 ]
83
90
elif np .isnan (value ):
84
91
value = 'nan'
@@ -91,59 +98,38 @@ def convert_to_string(value):
91
98
return value
92
99
93
100
94
- def map_categories (data , old_map = None ):
95
- """Create mapping between unique categorical
96
- values and numerical identifier.
97
-
98
- Paramters
99
- ---------
100
- data: iterable
101
- sequence of values
102
- old_map: list of tuple, optional
103
- if not `None`, than old_mapping will be updated with new values and
104
- previous mappings will remain unchanged)
105
- sort: bool, optional
106
- sort keys by ASCII value
107
-
108
- Returns
109
- -------
110
- list of tuple
111
- [(label, ticklocation),...]
112
-
113
- """
114
-
115
- # code typical missing data in the negative range because
116
- # everything else will always have positive encoding
117
- # question able if it even makes sense
101
class UnitData(object):
    """Mapping between categorical labels and numeric locations.

    ``seq`` holds the labels and ``locs`` holds the location of the label
    stored at the same index.
    """
    # debatable whether it makes sense to special-code missing values
    spdict = {'nan': -1.0, 'inf': -2.0, '-inf': -3.0}

    def __init__(self, data):
        """Create mapping between unique categorical values
        and numerical identifier

        Parameters
        ----------
        data: iterable
            sequence of values
        """
        self.seq, self.locs = [], []
        self._set_seq_locs(data, 0)

    def update(self, new_data):
        """Add the labels in *new_data* that are not mapped yet.

        Existing labels keep their locations; new ones are numbered
        starting after the largest location assigned so far.
        """
        self._set_seq_locs(new_data, self._next_loc())

    def _next_loc(self):
        """Return the first location to hand out to a new label."""
        if not self.locs:
            # nothing mapped yet: max() of an empty list would raise
            return 0
        # clamp at 0 so as not to conflict with the negative codes in spdict
        return max(max(self.locs) + 1, 0)

    def _set_seq_locs(self, data, value):
        """Append unseen labels from *data*, numbering them from *value*."""
        # magic to make it work under np1.6
        strdata = to_array(data)
        # np.unique makes dataframes work
        new_s = [d for d in np.unique(strdata) if d not in self.seq]
        for ns in new_s:
            self.seq.append(convert_to_string(ns))
            if ns in UnitData.spdict:
                # special values get their fixed negative code
                self.locs.append(UnitData.spdict[ns])
            else:
                self.locs.append(value)
                value += 1
147
133
148
134
# Connects the converter to matplotlib
149
135
units .registry [str ] = StrCategoryConverter ()
0 commit comments