Skip to content

Commit

Permalink
Merge pull request #145 from mhmerrill/pdarray-index-enhancement
Browse files Browse the repository at this point in the history
Pdarray index enhancement
  • Loading branch information
mhmerrill authored Oct 21, 2019
2 parents 32a3df7 + ec3ed0b commit 4f70d59
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 36 deletions.
5 changes: 5 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Arkouda Python Benchmarks

* depending on your setup, you may need to set the `PYTHONPATH` environment variable so that the `arkouda.py` module can be found
* the `run_all.sh` script runs them against an `arkouda_server` running on `localhost:5555`
* check the benchmark source for runtime options
4 changes: 4 additions & 0 deletions converter/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Python converter from CSV to HDF5

* example of conversion of the open LANL netflow data from CSV to HDF5

72 changes: 63 additions & 9 deletions src/IndexingMsg.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,10 @@ module IndexingMsg
var gIV: borrowed GenSymEntry = st.lookup(iname);
if (gIV == nil) {return unknownSymbolError(pn,iname);}

proc idxToValHelper(type Xtype, type IVtype, type dtype): string {
// scatter indexing by integer index vector
proc ivInt64Helper(type Xtype, type dtype): string {
var e = toSymEntry(gX,Xtype);
var iv = toSymEntry(gIV,IVtype);
var iv = toSymEntry(gIV,int);
var ivMin = min reduce iv.a;
var ivMax = max reduce iv.a;
if ivMin < 0 {return try! "Error: %s: OOBindex %i < 0".format(pn,ivMin);}
Expand All @@ -280,18 +281,42 @@ module IndexingMsg
[i in iva] unorderedCopy(ea[i],val);
return try! "%s success".format(pn);
}

// add check for IV to be dtype of int64 or bool

// expansion boolean indexing by bool index vector
// Bool-index-vector assignment of a single scalar: every element of gX whose
// corresponding gIV entry is true receives the value parsed from `value`.
// Xtype is the element type used to view gX, dtype the type `value` is cast to.
// Returns an "Error: ..." message on a size mismatch, otherwise "<pn> success".
proc ivBoolHelper(type Xtype, type dtype): string {
    var target = toSymEntry(gX,Xtype);
    var mask = toSymEntry(gIV,bool);
    // the mask is applied position by position, so both arrays must have the same size
    if (target.size != mask.size) {
        return try! "Error: %s: bool iv must be same size %i != %i".format(pn,target.size,mask.size);
    }
    // rewrite capitalized "True"/"False" to lowercase before the cast below
    if isBool(dtype) {
        value = value.replace("True","true");
        value = value.replace("False","false");
    }
    var val = try! value:dtype;
    ref targetDom = target.aD;
    ref targetArr = target.a;
    ref maskArr = mask.a;
    // only positions selected by the mask are written
    [i in targetDom] if maskArr[i] {unorderedCopy(targetArr[i],val);}
    return try! "%s success".format(pn);
}

select(gX.dtype, gIV.dtype, dtype) {
when (DType.Int64, DType.Int64, DType.Int64) {
return idxToValHelper(int, int, int);
return ivInt64Helper(int, int);
}
when (DType.Int64, DType.Bool, DType.Int64) {
return ivBoolHelper(int, int);
}
when (DType.Float64, DType.Int64, DType.Float64) {
return idxToValHelper(real, int, real);
return ivInt64Helper(real, real);
}
when (DType.Float64, DType.Bool, DType.Float64) {
return ivBoolHelper(real, real);
}
when (DType.Bool, DType.Int64, DType.Bool) {
return idxToValHelper(bool, int, bool);
return ivInt64Helper(bool, bool);
}
when (DType.Bool, DType.Bool, DType.Bool) {
return ivBoolHelper(bool, bool);
}
otherwise {return notImplementedError(pn,
"("+dtype2str(gX.dtype)+","+dtype2str(gIV.dtype)+","+dtype2str(dtype)+")");}
Expand All @@ -317,11 +342,12 @@ module IndexingMsg
var gY: borrowed GenSymEntry = st.lookup(yname);
if (gY == nil) {return unknownSymbolError(pn,yname);}

// add check to make sure IV and Y are same size
if (gIV.size != gY.size) {return try! "Error: %s: size mismatch %i %i".format(pn,gIV.size, gY.size);}
// add check for IV to be dtype of int64 or bool

// scatter indexing by an integer index vector
proc ivInt64Helper(type t) {
// add check to make sure IV and Y are same size
if (gIV.size != gY.size) {return try! "Error: %s: size mismatch %i %i".format(pn,gIV.size,gY.size);}
var e = toSymEntry(gX,t);
var iv = toSymEntry(gIV,int);
var ivMin = min reduce iv.a;
Expand All @@ -337,16 +363,44 @@ module IndexingMsg
return try! "%s success".format(pn);
}

// expansion indexing by a bool index vector
// Bool-index-vector assignment from an array: the k-th true entry of gIV
// selects the element of gX that receives the k-th element of gY.
// t is the element type used to view both gX and gY.
// Returns an "Error: ..." message on a size mismatch, otherwise "<pn> success".
proc ivBoolHelper(type t) {
    // the bool index vector must have exactly one entry per element of X
    if (gIV.size != gX.size) {return try! "Error: %s: size mismatch %i %i".format(pn,gIV.size,gX.size);}
    var e = toSymEntry(gX,t);
    var truth = toSymEntry(gIV,bool);
    // inclusive running count of true entries: iv[i] is the number of trues up to and including i
    var iv: [truth.aD] int = (+ scan truth.a);
    // total number of true entries; an empty index vector has none and must not
    // read iv[-1]  (iv.size-1 as the last index assumes truth.aD is 0-based -- TODO confirm)
    var pop = if iv.size > 0 then iv[iv.size-1] else 0;
    if v {writeln("pop = ",pop,"last-scan = ",pop);try! stdout.flush();}
    var y = toSymEntry(gY,t);
    // Y must supply exactly one value per true entry
    if (y.size != pop) {return try! "Error: %s: pop size mismatch %i %i".format(pn,pop,y.size);}
    ref ya = y.a;
    ref ead = e.aD;
    ref ea = e.a;
    ref trutha = truth.a;
    // iv[i]-1 is the position in Y of the value destined for the i-th selected element
    [i in ead] if (trutha[i] == true) {unorderedCopy(ea[i],ya[iv[i]-1]);}
    return try! "%s success".format(pn);
}

select(gX.dtype, gIV.dtype, gY.dtype) {
when (DType.Int64, DType.Int64, DType.Int64) {
return ivInt64Helper(int);
}
when (DType.Int64, DType.Bool, DType.Int64) {
return ivBoolHelper(int);
}
when (DType.Float64, DType.Int64, DType.Float64) {
return ivInt64Helper(real);
}
when (DType.Float64, DType.Bool, DType.Float64) {
return ivBoolHelper(real);
}
when (DType.Bool, DType.Int64, DType.Bool) {
return ivInt64Helper(bool);
}
when (DType.Bool, DType.Bool, DType.Bool) {
return ivBoolHelper(bool);
}
otherwise {return notImplementedError(pn,
"("+dtype2str(gX.dtype)+","+dtype2str(gIV.dtype)+","+dtype2str(gY.dtype)+")");}
}
Expand Down
4 changes: 4 additions & 0 deletions test/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Some unit testing against the arkouda chapel modules

* still under construction

5 changes: 5 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Python Tests against the arkouda_server

* depending on your setup, you may need to set the `PYTHONPATH` environment variable so that the `arkouda.py` module can be found
* check the python source for runtime options

112 changes: 87 additions & 25 deletions check.py → tests/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ def check_zeros(N):

print("check zeros :", check_zeros(N))

def check_argsort(N):
    """Reorder a reversed arange by its own argsort in NumPy and in arkouda.

    Returns pass_fail() of whether every element of the two results is equal.
    """
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.arange(N)[::-1]
    expected = expected[np.argsort(expected)]
    expected = ak.array(expected)
    # same steps carried out with arkouda calls
    actual = ak.arange(N)[::-1]
    actual = actual[ak.argsort(actual)]
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check argsort :", check_argsort(N))

def check_get_slice(N):
# create np version
a = np.ones(N)
Expand All @@ -86,20 +105,35 @@ def check_get_slice(N):

print("check get slice [::2] :", check_get_slice(N))

def check_set_slice_value(N):
    """Assign the scalar -1 to every second element in NumPy and in arkouda.

    Returns pass_fail() of whether every element of the two results is equal.
    """
    every_other = slice(None, None, 2)  # same selection as [::2]
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.ones(N)
    expected[every_other] = -1
    expected = ak.array(expected)
    # same assignment on an arkouda array
    actual = ak.ones(N)
    actual[every_other] = -1
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check set slice [::2] = value:", check_set_slice_value(N))

def check_set_slice(N):
    """Assign to every second element, first a scalar and then an array, in NumPy and in arkouda.

    Returns pass_fail() of whether every element of the two results is equal.
    """
    every_other = slice(None, None, 2)  # same selection as [::2]
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.ones(N)
    expected[every_other] = 0
    expected[every_other] = expected[every_other] * -1
    expected = ak.array(expected)
    # same assignments on an arkouda array
    actual = ak.ones(N)
    actual[every_other] = 0
    actual[every_other] = actual[every_other] * -1
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check set slice [::2] :", check_set_slice(N))
print("check set slice [::2] = pda:", check_set_slice(N))

def check_get_bool_iv(N):
# create np version
Expand All @@ -116,6 +150,36 @@ def check_get_bool_iv(N):

print("check (compressing) get bool iv :", check_get_bool_iv(N))

def check_set_bool_iv_value(N):
    """Assign the scalar -1 through a boolean mask in NumPy and in arkouda.

    The mask selects the elements of arange(N) that are below N//2.
    Returns pass_fail() of whether every element of the two results is equal.
    """
    half = N // 2
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.arange(N)
    expected[expected < half] = -1
    expected = ak.array(expected)
    # same masked assignment on an arkouda array
    actual = ak.arange(N)
    actual[actual < half] = -1
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check (expanding) set bool iv = value:", check_set_bool_iv_value(N))

def check_set_bool_iv(N):
    """Assign an array through a boolean mask in NumPy and in arkouda.

    The mask selects the elements of arange(N) that are below N//2; the
    assigned values are the first N//2 elements multiplied by -1.
    Returns pass_fail() of whether every element of the two results is equal.
    """
    half = N // 2
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.arange(N)
    expected[expected < half] = expected[:half] * -1
    expected = ak.array(expected)
    # same masked assignment on an arkouda array
    actual = ak.arange(N)
    actual[actual < half] = actual[:half] * -1
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check (expanding) set bool iv = pda:", check_set_bool_iv(N))

def check_get_integer_iv(N):
# create np version
a = np.arange(N)
Expand All @@ -131,7 +195,24 @@ def check_get_integer_iv(N):
# print(type(c),c)
return pass_fail(c.all())

print("check (gather) get integer iv :", check_get_integer_iv(N))
print("check (gather) get integer iv:", check_get_integer_iv(N))

def check_set_integer_iv_value(N):
    """Assign the scalar -1 through an integer index vector in NumPy and in arkouda.

    The index vector is arange(N//2).
    Returns pass_fail() of whether every element of the two results is equal.
    """
    half = N // 2
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.arange(N)
    expected[np.arange(half)] = -1
    expected = ak.array(expected)
    # same indexed assignment on an arkouda array
    actual = ak.arange(N)
    actual[ak.arange(half)] = -1
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check (scatter) set integer iv = value:", check_set_integer_iv_value(N))

def check_set_integer_iv(N):
# create np version
Expand All @@ -148,7 +229,7 @@ def check_set_integer_iv(N):
# print(type(c),c)
return pass_fail(c.all())

print("check (scatter) set integer iv :", check_set_integer_iv(N))
print("check (scatter) set integer iv = pda:", check_set_integer_iv(N))

def check_get_integer_idx(N):
# create np version
Expand All @@ -172,26 +253,7 @@ def check_set_integer_idx(N):
v2 = b[N//2]
return pass_fail(v1 == v2)

print("check set integer idx :", check_set_integer_idx(N))

def check_argsort(N):
    """Reorder a reversed arange by its own argsort in NumPy and in arkouda.

    Returns pass_fail() of whether every element of the two results is equal.
    """
    # NumPy reference, pushed into arkouda for the comparison
    expected = np.arange(N)[::-1]
    expected = expected[np.argsort(expected)]
    expected = ak.array(expected)
    # same steps carried out with arkouda calls
    actual = ak.arange(N)[::-1]
    actual = actual[ak.argsort(actual)]
    # print(expected, actual)
    matches = expected == actual
    # print(type(matches), matches)
    return pass_fail(matches.all())

print("check argsort :", check_argsort(N))
print("check set integer idx = value:", check_set_integer_idx(N))

#ak.disconnect()
ak.shutdown()
4 changes: 4 additions & 0 deletions toys/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Experiments and toys related to the development of Arkouda

* provided as examples ONLY and are not guaranteed to still work

2 changes: 1 addition & 1 deletion toys/ak_bfs_conn_comp.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def gen_rmat_edges(lgNv, Ne_per_v, p, perm=False):
#
# maybe: remove edges which are self-loops???
#
# return pair of ndarrays
# return pair of pdarrays
return (ii,jj)

# src and dst pdarrays hold the edge list
Expand Down
2 changes: 1 addition & 1 deletion toys/ak_rmat.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def gen_rmat_edges(lgNv, Ne_per_v, p, perm=False):
#
# maybe: remove edges which are self-loops???
#
# return pair of ndarrays
# return pair of pdarrays
return (ii,jj)


Expand Down

0 comments on commit 4f70d59

Please sign in to comment.