diff --git a/examples/daal4py/adaboost_batch.py b/examples/daal4py/adaboost_batch.py index 8956a82ce7..60a296c5f9 100644 --- a/examples/daal4py/adaboost_batch.py +++ b/examples/daal4py/adaboost_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Adaboost example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/adaboost_train.csv" testfile = "./data/batch/adaboost_test.csv" nClasses = 2 @@ -55,7 +57,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -66,6 +68,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Adaboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/adagrad_mse_batch.py b/examples/daal4py/adagrad_mse_batch.py index 4e74c2b329..23f5ffecf5 100644 --- a/examples/daal4py/adagrad_mse_batch.py +++ b/examples/daal4py/adagrad_mse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,27 +12,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py AdaGrad (Adaptive Subgradient Method) example for shared memory systems # using Mean Squared Error objective function -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/mse.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(3)) @@ -46,11 +48,13 @@ def main(readcsv=read_csv, method='defaultDense'): # configure an AdaGrad object lr = np.array([[1.0]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_adagrad(mse_algo, - learningRate=lr, - accuracyThreshold=0.0000001, - nIterations=niters, - batchSize=1) + sgd_algo = d4p.optimization_solver_adagrad( + mse_algo, + learningRate=lr, + accuracyThreshold=0.0000001, + nIterations=niters, + batchSize=1, + ) # finally do the computation inp = np.array([[8], [2], [1], [4]], dtype=np.double) @@ -66,4 +70,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/association_rules_batch.py b/examples/daal4py/association_rules_batch.py index 180de107dd..aefb2811e7 100644 --- a/examples/daal4py/association_rules_batch.py +++ b/examples/daal4py/association_rules_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py assiciation rules example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/apriori.csv" # configure a association_rules object @@ -57,6 +59,6 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result1 = main() - print('Confidence: (20 first)') + print("Confidence: (20 first)") print(result1.confidence[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/bacon_outlier_batch.py b/examples/daal4py/bacon_outlier_batch.py index 372c37b3b6..6d7bccdb4c 100644 --- a/examples/daal4py/bacon_outlier_batch.py +++ b/examples/daal4py/bacon_outlier_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection bacon example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -55,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (Bacon method) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/bf_knn_classification_batch.py b/examples/daal4py/bf_knn_classification_batch.py index 316e7d004a..70a2751f53 100644 --- a/examples/daal4py/bf_knn_classification_batch.py +++ b/examples/daal4py/bf_knn_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,30 +12,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Brute Force KNN example for shared memory systems -import daal4py as d4p -import numpy as np import os +import numpy as np + +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input data set parameters - train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv') - predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv') + train_file = os.path.join("data", "batch", "k_nearest_neighbors_train.csv") + predict_file = os.path.join("data", "batch", "k_nearest_neighbors_test.csv") # Read data. Let's use 5 features per observation nFeatures = 5 @@ -69,6 +72,5 @@ def main(readcsv=read_csv, method='defaultDense'): print("Brute Force kNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print( - "Classification results(observations #30-34):\n", - predict_result.prediction[30:35] + "Classification results(observations #30-34):\n", predict_result.prediction[30:35] ) diff --git a/examples/daal4py/brownboost_batch.py b/examples/daal4py/brownboost_batch.py index 7bcb4cb973..14fe231fcd 100644 --- a/examples/daal4py/brownboost_batch.py +++ b/examples/daal4py/brownboost_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Brownboost example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/brownboost_train.csv" testfile = "./data/batch/brownboost_test.csv" @@ -54,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -65,6 +67,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Brownboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/cholesky_batch.py b/examples/daal4py/cholesky_batch.py index 955640586d..75d9b76bd6 100644 --- a/examples/daal4py/cholesky_batch.py +++ b/examples/daal4py/cholesky_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py cholesky example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/cholesky.csv" # configure a cholesky object @@ -45,4 +47,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("\nFactor:\n", result.choleskyFactor) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/correlation_distance_batch.py b/examples/daal4py/correlation_distance_batch.py index 37949243e5..2bd4ccefd2 100644 --- a/examples/daal4py/correlation_distance_batch.py +++ b/examples/daal4py/correlation_distance_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,34 +12,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py correlation distance example for shared memory systems -import daal4py as d4p -import numpy as np import os +import numpy as np + +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): - data = readcsv(os.path.join('data', 'batch', 'distance.csv'), range(10)) +def main(readcsv=read_csv, method="defaultDense"): + data = readcsv(os.path.join("data", "batch", "distance.csv"), range(10)) # Create algorithm to compute correlation distance (no parameters) algorithm = d4p.correlation_distance() # Computed correlation distance with file or numpy array - res1 = algorithm.compute(os.path.join('data', 'batch', 'distance.csv')) + res1 = algorithm.compute(os.path.join("data", "batch", "distance.csv")) res2 = algorithm.compute(data) assert np.allclose(res1.correlationDistance, res2.correlationDistance) @@ -51,6 +54,6 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print( "\nCorrelation distance (first 15 rows/columns):\n", - res.correlationDistance[0:15, 0:15] + res.correlationDistance[0:15, 0:15], ) print("All looks good!") diff --git a/examples/daal4py/cosine_distance_batch.py b/examples/daal4py/cosine_distance_batch.py index cdd4eb7cf7..61da63cf39 100644 --- a/examples/daal4py/cosine_distance_batch.py +++ b/examples/daal4py/cosine_distance_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,34 +12,37 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py cosine distance example for shared memory systems -import daal4py as d4p -import numpy as np import os +import numpy as np + +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): - data = readcsv(os.path.join('data', 'batch', 'distance.csv'), range(10)) +def main(readcsv=read_csv, method="defaultDense"): + data = readcsv(os.path.join("data", "batch", "distance.csv"), range(10)) # Create algorithm to compute cosine distance (no parameters) algorithm = d4p.cosine_distance() # Computed cosine distance with file or numpy array - res1 = algorithm.compute(os.path.join('data', 'batch', 'distance.csv')) + res1 = algorithm.compute(os.path.join("data", "batch", "distance.csv")) res2 = algorithm.compute(data) assert np.allclose(res1.cosineDistance, res2.cosineDistance) diff --git a/examples/daal4py/covariance_batch.py b/examples/daal4py/covariance_batch.py index 7f4eb02838..0fca126ccb 100644 --- a/examples/daal4py/covariance_batch.py +++ b/examples/daal4py/covariance_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/covcormoments_dense.csv" # configure a covariance object @@ -57,4 +59,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("Covariance matrix:\n", res.covariance) print("Mean vector:\n", res.mean) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/covariance_spmd.py b/examples/daal4py/covariance_spmd.py index 2651afee3b..37245ce1e6 100644 --- a/examples/daal4py/covariance_spmd.py +++ b/examples/daal4py/covariance_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./covariance_spmd.py -import daal4py as d4p - # let's use a reading of file in chunks (defined in spmd_utils.py) -from spmd_utils import read_csv, get_chunk_params +from spmd_utils import get_chunk_params, read_csv + +import daal4py as d4p def main(): @@ -29,9 +29,9 @@ def main(): # We know the number of lines in the file # and use this to separate data between processes - skiprows, nrows = get_chunk_params(lines_count=200, - chunks_count=d4p.num_procs(), - chunk_number=d4p.my_procid()) + skiprows, nrows = get_chunk_params( + lines_count=200, chunks_count=d4p.num_procs(), chunk_number=d4p.my_procid() + ) # Each process reads its chunk of the file data = read_csv(infile, sr=skiprows, nr=nrows) diff --git a/examples/daal4py/covariance_streaming.py b/examples/daal4py/covariance_streaming.py index 4d159fd145..2f68e5a83e 100644 --- a/examples/daal4py/covariance_streaming.py +++ b/examples/daal4py/covariance_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,17 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for streaming on shared memory systems -import daal4py as d4p - # let's use a generator for getting stream from file (defined in stream.py) from stream import read_next +import daal4py as d4p + -def main(readcsv=None, method='defaultDense'): +def main(readcsv=None, method="defaultDense"): infile = "./data/batch/covcormoments_dense.csv" # configure a covariance object @@ -44,4 +44,4 @@ def main(readcsv=None, method='defaultDense'): res = main() print("Covariance matrix:\n", res.covariance) print("Mean vector:\n", res.mean) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/dbscan_batch.py b/examples/daal4py/dbscan_batch.py index 15a31f71d4..187dd416e6 100644 --- a/examples/daal4py/dbscan_batch.py +++ b/examples/daal4py/dbscan_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py DBSCAN example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/dbscan_dense.csv" epsilon = 0.04 minObservations = 45 @@ -44,7 +46,7 @@ def main(readcsv=read_csv, method='defaultDense'): algo = d4p.dbscan( minObservations=minObservations, epsilon=epsilon, - resultsToCompute='computeCoreIndices|computeCoreObservations' + resultsToCompute="computeCoreIndices|computeCoreObservations", ) # and compute result = algo.compute(data) @@ -70,4 +72,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nFirst 10 cluster core indices:\n", result.coreIndices[0:10]) print("\nFirst 10 cluster core observations:\n", result.coreObservations[0:10]) print("\nNumber of clusters:\n", result.nClusters) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/dbscan_spmd.py b/examples/daal4py/dbscan_spmd.py index f758a11409..0206c8ba3d 100644 --- a/examples/daal4py/dbscan_spmd.py +++ b/examples/daal4py/dbscan_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,25 +12,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py DBSCAN example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./dbscan_spmd.py -import daal4py as d4p import numpy as np +import daal4py as d4p + -def main(method='defaultDense'): +def main(method="defaultDense"): infile = "./data/batch/dbscan_dense.csv" epsilon = 0.04 minObservations = 45 # Load the data - data = np.loadtxt(infile, delimiter=',') + data = np.loadtxt(infile, delimiter=",") rpp = int(data.shape[0] / d4p.num_procs()) - data = data[rpp * d4p.my_procid(): rpp * d4p.my_procid() + rpp, :] + data = data[rpp * d4p.my_procid() : rpp * d4p.my_procid() + rpp, :] # configure dbscan main object algo = d4p.dbscan(minObservations=minObservations, epsilon=epsilon, distributed=True) @@ -44,7 +45,13 @@ def main(method='defaultDense'): # Initialize SPMD mode d4p.daalinit() result = main() - print("\nResults on node with id = ", d4p.my_procid(), " :\n", - "\nFirst 10 cluster assignments:\n", result.assignments[0:10], - "\nNumber of clusters:\n", result.nClusters) + print( + "\nResults on node with id = ", + d4p.my_procid(), + " :\n", + "\nFirst 10 cluster assignments:\n", + result.assignments[0:10], + "\nNumber of clusters:\n", + result.nClusters, + ) d4p.daalfini() diff --git a/examples/daal4py/decision_forest_classification_default_dense_batch.py b/examples/daal4py/decision_forest_classification_default_dense_batch.py index d97a4cf55e..21a7c88726 100755 --- a/examples/daal4py/decision_forest_classification_default_dense_batch.py +++ b/examples/daal4py/decision_forest_classification_default_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/df_classification_train.csv" testfile = "./data/batch/df_classification_test.csv" @@ -44,9 +46,9 @@ def main(readcsv=read_csv, method='defaultDense'): minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError' + resultsToCompute="computeOutOfBagError", ) # Read data. Let's use 3 features per observation @@ -60,7 +62,7 @@ def main(readcsv=read_csv, method='defaultDense'): predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -80,11 +82,11 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_classification_hist_batch.py b/examples/daal4py/decision_forest_classification_hist_batch.py index 5adad2e360..a2be41356b 100755 --- a/examples/daal4py/decision_forest_classification_hist_batch.py +++ b/examples/daal4py/decision_forest_classification_hist_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example of Hist method for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='hist'): +def main(readcsv=read_csv, method="hist"): # input data file infile = "./data/batch/df_classification_train.csv" testfile = "./data/batch/df_classification_test.csv" @@ -46,9 +48,9 @@ def main(readcsv=read_csv, method='hist'): minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError' + resultsToCompute="computeOutOfBagError", ) # Read data. Let's use 3 features per observation @@ -62,7 +64,7 @@ def main(readcsv=read_csv, method='hist'): predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -82,11 +84,11 @@ def main(readcsv=read_csv, method='hist'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_classification_traverse_batch.py b/examples/daal4py/decision_forest_classification_traverse_batch.py index 9b2ffe679d..f94ab69b53 100755 --- a/examples/daal4py/decision_forest_classification_traverse_batch.py +++ b/examples/daal4py/decision_forest_classification_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification Tree Traversal example import math -import daal4py as d4p from decision_forest_classification_default_dense_batch import main as df_classification +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -53,5 +54,5 @@ def printNodes(node_id, nodes, values, level): printTree(treeState.node_ar, treeState.value_ar) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_default_dense_batch.py b/examples/daal4py/decision_forest_regression_default_dense_batch.py index 77a111554c..708e32328d 100755 --- a/examples/daal4py/decision_forest_regression_default_dense_batch.py +++ b/examples/daal4py/decision_forest_regression_default_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/df_regression_train.csv" testfile = "./data/batch/df_regression_test.csv" @@ -39,10 +41,10 @@ def main(readcsv=read_csv, method='defaultDense'): train_algo = d4p.decision_forest_regression_training( method=method, nTrees=100, - varImportance='MDA_Raw', + varImportance="MDA_Raw", bootstrap=True, engine=d4p.engines_mt2203(seed=777), - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation' + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", ) # Read data. Let's have 13 independent, @@ -74,7 +76,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_hist_batch.py b/examples/daal4py/decision_forest_regression_hist_batch.py index 3450d67b4b..024b31a330 100755 --- a/examples/daal4py/decision_forest_regression_hist_batch.py +++ b/examples/daal4py/decision_forest_regression_hist_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example of Hist method for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='hist'): +def main(readcsv=read_csv, method="hist"): infile = "./data/batch/df_regression_train.csv" testfile = "./data/batch/df_regression_test.csv" @@ -41,10 +43,10 @@ def main(readcsv=read_csv, method='hist'): maxBins=512, minBinSize=1, nTrees=100, - varImportance='MDA_Raw', + varImportance="MDA_Raw", bootstrap=True, engine=d4p.engines_mt2203(seed=777), - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation' + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", ) # Read data. Let's have 13 independent, @@ -76,7 +78,7 @@ def main(readcsv=read_csv, method='hist'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_forest_regression_traverse_batch.py b/examples/daal4py/decision_forest_regression_traverse_batch.py index 3da6cf75f4..2c53bb362c 100755 --- a/examples/daal4py/decision_forest_regression_traverse_batch.py +++ b/examples/daal4py/decision_forest_regression_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression Tree Traversal example import math -import daal4py as d4p from decision_forest_regression_default_dense_batch import main as df_regression +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -51,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/decision_tree_classification_batch.py b/examples/daal4py/decision_tree_classification_batch.py index a190a68345..320925a345 100644 --- a/examples/daal4py/decision_tree_classification_batch.py +++ b/examples/daal4py/decision_tree_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Classification example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/decision_tree_train.csv" prunefile = "./data/batch/decision_tree_prune.csv" @@ -65,7 +67,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, plabels) = main() print( "\nDecision tree prediction results (first 20 rows):\n", - predict_result.prediction[0:20] + predict_result.prediction[0:20], ) print("\nGround truth (first 20 rows):\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_classification_traverse_batch.py b/examples/daal4py/decision_tree_classification_traverse_batch.py index 8aea1fe029..6c9eb713d3 100644 --- a/examples/daal4py/decision_tree_classification_traverse_batch.py +++ b/examples/daal4py/decision_tree_classification_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems import math -import daal4py as d4p from decision_tree_classification_batch import main as dt_classification +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -52,4 +53,4 @@ def printNodes(node_id, nodes, values, level): treeState = d4p.getTreeState(train_result.model, treeId, 5) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_regression_batch.py b/examples/daal4py/decision_tree_regression_batch.py index 2c6d514f23..2379e59331 100644 --- a/examples/daal4py/decision_tree_regression_batch.py +++ b/examples/daal4py/decision_tree_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/decision_tree_train.csv" prunefile = "./data/batch/decision_tree_prune.csv" testfile = "./data/batch/decision_tree_test.csv" @@ -66,7 +68,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, ptdata) = main() print( "\nDecision tree prediction results (first 20 rows):\n", - predict_result.prediction[0:20] + predict_result.prediction[0:20], ) print("\nGround truth (first 10 rows):\n", ptdata[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/decision_tree_regression_traverse_batch.py b/examples/daal4py/decision_tree_regression_traverse_batch.py index 02cc9f3960..315cb79a84 100644 --- a/examples/daal4py/decision_tree_regression_traverse_batch.py +++ b/examples/daal4py/decision_tree_regression_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Tree Regression example for shared memory systems import math -import daal4py as d4p from decision_tree_regression_batch import main as dt_regression +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -52,4 +53,4 @@ def printNodes(node_id, nodes, values, level): treeState = d4p.getTreeState(train_result.model, treeId, 5) # Now let printTree traverse the TreeState printTree(treeState.node_ar, treeState.value_ar) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/distributions_bernoulli_batch.py b/examples/daal4py/distributions_bernoulli_batch.py index 0165083fea..a0eca7b8ff 100644 --- a/examples/daal4py/distributions_bernoulli_batch.py +++ b/examples/daal4py/distributions_bernoulli_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py bernoulli distribution example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p -def main(readcsv=None, method='defaultDense'): +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_bernoulli(0.5, engine=d4p.engines_mt19937(seed=777)) @@ -31,11 +31,7 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( - data, - [[ - 1.0, 1.000, 1.000, 0.000, 1.000, - 0.000, 1.000, 0.000, 1.000, 0.000 - ]] + data, [[1.0, 1.000, 1.000, 0.000, 1.000, 0.000, 1.000, 0.000, 1.000, 0.000]] ) return data diff --git a/examples/daal4py/distributions_normal_batch.py b/examples/daal4py/distributions_normal_batch.py index 8027087f6c..7a7aa3ea4b 100644 --- a/examples/daal4py/distributions_normal_batch.py +++ b/examples/daal4py/distributions_normal_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normal distribution example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p -def main(readcsv=None, method='defaultDense'): +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_normal(engine=d4p.engines_mt19937(seed=777)) @@ -32,10 +32,20 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( data, - [[ - -0.74104167, -0.13616829, -0.13679562, 2.40385531, -0.33556821, - 0.19041699, -0.61331181, 0.95958821, -0.42301092, 0.09460208 - ]] + [ + [ + -0.74104167, + -0.13616829, + -0.13679562, + 2.40385531, + -0.33556821, + 0.19041699, + -0.61331181, + 0.95958821, + -0.42301092, + 0.09460208, + ] + ], ) return data diff --git a/examples/daal4py/distributions_uniform_batch.py b/examples/daal4py/distributions_uniform_batch.py index 4341de87f6..fdccedc908 100644 --- a/examples/daal4py/distributions_uniform_batch.py +++ b/examples/daal4py/distributions_uniform_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py uniform distribution example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p -def main(readcsv=None, method='defaultDense'): +def main(readcsv=None, method="defaultDense"): # Create algorithm algorithm = d4p.distributions_uniform(engine=d4p.engines_mt19937(seed=777)) @@ -32,10 +32,20 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(data, res.randomNumbers) assert np.allclose( data, - [[ - 0.22933409, 0.44584412, 0.44559617, 0.9918884, 0.36859825, - 0.57550881, 0.26983509, 0.83136875, 0.33614365, 0.53768455, - ]] + [ + [ + 0.22933409, + 0.44584412, + 0.44559617, + 0.9918884, + 0.36859825, + 0.57550881, + 0.26983509, + 0.83136875, + 0.33614365, + 0.53768455, + ] + ], ) return data diff --git a/examples/daal4py/elastic_net_batch.py b/examples/daal4py/elastic_net_batch.py index 1f86a6b10c..90af8b9b76 100644 --- a/examples/daal4py/elastic_net_batch.py +++ b/examples/daal4py/elastic_net_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Elastic Net example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -63,7 +65,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nElastic Net prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/em_gmm_batch.py b/examples/daal4py/em_gmm_batch.py index f27b7da4ab..8118aec526 100644 --- a/examples/daal4py/em_gmm_batch.py +++ b/examples/daal4py/em_gmm_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py em_gmm example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nComponents = 2 infile = "./data/batch/em_gmm.csv" # We load the data @@ -59,4 +61,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("Means:\n", res.means) for c in res.covariances: print("Covariance:\n", c) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_classification_batch.py b/examples/daal4py/gradient_boosted_classification_batch.py index bfdebf48a8..77a8d99ac6 100644 --- a/examples/daal4py/gradient_boosted_classification_batch.py +++ b/examples/daal4py/gradient_boosted_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Classification example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 3 nClasses = 5 maxIterations = 200 @@ -46,7 +48,7 @@ def main(readcsv=read_csv, method='defaultDense'): maxIterations=maxIterations, minObservationsInLeafNode=minObservationsInLeafNode, featuresPerNode=nFeatures, - varImportance='weight|totalCover|cover|totalGain|gain' + varImportance="weight|totalCover|cover|totalGain|gain", ) # Read data. Let's use 3 features per observation @@ -58,7 +60,7 @@ def main(readcsv=read_csv, method='defaultDense'): # previous version has different interface predict_algo = d4p.gbt_classification_prediction( nClasses=nClasses, - resultsToEvaluate="computeClassLabels|computeClassProbabilities" + resultsToEvaluate="computeClassLabels|computeClassProbabilities", ) # read test data (with same #features) pdata = readcsv(testfile, range(3), t=np.float32) @@ -76,22 +78,20 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, plabels) = main() print( "\nGradient boosted trees prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) print( "\nGradient boosted trees prediction probabilities (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nvariableImportanceByWeight:\n", train_result.variableImportanceByWeight) print( - "\nvariableImportanceByTotalCover:\n", - train_result.variableImportanceByTotalCover + "\nvariableImportanceByTotalCover:\n", train_result.variableImportanceByTotalCover ) print("\nvariableImportanceByCover:\n", train_result.variableImportanceByCover) print( - "\nvariableImportanceByTotalGain:\n", - train_result.variableImportanceByTotalGain + "\nvariableImportanceByTotalGain:\n", train_result.variableImportanceByTotalGain ) print("\nvariableImportanceByGain:\n", train_result.variableImportanceByGain) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_classification_traverse_batch.py b/examples/daal4py/gradient_boosted_classification_traverse_batch.py index 8f8c1330f4..9f0d5a5824 100644 --- a/examples/daal4py/gradient_boosted_classification_traverse_batch.py +++ b/examples/daal4py/gradient_boosted_classification_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Boosting Classification Tree Traversal example import math -import daal4py as d4p from gradient_boosted_classification_batch import main as gbt_classification +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -51,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId, 5) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_regression_batch.py b/examples/daal4py/gradient_boosted_regression_batch.py index cfd49ec9ed..4292aceae9 100644 --- a/examples/daal4py/gradient_boosted_regression_batch.py +++ b/examples/daal4py/gradient_boosted_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): maxIterations = 200 # input data file @@ -54,16 +56,17 @@ def main(readcsv=read_csv, method='defaultDense'): predict_result = predict_algo.compute(pdata, train_result.model) # Prediction result provides prediction - ptdata = np.loadtxt(testfile, usecols=range(13, 14), - delimiter=',', ndmin=2, dtype=np.float32) + ptdata = np.loadtxt( + testfile, usecols=range(13, 14), delimiter=",", ndmin=2, dtype=np.float32 + ) # ptdata = np.loadtxt('../tests/unittest_data/gradient_boosted_regression_batch.csv', # delimiter=',', ndmin=2, dtype=np.float32) - if hasattr(ptdata, 'toarray'): + if hasattr(ptdata, "toarray"): ptdata = ptdata.toarray() # to make the next assertion work with scipy's csr_matrix - assert True or \ - np.square(predict_result.prediction - ptdata).mean() < 1e-2, \ - np.square(predict_result.prediction - ptdata).mean() + assert True or np.square(predict_result.prediction - ptdata).mean() < 1e-2, np.square( + predict_result.prediction - ptdata + ).mean() return (train_result, predict_result, ptdata) @@ -72,7 +75,7 @@ def main(readcsv=read_csv, method='defaultDense'): (train_result, predict_result, ptdata) = main() print( "\nGradient boosted trees prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/gradient_boosted_regression_traverse_batch.py b/examples/daal4py/gradient_boosted_regression_traverse_batch.py index 95dd718451..e00d172f8c 100644 --- a/examples/daal4py/gradient_boosted_regression_traverse_batch.py +++ b/examples/daal4py/gradient_boosted_regression_traverse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Boosting Regression Tree Traversal example import math -import daal4py as d4p from gradient_boosted_regression_batch import main as gbt_regression +import daal4py as d4p + def printTree(nodes, values): def printNodes(node_id, nodes, values, level): @@ -51,5 +52,5 @@ def printNodes(node_id, nodes, values, level): for treeId in range(train_result.model.NumberOfTrees): treeState = d4p.getTreeState(train_result.model, treeId) printTree(treeState.node_ar, treeState.value_ar) - print('Traversed {} trees.'.format(train_result.model.NumberOfTrees)) - print('All looks good!') + print("Traversed {} trees.".format(train_result.model.NumberOfTrees)) + print("All looks good!") diff --git a/examples/daal4py/implicit_als_batch.py b/examples/daal4py/implicit_als_batch.py index b210e35ec0..8750e72512 100644 --- a/examples/daal4py/implicit_als_batch.py +++ b/examples/daal4py/implicit_als_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py implicit_als example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFactors = 2 infile = "./data/batch/implicit_als_dense.csv" # We load the data @@ -61,4 +63,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("Predicted ratings:\n", res.prediction[:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/kdtree_knn_classification_batch.py b/examples/daal4py/kdtree_knn_classification_batch.py index 638147feb6..16cfdec5c3 100644 --- a/examples/daal4py/kdtree_knn_classification_batch.py +++ b/examples/daal4py/kdtree_knn_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,30 +12,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py KD-Tree KNN example for shared memory systems -import daal4py as d4p -import numpy as np import os +import numpy as np + +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input data set parameters - train_file = os.path.join('data', 'batch', 'k_nearest_neighbors_train.csv') - predict_file = os.path.join('data', 'batch', 'k_nearest_neighbors_test.csv') + train_file = os.path.join("data", "batch", "k_nearest_neighbors_train.csv") + predict_file = os.path.join("data", "batch", "k_nearest_neighbors_test.csv") # Read data. Let's use 5 features per observation nFeatures = 5 @@ -69,6 +72,5 @@ def main(readcsv=read_csv, method='defaultDense'): print("KD-tree based kNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print( - "Classification results(observations #30-34):\n", - predict_result.prediction[30:35] + "Classification results(observations #30-34):\n", predict_result.prediction[30:35] ) diff --git a/examples/daal4py/kmeans_batch.py b/examples/daal4py/kmeans_batch.py index 4df52858c0..565de34715 100644 --- a/examples/daal4py/kmeans_batch.py +++ b/examples/daal4py/kmeans_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py K-Means example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/kmeans_dense.csv" nClusters = 20 maxIter = 5 @@ -68,4 +70,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nFirst 10 cluster assignments:\n", result.assignments[0:10]) print("\nFirst 10 dimensions of centroids:\n", result.centroids[:, 0:10]) print("\nObjective function value:\n", result.objectiveFunction) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/kmeans_spmd.py b/examples/daal4py/kmeans_spmd.py index 766c702519..9d68000d2c 100644 --- a/examples/daal4py/kmeans_spmd.py +++ b/examples/daal4py/kmeans_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,17 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py K-Means example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./kmeans_spmd.py -import daal4py as d4p from numpy import loadtxt +import daal4py as d4p + -def main(method='plusPlusDense'): +def main(method="plusPlusDense"): infile = "./data/distributed/kmeans_dense.csv" nClusters = 10 maxIter = 25 @@ -30,11 +31,11 @@ def main(method='plusPlusDense'): # configure a kmeans-init init_algo = d4p.kmeans_init(nClusters, method=method, distributed=True) # Load the data - data = loadtxt(infile, delimiter=',') + data = loadtxt(infile, delimiter=",") # now slice the data, # it would have been better to read only what we need, of course... rpp = int(data.shape[0] / d4p.num_procs()) - data = data[rpp * d4p.my_procid(): rpp * d4p.my_procid() + rpp, :] + data = data[rpp * d4p.my_procid() : rpp * d4p.my_procid() + rpp, :] # compute initial centroids init_result = init_algo.compute(data) @@ -78,5 +79,5 @@ def main(method='plusPlusDense'): print("\nFirst 10 cluster assignments:\n", assignments[0:10]) print("\nFirst 10 dimensions of centroids:\n", result.centroids[:, 0:10]) print("\nObjective function value:\n", result.objectiveFunction) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/lasso_regression_batch.py b/examples/daal4py/lasso_regression_batch.py index c653e2ea31..6d8ea91f81 100644 --- a/examples/daal4py/lasso_regression_batch.py +++ b/examples/daal4py/lasso_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Lasso Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -58,7 +60,7 @@ def main(readcsv=read_csv, method='defaultDense'): # the example is used in tests with the scipy.sparse matrix # we use this trick until subtracting a sparse matrix is not supported - if hasattr(ptdata, 'toarray'): + if hasattr(ptdata, "toarray"): ptdata = ptdata.toarray() # this assertion is outdated, will be fixed in next release # assert np.square(predict_result.prediction - np.asarray(ptdata)).mean() < 2.2 @@ -70,7 +72,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nLasso Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/lbfgs_cr_entr_loss_batch.py b/examples/daal4py/lbfgs_cr_entr_loss_batch.py index 9700603400..329b7ec283 100644 --- a/examples/daal4py/lbfgs_cr_entr_loss_batch.py +++ b/examples/daal4py/lbfgs_cr_entr_loss_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,28 +12,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py LBFGS (limited memory Broyden-Fletcher-Goldfarb-Shanno) # example for shared memory systems # using cross entropy loss function -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 6 nClasses = 5 nIterations = 1000 @@ -47,15 +49,16 @@ def main(readcsv=read_csv, method='defaultDense'): nVectors = data.shape[0] # configure a function - func = d4p.optimization_solver_cross_entropy_loss(nClasses, nVectors, - interceptFlag=True) + func = d4p.optimization_solver_cross_entropy_loss( + nClasses, nVectors, interceptFlag=True + ) func.setup(data, dep_data) # configure a algorithm stepLengthSequence = np.array([[stepLength]], dtype=np.double) - alg = d4p.optimization_solver_lbfgs(func, - stepLengthSequence=stepLengthSequence, - nIterations=nIterations) + alg = d4p.optimization_solver_lbfgs( + func, stepLengthSequence=stepLengthSequence, nIterations=nIterations + ) # do the computation nParameters = nClasses * (nFeatures + 1) @@ -75,15 +78,45 @@ def main(readcsv=read_csv, method='defaultDense'): "\nExpected coefficients:\n", np.array( [ - [-2.277], [2.836], [14.985], [0.511], [7.510], [-2.831], [-5.814], - [-0.033], [13.227], [-24.447], [3.730], [10.394], [-10.461], [-0.766], - [0.077], [1.558], [-1.133], [2.884], [-3.825], [7.699], [2.421], - [-0.135], [-6.996], [1.785], [-2.294], [-9.819], [1.692], [-0.725], - [0.069], [-8.41], [1.458], [-3.306], [-4.719], [5.507], [-1.642] + [-2.277], + [2.836], + [14.985], + [0.511], + [7.510], + [-2.831], + [-5.814], + [-0.033], + [13.227], + [-24.447], + [3.730], + [10.394], + [-10.461], + [-0.766], + [0.077], + [1.558], + [-1.133], + [2.884], + [-3.825], + [7.699], + [2.421], + [-0.135], + [-6.996], + [1.785], + [-2.294], + [-9.819], + [1.692], + [-0.725], + [0.069], + [-8.41], + [1.458], + [-3.306], + [-4.719], + [5.507], + [-1.642], ], - dtype=np.double - ) + dtype=np.double, + ), ) print("\nResulting coefficients:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/lbfgs_mse_batch.py b/examples/daal4py/lbfgs_mse_batch.py index 49583131f0..9c5e1fd0e6 100644 --- a/examples/daal4py/lbfgs_mse_batch.py +++ b/examples/daal4py/lbfgs_mse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,28 +12,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py LBFGS (limited memory Broyden-Fletcher-Goldfarb-Shanno) # example for shared memory systems # using Mean Squared Error objective function -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/lbfgs.csv" # Read the data, let's have 10 independent variables data = readcsv(infile, range(10)) @@ -47,9 +49,9 @@ def main(readcsv=read_csv, method='defaultDense'): # configure an LBFGS object sls = np.array([[1.0e-4]], dtype=np.double) niters = 1000 - lbfgs_algo = d4p.optimization_solver_lbfgs(mse_algo, - stepLengthSequence=sls, - nIterations=niters) + lbfgs_algo = d4p.optimization_solver_lbfgs( + mse_algo, stepLengthSequence=sls, nIterations=niters + ) # finally do the computation inp = np.array([[100]] * 11, dtype=np.double) @@ -66,10 +68,9 @@ def main(readcsv=read_csv, method='defaultDense'): print( "\nExpected coefficients:\n", np.array( - [[11], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], - dtype=np.double - ) + [[11], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10]], dtype=np.double + ), ) print("\nResulting coefficients:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/linear_regression_batch.py b/examples/daal4py/linear_regression_batch.py index 8c139a3adb..cc66848eab 100644 --- a/examples/daal4py/linear_regression_batch.py +++ b/examples/daal4py/linear_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Linear Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -64,7 +66,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLinear Regression coefficients:\n", train_result.model.Beta) print( "\nLinear Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/linear_regression_spmd.py b/examples/daal4py/linear_regression_spmd.py index 7512d3f3ef..34789506d6 100644 --- a/examples/daal4py/linear_regression_spmd.py +++ b/examples/daal4py/linear_regression_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,30 +12,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Linear Regression example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./linreg_spmd.py -import daal4py as d4p from numpy import loadtxt +import daal4py as d4p + if __name__ == "__main__": # Initialize SPMD mode d4p.daalinit() # Each process gets its own data - infile = "./data/distributed/linear_regression_train_" + \ - str(d4p.my_procid() + 1) + ".csv" + infile = ( + "./data/distributed/linear_regression_train_" + str(d4p.my_procid() + 1) + ".csv" + ) # Configure a Linear regression training object train_algo = d4p.linear_regression_training(distributed=True) # Read data. Let's have 10 independent, # and 2 dependent variables (for each observation) - indep_data = loadtxt(infile, delimiter=',', usecols=range(10)) - dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12)) + indep_data = loadtxt(infile, delimiter=",", usecols=range(10)) + dep_data = loadtxt(infile, delimiter=",", usecols=range(10, 12)) # Now train/compute, the result provides the model for prediction train_result = train_algo.compute(indep_data, dep_data) @@ -44,14 +46,18 @@ if d4p.my_procid() == 0: predict_algo = d4p.linear_regression_prediction() # read test data (with same #features) - pdata = loadtxt("./data/distributed/linear_regression_test.csv", - delimiter=',', usecols=range(10)) + pdata = loadtxt( + "./data/distributed/linear_regression_test.csv", + delimiter=",", + usecols=range(10), + ) # now predict using the model from the training above - predict_result = d4p.linear_regression_prediction().compute(pdata, - train_result.model) + predict_result = d4p.linear_regression_prediction().compute( + pdata, train_result.model + ) # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/linear_regression_streaming.py b/examples/daal4py/linear_regression_streaming.py index f106867017..61fbcb1994 100644 --- a/examples/daal4py/linear_regression_streaming.py +++ b/examples/daal4py/linear_regression_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Linear Regression example for streaming on shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -37,7 +40,7 @@ def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): return a -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -81,7 +84,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLinear Regression coefficients:\n", train_result.model.Beta) print( "\nLinear Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_binary_dense_batch.py b/examples/daal4py/log_reg_binary_dense_batch.py index 3a7a33fe1d..794f4dbf0e 100644 --- a/examples/daal4py/log_reg_binary_dense_batch.py +++ b/examples/daal4py/log_reg_binary_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 2 nFeatures = 20 @@ -54,8 +56,10 @@ def main(readcsv=read_csv, method='defaultDense'): predict_result = predict_alg.compute(predict_data, train_result.model) # the prediction result provides prediction - assert predict_result.prediction.shape == (predict_data.shape[0], - train_labels.shape[1]) + assert predict_result.prediction.shape == ( + predict_data.shape[0], + train_labels.shape[1], + ) return (train_result, predict_result, predict_labels) @@ -65,7 +69,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_dense_batch.py b/examples/daal4py/log_reg_dense_batch.py index 53f9069622..44c548e650 100644 --- a/examples/daal4py/log_reg_dense_batch.py +++ b/examples/daal4py/log_reg_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 5 nFeatures = 6 @@ -41,10 +43,9 @@ def main(readcsv=read_csv, method='defaultDense'): train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1)) # set parameters and train - train_alg = d4p.logistic_regression_training(nClasses=nClasses, - penaltyL1=0.1, - penaltyL2=0.1, - interceptFlag=True) + train_alg = d4p.logistic_regression_training( + nClasses=nClasses, penaltyL1=0.1, penaltyL2=0.1, interceptFlag=True + ) train_result = train_alg.compute(train_data, train_labels) # read testing data from file with 6 features per observation @@ -52,24 +53,23 @@ def main(readcsv=read_csv, method='defaultDense'): predict_data = readcsv(testfile, range(nFeatures)) # set parameters and compute predictions - predict_alg = \ - d4p.logistic_regression_prediction( - nClasses=nClasses, - resultsToEvaluate="computeClassLabels|computeClassProbabilities|" - "computeClassLogProbabilities" - ) + predict_alg = d4p.logistic_regression_prediction( + nClasses=nClasses, + resultsToEvaluate="computeClassLabels|computeClassProbabilities|" + "computeClassLogProbabilities", + ) predict_result = predict_alg.compute(predict_data, train_result.model) # the prediction result provides prediction, probabilities and logProbabilities assert predict_result.probabilities.shape == (predict_data.shape[0], nClasses) assert predict_result.logProbabilities.shape == (predict_data.shape[0], nClasses) predict_labels = np.loadtxt( - testfile, - usecols=range(nFeatures, nFeatures + 1), - delimiter=',', - ndmin=2 + testfile, usecols=range(nFeatures, nFeatures + 1), delimiter=",", ndmin=2 + ) + assert ( + np.count_nonzero(predict_result.prediction - predict_labels) + / predict_labels.shape[0] + < 0.025 ) - assert np.count_nonzero(predict_result.prediction - predict_labels) \ - / predict_labels.shape[0] < 0.025 return (train_result, predict_result, predict_labels) @@ -79,15 +79,15 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) print( "\nLogistic regression prediction probabilities (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print( "\nLogistic regression prediction log probabilities (first 10 rows):\n", - predict_result.logProbabilities[0:10] + predict_result.logProbabilities[0:10], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/log_reg_model_builder.py b/examples/daal4py/log_reg_model_builder.py index e3b6519c99..c9962b1816 100644 --- a/examples/daal4py/log_reg_model_builder.py +++ b/examples/daal4py/log_reg_model_builder.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,14 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== -import daal4py as d4p import numpy as np -from daal4py.sklearn._utils import daal_check_version from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression +import daal4py as d4p +from daal4py.sklearn._utils import daal_check_version + def main(): X, y = load_iris(return_X_y=True) @@ -28,15 +29,15 @@ def main(): # set parameters and train clf = LogisticRegression(fit_intercept=True, max_iter=1000, random_state=0).fit(X, y) - #set parameters and call model builder - builder = d4p.logistic_regression_model_builder(n_classes=n_classes, - n_features=X.shape[1]) + # set parameters and call model builder + builder = d4p.logistic_regression_model_builder( + n_classes=n_classes, n_features=X.shape[1] + ) builder.set_beta(clf.coef_, clf.intercept_) # set parameters and compute predictions predict_alg = d4p.logistic_regression_prediction( - nClasses=n_classes, - resultsToEvaluate="computeClassLabels" + nClasses=n_classes, resultsToEvaluate="computeClassLabels" ) # set parameters and compute predictions predict_result_daal = predict_alg.compute(X, builder.model) @@ -46,19 +47,19 @@ def main(): if __name__ == "__main__": - if daal_check_version(((2021, 'P', 1))): + if daal_check_version(((2021, "P", 1))): (builder, predict_result_daal) = main() print("\nLogistic Regression coefficients:\n", builder.model) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result_daal.prediction[0:10] + predict_result_daal.prediction[0:10], ) print( "\nLogistic regression prediction probabilities (first 10 rows):\n", - predict_result_daal.probabilities[0:10] + predict_result_daal.probabilities[0:10], ) print( "\nLogistic regression prediction log probabilities (first 10 rows):\n", - predict_result_daal.logProbabilities[0:10] + predict_result_daal.logProbabilities[0:10], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/logitboost_batch.py b/examples/daal4py/logitboost_batch.py index ffa3d339a3..cd038ac44e 100644 --- a/examples/daal4py/logitboost_batch.py +++ b/examples/daal4py/logitboost_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,33 +12,36 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Logitboost example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/logitboost_train.csv" testfile = "./data/batch/logitboost_test.csv" nClasses = 5 # Configure a logitboost training object - train_algo = d4p.logitboost_training(nClasses, maxIterations=100, - accuracyThreshold=0.01) + train_algo = d4p.logitboost_training( + nClasses, maxIterations=100, accuracyThreshold=0.01 + ) # Read data. Let's have 20 independent, # and 1 dependent variable (for each observation) @@ -56,7 +59,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -66,6 +69,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Logitboost classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/low_order_moms_dense_batch.py b/examples/daal4py/low_order_moms_dense_batch.py index f6f8315716..6846fe204c 100644 --- a/examples/daal4py/low_order_moms_dense_batch.py +++ b/examples/daal4py/low_order_moms_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,23 +12,25 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) def main(readcsv=read_csv, method="defaultDense"): @@ -42,9 +44,21 @@ def main(readcsv=read_csv, method="defaultDense"): # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - assert all(getattr(res, name).shape == (1, data.shape[1]) for name in - ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']) + assert all( + getattr(res, name).shape == (1, data.shape[1]) + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ] + ) return res @@ -62,4 +76,4 @@ def main(readcsv=read_csv, method="defaultDense"): print("\nVariance:\n", res.variance) print("\nStandard deviation:\n", res.standardDeviation) print("\nVariation:\n", res.variation) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/low_order_moms_spmd.py b/examples/daal4py/low_order_moms_spmd.py index 132bc49275..0cd4102cfb 100644 --- a/examples/daal4py/low_order_moms_spmd.py +++ b/examples/daal4py/low_order_moms_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./low_order_moms_spmd.py -import daal4py as d4p - # let's use a reading of file in chunks (defined in spmd_utils.py) -from spmd_utils import read_csv, get_chunk_params +from spmd_utils import get_chunk_params, read_csv + +import daal4py as d4p def main(): @@ -29,24 +29,36 @@ def main(): # We know the number of lines in the file # and use this to separate data between processes - skiprows, nrows = get_chunk_params(lines_count=200, - chunks_count=d4p.num_procs(), - chunk_number=d4p.my_procid()) + skiprows, nrows = get_chunk_params( + lines_count=200, chunks_count=d4p.num_procs(), chunk_number=d4p.my_procid() + ) # Each process reads its chunk of the file data = read_csv(infile, sr=skiprows, nr=nrows) # Create algorithm with distributed mode - alg = d4p.low_order_moments(method='defaultDense', distributed=True) + alg = d4p.low_order_moments(method="defaultDense", distributed=True) # Perform computation res = alg.compute(data) # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - assert all(getattr(res, name).shape == (1, data.shape[1]) for name in - ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']) + assert all( + getattr(res, name).shape == (1, data.shape[1]) + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ] + ) return res diff --git a/examples/daal4py/low_order_moms_streaming.py b/examples/daal4py/low_order_moms_streaming.py index 8eb28984e5..00487812e2 100644 --- a/examples/daal4py/low_order_moms_streaming.py +++ b/examples/daal4py/low_order_moms_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for streaming on shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -37,7 +40,7 @@ def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): return a -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # read data from file file = "./data/batch/covcormoments_dense.csv" @@ -78,4 +81,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nVariance:\n", res.variance) print("\nStandard deviation:\n", res.standardDeviation) print("\nVariation:\n", res.variation) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/model_builders_catboost.py b/examples/daal4py/model_builders_catboost.py index ba497fef87..add5300388 100644 --- a/examples/daal4py/model_builders_catboost.py +++ b/examples/daal4py/model_builders_catboost.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,18 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Classification model creation from Catboost example -import daal4py as d4p import catboost as cb import numpy as np import pandas as pd +import daal4py as d4p + def pd_read_csv(f, c=None, t=np.float64): - return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pd.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) def main(readcsv=pd_read_csv): @@ -43,14 +44,14 @@ def main(readcsv=pd_read_csv): # training parameters setting params = { - 'reg_lambda': 1, - 'max_depth': 6, - 'num_leaves': 2**6, - 'verbose': 0, - 'objective': 'MultiClass', - 'learning_rate': 0.3, - 'n_estimators': 25, - 'classes_count': 5, + "reg_lambda": 1, + "max_depth": 6, + "num_leaves": 2**6, + "verbose": 0, + "objective": "MultiClass", + "learning_rate": 0.3, + "n_estimators": 25, + "classes_count": 5, } # Training @@ -58,7 +59,7 @@ def main(readcsv=pd_read_csv): cb_model.fit(cb_train) # Catboost prediction - cb_prediction = cb_model.predict(cb_test, prediction_type='Class').T[0] + cb_prediction = cb_model.predict(cb_test, prediction_type="Class").T[0] cb_errors_count = np.count_nonzero(cb_prediction - np.ravel(y_test)) # Conversion to daal4py @@ -69,24 +70,24 @@ def main(readcsv=pd_read_csv): daal_errors_count = np.count_nonzero(daal_prediction - np.ravel(y_test)) assert np.absolute(cb_errors_count - daal_errors_count) == 0 - return (cb_prediction, cb_errors_count, daal_prediction, - daal_errors_count, np.ravel(y_test)) + return ( + cb_prediction, + cb_errors_count, + daal_prediction, + daal_errors_count, + np.ravel(y_test), + ) if __name__ == "__main__": - (cb_prediction, cb_errors_count, - daal_prediction, daal_errors_count, y_test) = main() - print("\nCatboost prediction results (first 10 rows):\n", - cb_prediction[0:10]) - print("\ndaal4py prediction results (first 10 rows):\n", - daal_prediction[0:10]) + (cb_prediction, cb_errors_count, daal_prediction, daal_errors_count, y_test) = main() + print("\nCatboost prediction results (first 10 rows):\n", cb_prediction[0:10]) + print("\ndaal4py prediction results (first 10 rows):\n", daal_prediction[0:10]) print("\nGround truth (first 10 rows):\n", y_test[0:10]) print("Catboost errors count:", cb_errors_count) - print("Catboost accuracy score:", - 1 - cb_errors_count / cb_prediction.shape[0]) + print("Catboost accuracy score:", 1 - cb_errors_count / cb_prediction.shape[0]) print("\ndaal4py errors count:", daal_errors_count) - print("daal4py accuracy score:", - 1 - daal_errors_count / daal_prediction.shape[0]) + print("daal4py accuracy score:", 1 - daal_errors_count / daal_prediction.shape[0]) print("\nAll looks good!") diff --git a/examples/daal4py/model_builders_lightgbm.py b/examples/daal4py/model_builders_lightgbm.py index b8b5279496..44bc6a5428 100644 --- a/examples/daal4py/model_builders_lightgbm.py +++ b/examples/daal4py/model_builders_lightgbm.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,18 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Classification model creation from LightGBM example -import daal4py as d4p import lightgbm as lgb import numpy as np import pandas as pd +import daal4py as d4p + def pd_read_csv(f, c=None, t=np.float64): - return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pd.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) def main(readcsv=pd_read_csv): @@ -39,24 +40,22 @@ def main(readcsv=pd_read_csv): # Datasets creation lgb_train = lgb.Dataset( - X_train, - np.array(y_train).reshape(X_train.shape[0]), - free_raw_data=False + X_train, np.array(y_train).reshape(X_train.shape[0]), free_raw_data=False ) # training parameters setting params = { - 'max_bin': 256, - 'scale_pos_weight': 2, - 'lambda_l2': 1, - 'alpha': 0.9, - 'max_depth': 6, - 'num_leaves': 2**6, - 'verbose': -1, - 'objective': 'multiclass', - 'learning_rate': 0.3, - 'num_class': 5, - 'n_estimators': 25 + "max_bin": 256, + "scale_pos_weight": 2, + "lambda_l2": 1, + "alpha": 0.9, + "max_depth": 6, + "num_leaves": 2**6, + "verbose": -1, + "objective": "multiclass", + "learning_rate": 0.3, + "num_class": 5, + "n_estimators": 25, } # Training @@ -74,13 +73,23 @@ def main(readcsv=pd_read_csv): daal_errors_count = np.count_nonzero(daal_prediction - np.ravel(y_test)) assert np.absolute(lgb_errors_count - daal_errors_count) == 0 - return (lgb_prediction, lgb_errors_count, daal_prediction, - daal_errors_count, np.ravel(y_test)) + return ( + lgb_prediction, + lgb_errors_count, + daal_prediction, + daal_errors_count, + np.ravel(y_test), + ) if __name__ == "__main__": - (lgb_prediction, lgb_errors_count, daal_prediction, - daal_errors_count, y_test) = main() + ( + lgb_prediction, + lgb_errors_count, + daal_prediction, + daal_errors_count, + y_test, + ) = main() print("\nLightGBM prediction results (first 10 rows):\n", lgb_prediction[0:10]) print("\ndaal4py prediction results (first 10 rows):\n", daal_prediction[0:10]) print("\nGround truth (first 10 rows):\n", y_test[0:10]) diff --git a/examples/daal4py/model_builders_xgboost.py b/examples/daal4py/model_builders_xgboost.py index 2c29327df5..962a345dd1 100644 --- a/examples/daal4py/model_builders_xgboost.py +++ b/examples/daal4py/model_builders_xgboost.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,18 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Classification model creation from XGBoost example -import daal4py as d4p -import xgboost as xgb import numpy as np import pandas as pd +import xgboost as xgb + +import daal4py as d4p def pd_read_csv(f, c=None, t=np.float64): - return pd.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pd.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) def main(readcsv=pd_read_csv): @@ -43,17 +44,17 @@ def main(readcsv=pd_read_csv): # training parameters setting params = { - 'max_bin': 256, - 'scale_pos_weight': 2, - 'lambda_l2': 1, - 'alpha': 0.9, - 'max_depth': 6, - 'num_leaves': 2**6, - 'verbosity': 0, - 'objective': 'multi:softmax', - 'learning_rate': 0.3, - 'num_class': 5, - 'n_estimators': 25, + "max_bin": 256, + "scale_pos_weight": 2, + "lambda_l2": 1, + "alpha": 0.9, + "max_depth": 6, + "num_leaves": 2**6, + "verbosity": 0, + "objective": "multi:softmax", + "learning_rate": 0.3, + "num_class": 5, + "n_estimators": 25, } # Training @@ -71,13 +72,23 @@ def main(readcsv=pd_read_csv): daal_errors_count = np.count_nonzero(daal_prediction - np.ravel(y_test)) assert np.absolute(xgb_errors_count - daal_errors_count) == 0 - return (xgb_prediction, xgb_errors_count, daal_prediction, - daal_errors_count, np.ravel(y_test)) + return ( + xgb_prediction, + xgb_errors_count, + daal_prediction, + daal_errors_count, + np.ravel(y_test), + ) if __name__ == "__main__": - (xgb_prediction, xgb_errors_count, - daal_prediction, daal_errors_count, y_test) = main() + ( + xgb_prediction, + xgb_errors_count, + daal_prediction, + daal_errors_count, + y_test, + ) = main() print("\nXGBoost prediction results (first 10 rows):\n", xgb_prediction[0:10]) print("\ndaal4py prediction results (first 10 rows):\n", daal_prediction[0:10]) print("\nGround truth (first 10 rows):\n", y_test[0:10]) diff --git a/examples/daal4py/multivariate_outlier_batch.py b/examples/daal4py/multivariate_outlier_batch.py index 3705b03699..9b6d139c0f 100644 --- a/examples/daal4py/multivariate_outlier_batch.py +++ b/examples/daal4py/multivariate_outlier_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection multivariate example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -55,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (Default method) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/naive_bayes_batch.py b/examples/daal4py/naive_bayes_batch.py index 4300485799..034bf0e29f 100644 --- a/examples/daal4py/naive_bayes_batch.py +++ b/examples/daal4py/naive_bayes_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Naive Bayes Classification example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/naivebayes_train_dense.csv" testfile = "./data/batch/naivebayes_test_dense.csv" @@ -62,7 +64,7 @@ def main(readcsv=read_csv, method='defaultDense'): (presult, plabels) = main() print( "\nNaiveBayes classification results (first 20 observations):\n", - presult.prediction[0:20] + presult.prediction[0:20], ) print("\nGround truth (first 20 observations)\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/naive_bayes_spmd.py b/examples/daal4py/naive_bayes_spmd.py index aef97e8a4a..ed6bf7f4f3 100644 --- a/examples/daal4py/naive_bayes_spmd.py +++ b/examples/daal4py/naive_bayes_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,15 +12,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Naive Bayes Classification example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./naive_bayes_spmd.py -import daal4py as d4p from numpy import loadtxt +import daal4py as d4p + if __name__ == "__main__": # Initialize SPMD mode d4p.daalinit() @@ -32,8 +33,8 @@ talgo = d4p.multinomial_naive_bayes_training(20, distributed=True) # Read data. Let's use 20 features per observation - data = loadtxt(infile, delimiter=',', usecols=range(20)) - labels = loadtxt(infile, delimiter=',', usecols=range(20, 21)) + data = loadtxt(infile, delimiter=",", usecols=range(20)) + labels = loadtxt(infile, delimiter=",", usecols=range(20, 21)) labels.shape = (labels.size, 1) # must be a 2d array tresult = talgo.compute(data, labels) @@ -42,14 +43,15 @@ if d4p.my_procid() == 0: palgo = d4p.multinomial_naive_bayes_prediction(20) # read test data (with same #features) - pdata = loadtxt("./data/batch/naivebayes_test_dense.csv", - delimiter=',', usecols=range(20)) + pdata = loadtxt( + "./data/batch/naivebayes_test_dense.csv", delimiter=",", usecols=range(20) + ) # now predict using the model from the training above presult = palgo.compute(pdata, tresult.model) # Prediction result provides prediction assert presult.prediction.shape == (pdata.shape[0], 1) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/naive_bayes_streaming.py b/examples/daal4py/naive_bayes_streaming.py index 14634d5c01..2d8a44da58 100644 --- a/examples/daal4py/naive_bayes_streaming.py +++ b/examples/daal4py/naive_bayes_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Naive Bayes Classification example for streaming on shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -37,7 +40,7 @@ def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): return a -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/naivebayes_train_dense.csv" testfile = "./data/batch/naivebayes_test_dense.csv" @@ -81,7 +84,7 @@ def main(readcsv=read_csv, method='defaultDense'): (result, labels) = main() print( "\nNaiveBayes classification results (first 20 observations):\n", - result.prediction[0:20] + result.prediction[0:20], ) print("\nGround truth (first 20 observations)\n", labels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/normalization_minmax_batch.py b/examples/daal4py/normalization_minmax_batch.py index 7743b6eb9e..41bb24ef2f 100644 --- a/examples/daal4py/normalization_minmax_batch.py +++ b/examples/daal4py/normalization_minmax_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normalization minmax example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/normalization.csv" # configure a covariance object @@ -53,4 +55,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("MinMax result (first 5 rows):\n", res.normalizedData[:5]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/normalization_zscore_batch.py b/examples/daal4py/normalization_zscore_batch.py index 94bebb0f3e..51bd3fa493 100644 --- a/examples/daal4py/normalization_zscore_batch.py +++ b/examples/daal4py/normalization_zscore_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py normalization zscore example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/normalization.csv" # configure a covariance object @@ -56,4 +58,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": res = main() print("ZScore result (first 5 rows):\n", res.normalizedData[:5]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pca_batch.py b/examples/daal4py/pca_batch.py index 3bf3f4bd49..008ce0cee8 100644 --- a/examples/daal4py/pca_batch.py +++ b/examples/daal4py/pca_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,34 +12,39 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/pca_normalized.csv" # 'normalization' is an optional parameter to PCA; # we use z-score which could be configured differently zscore = d4p.normalization_zscore() # configure a PCA object - algo = d4p.pca(resultsToCompute="mean|variance|eigenvalue", - isDeterministic=True, normalization=zscore) + algo = d4p.pca( + resultsToCompute="mean|variance|eigenvalue", + isDeterministic=True, + normalization=zscore, + ) # let's provide a file directly, not a table/array result1 = algo.compute(infile) @@ -67,4 +72,4 @@ def main(readcsv=read_csv, method='svdDense'): print("\nEigenvectors:\n", result1.eigenvectors) print("\nMeans:\n", result1.means) print("\nVariances:\n", result1.variances) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pca_spmd.py b/examples/daal4py/pca_spmd.py index ed0ee05fd5..c6c615e968 100644 --- a/examples/daal4py/pca_spmd.py +++ b/examples/daal4py/pca_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,14 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./pca_spmd.py +from numpy import allclose, loadtxt + import daal4py as d4p -from numpy import loadtxt, allclose if __name__ == "__main__": # Initialize SPMD mode @@ -29,23 +30,27 @@ infile = "./data/distributed/pca_normalized_" + str(d4p.my_procid() + 1) + ".csv" # configure a PCA object to use svd instead of default correlation - algo = d4p.pca(method='svdDense', distributed=True) + algo = d4p.pca(method="svdDense", distributed=True) # let's provide a file directly, not a table/array result1 = algo.compute(infile) # We can also load the data ourselfs and provide the numpy array - data = loadtxt(infile, delimiter=',') + data = loadtxt(infile, delimiter=",") result2 = algo.compute(data) # PCA result objects provide eigenvalues, eigenvectors, means and variances assert allclose(result1.eigenvalues, result2.eigenvalues) assert allclose(result1.eigenvectors, result2.eigenvectors) - assert result1.means is None and \ - result2.means is None or \ - allclose(result1.means, result2.means) - assert result1.variances is None and \ - result2.variances is None or \ - allclose(result1.variances, result2.variances) - - print('All looks good!') + assert ( + result1.means is None + and result2.means is None + or allclose(result1.means, result2.means) + ) + assert ( + result1.variances is None + and result2.variances is None + or allclose(result1.variances, result2.variances) + ) + + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/pca_transform_batch.py b/examples/daal4py/pca_transform_batch.py index b96dd55fbd..8bf250bb6a 100644 --- a/examples/daal4py/pca_transform_batch.py +++ b/examples/daal4py/pca_transform_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): dataFileName = "data/batch/pca_transform.csv" nComponents = 2 @@ -44,8 +46,9 @@ def main(readcsv=read_csv, method='svdDense'): # Apply transform with whitening because means and eigenvalues are provided pcatrans_algo = d4p.pca_transform(nComponents=nComponents) - pcatrans_res = pcatrans_algo.compute(data, pca_res.eigenvectors, - pca_res.dataForTransform) + pcatrans_res = pcatrans_algo.compute( + data, pca_res.eigenvectors, pca_res.dataForTransform + ) # pca_transform_result objects provides transformedData return (pca_res, pcatrans_res) @@ -57,9 +60,9 @@ def main(readcsv=read_csv, method='svdDense'): # print PCA results print("\nEigenvalues:\n", pca_res.eigenvalues) print("\nEigenvectors:\n", pca_res.eigenvectors) - print("\nEigenvalues kv:\n", pca_res.dataForTransform['eigenvalue']) - print("\nMeans kv:\n", pca_res.dataForTransform['mean']) - print("\nVariances kv:\n", pca_res.dataForTransform['variance']) + print("\nEigenvalues kv:\n", pca_res.dataForTransform["eigenvalue"]) + print("\nMeans kv:\n", pca_res.dataForTransform["mean"]) + print("\nVariances kv:\n", pca_res.dataForTransform["variance"]) # print results of tranform print("\nTransformed data:", pcatrans_res.transformedData) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/pivoted_qr_batch.py b/examples/daal4py/pivoted_qr_batch.py index 06a55e2901..071d465253 100644 --- a/examples/daal4py/pivoted_qr_batch.py +++ b/examples/daal4py/pivoted_qr_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py pivoted QR example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/qr.csv" # configure a pivoted QR object @@ -53,4 +55,4 @@ def main(readcsv=read_csv, method='svdDense'): print("Orthogonal matrix Q (:10):\n", result.matrixQ[:10]) print("Triangular matrix R:\n", result.matrixR) print("\nPermutation matrix P:\n", result.permutationMatrix) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/qr_batch.py b/examples/daal4py/qr_batch.py index 8dfdc387db..b14a2ea437 100644 --- a/examples/daal4py/qr_batch.py +++ b/examples/daal4py/qr_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py QR example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='svdDense'): +def main(readcsv=read_csv, method="svdDense"): infile = "./data/batch/qr.csv" # configure a QR object @@ -51,7 +53,7 @@ def main(readcsv=read_csv, method='svdDense'): assert np.allclose(result1.matrixQ, result2.matrixQ, atol=1e-07) assert np.allclose(result1.matrixR, result2.matrixR, atol=1e-07) - if hasattr(data, 'toarray'): + if hasattr(data, "toarray"): data = data.toarray() # to make the next assertion work with scipy's csr_matrix assert np.allclose(data, np.matmul(result1.matrixQ, result1.matrixR)) @@ -61,4 +63,4 @@ def main(readcsv=read_csv, method='svdDense'): if __name__ == "__main__": (_, result) = main() print(result) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/qr_spmd.py b/examples/daal4py/qr_spmd.py index 4c1564ad7e..ee59cd8356 100644 --- a/examples/daal4py/qr_spmd.py +++ b/examples/daal4py/qr_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,14 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py QR example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./qr_spmd.py +from numpy import allclose, loadtxt + import daal4py as d4p -from numpy import loadtxt, allclose def main(): @@ -33,7 +34,7 @@ def main(): result1 = algo.compute(infile) # We can also load the data ourselfs and provide the numpy array - data = loadtxt(infile, delimiter=',') + data = loadtxt(infile, delimiter=",") result2 = algo.compute(data) # QR result provide matrixQ and matrixR @@ -54,5 +55,5 @@ def main(): if d4p.my_procid() == 0: print("\nEach process has matrixR but only his part of matrixQ:\n") print(result) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/qr_streaming.py b/examples/daal4py/qr_streaming.py index 8b82c8cc5d..86d82249e9 100644 --- a/examples/daal4py/qr_streaming.py +++ b/examples/daal4py/qr_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,17 +12,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py QR example for shared memory systems -import daal4py as d4p - # let's use a generator for getting stream from file (defined in stream.py) from stream import read_next +import daal4py as d4p + -def main(readcsv=None, method='svdDense'): +def main(readcsv=None, method="svdDense"): infile = "./data/batch/qr.csv" # configure a QR object @@ -45,4 +45,4 @@ def main(readcsv=None, method='svdDense'): result = main() print("Orthogonal matrix Q:\n", result.matrixQ[:10]) print("Triangular matrix R:\n", result.matrixR) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/quantiles_batch.py b/examples/daal4py/quantiles_batch.py index eb362deadc..814a78dfb6 100644 --- a/examples/daal4py/quantiles_batch.py +++ b/examples/daal4py/quantiles_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py quantiles example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/quantiles.csv" # configure a quantiles object @@ -53,4 +55,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("Quantiles:\n", result.quantiles) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/ridge_regression_batch.py b/examples/daal4py/ridge_regression_batch.py index c430af5772..84268683b3 100644 --- a/examples/daal4py/ridge_regression_batch.py +++ b/examples/daal4py/ridge_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Ridge Regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -63,7 +65,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nRidge Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/ridge_regression_spmd.py b/examples/daal4py/ridge_regression_spmd.py index 26922d3e72..daf73dba79 100644 --- a/examples/daal4py/ridge_regression_spmd.py +++ b/examples/daal4py/ridge_regression_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,30 +12,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Ridge Regression example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./ridge_regression_spmd.py -import daal4py as d4p from numpy import loadtxt +import daal4py as d4p + if __name__ == "__main__": # Initialize SPMD mode d4p.daalinit() # Each process gets its own data - infile = "./data/distributed/linear_regression_train_" + \ - str(d4p.my_procid() + 1) + ".csv" + infile = ( + "./data/distributed/linear_regression_train_" + str(d4p.my_procid() + 1) + ".csv" + ) # Configure a Ridge regression training object train_algo = d4p.ridge_regression_training(distributed=True) # Read data. Let's have 10 independent, # and 2 dependent variables (for each observation) - indep_data = loadtxt(infile, delimiter=',', usecols=range(10)) - dep_data = loadtxt(infile, delimiter=',', usecols=range(10, 12)) + indep_data = loadtxt(infile, delimiter=",", usecols=range(10)) + dep_data = loadtxt(infile, delimiter=",", usecols=range(10, 12)) # Now train/compute, the result provides the model for prediction train_result = train_algo.compute(indep_data, dep_data) @@ -44,14 +46,18 @@ if d4p.my_procid() == 0: predict_algo = d4p.ridge_regression_prediction() # read test data (with same #features) - pdata = loadtxt("./data/distributed/linear_regression_test.csv", - delimiter=',', usecols=range(10)) + pdata = loadtxt( + "./data/distributed/linear_regression_test.csv", + delimiter=",", + usecols=range(10), + ) # now predict using the model from the training above - predict_result = d4p.ridge_regression_prediction().compute(pdata, - train_result.model) + predict_result = d4p.ridge_regression_prediction().compute( + pdata, train_result.model + ) # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/ridge_regression_streaming.py b/examples/daal4py/ridge_regression_streaming.py index b0248f9d14..e1c5d4d55b 100644 --- a/examples/daal4py/ridge_regression_streaming.py +++ b/examples/daal4py/ridge_regression_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Ridge Regression example for streaming on shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -37,7 +40,7 @@ def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): return a -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/linear_regression_train.csv" testfile = "./data/batch/linear_regression_test.csv" @@ -80,7 +83,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, ptdata) = main() print( "\nRidge Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", ptdata[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/saga_batch.py b/examples/daal4py/saga_batch.py index 384a5cb6d7..cc7a0dc4bd 100644 --- a/examples/daal4py/saga_batch.py +++ b/examples/daal4py/saga_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Saga example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/XM.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(1)) @@ -39,22 +41,26 @@ def main(readcsv=read_csv, method='defaultDense'): nVectors = data.shape[0] # configure a Logistic Loss object - logloss_algo = d4p.optimization_solver_logistic_loss(numberOfTerms=nVectors, - penaltyL1=0.3, - penaltyL2=0, - interceptFlag=True, - resultsToCompute='gradient') + logloss_algo = d4p.optimization_solver_logistic_loss( + numberOfTerms=nVectors, + penaltyL1=0.3, + penaltyL2=0, + interceptFlag=True, + resultsToCompute="gradient", + ) logloss_algo.setup(data, dep_data) # configure an Saga object lr = np.array([[0.01]], dtype=np.double) niters = 100000 - saga_algo = d4p.optimization_solver_saga(nIterations=niters, - accuracyThreshold=1e-5, - batchSize=1, - function=logloss_algo, - learningRateSequence=lr, - optionalResultRequired=True) + saga_algo = d4p.optimization_solver_saga( + nIterations=niters, + accuracyThreshold=1e-5, + batchSize=1, + function=logloss_algo, + learningRateSequence=lr, + optionalResultRequired=True, + ) # finally do the computation inp = np.zeros((2, 1), dtype=np.double) @@ -71,4 +77,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sgd_logistic_loss_batch.py b/examples/daal4py/sgd_logistic_loss_batch.py index e28af65f47..bd98ab895f 100644 --- a/examples/daal4py/sgd_logistic_loss_batch.py +++ b/examples/daal4py/sgd_logistic_loss_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,27 +12,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SGD (Stochastic Gradient Descent) example for shared memory systems # using Logisitc Loss objective function -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/custom.csv" # Read the data, let's have 4 independent variables data = readcsv(infile, range(4)) @@ -46,10 +48,9 @@ def main(readcsv=read_csv, method='defaultDense'): # configure a SGD object lrs = np.array([[0.01]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_sgd(ll_algo, - learningRateSequence=lrs, - accuracyThreshold=0.02, - nIterations=niters) + sgd_algo = d4p.optimization_solver_sgd( + ll_algo, learningRateSequence=lrs, accuracyThreshold=0.02, nIterations=niters + ) # finally do the computation inp = np.array([[1], [1], [1], [1], [1]], dtype=np.double) @@ -65,4 +66,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sgd_mse_batch.py b/examples/daal4py/sgd_mse_batch.py index f91578fa38..c0e578abe3 100644 --- a/examples/daal4py/sgd_mse_batch.py +++ b/examples/daal4py/sgd_mse_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,27 +12,29 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SGD (Stochastic Gradient Descent) example for shared memory systems # using Mean Squared Error objective function -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/mse.csv" # Read the data, let's have 3 independent variables data = readcsv(infile, range(3)) @@ -46,10 +48,12 @@ def main(readcsv=read_csv, method='defaultDense'): # configure a SGD object lrs = np.array([[1.0]], dtype=np.double) niters = 1000 - sgd_algo = d4p.optimization_solver_sgd(mse_algo, - learningRateSequence=lrs, - accuracyThreshold=0.0000001, - nIterations=niters) + sgd_algo = d4p.optimization_solver_sgd( + mse_algo, + learningRateSequence=lrs, + accuracyThreshold=0.0000001, + nIterations=niters, + ) # finally do the computation inp = np.array([[8], [2], [1], [4]], dtype=np.double) @@ -65,4 +69,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("\nMinimum:\n", res.minimum) print("\nNumber of iterations performed:\n", res.nIterations[0][0]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sorting_batch.py b/examples/daal4py/sorting_batch.py index af91bb9501..12160cae1d 100644 --- a/examples/daal4py/sorting_batch.py +++ b/examples/daal4py/sorting_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py sorting example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/sorting.csv" # configure a sorting object @@ -48,7 +50,7 @@ def main(readcsv=read_csv, method='defaultDense'): assert np.allclose(result1.sortedData, result2.sortedData) assert np.allclose( result1.sortedData, - np.sort(data.toarray() if hasattr(data, 'toarray') else data, axis=0) + np.sort(data.toarray() if hasattr(data, "toarray") else data, axis=0), ) return result1 @@ -57,4 +59,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": result = main() print("Sorted matrix of observations:\n", result.sortedData) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/spmd_utils.py b/examples/daal4py/spmd_utils.py index 26c7de0dff..a5bb96e889 100644 --- a/examples/daal4py/spmd_utils.py +++ b/examples/daal4py/spmd_utils.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # Example showing reading of file in few chunks, this reader is used in SPMD examples @@ -23,20 +23,23 @@ import pandas def read_csv(f, c=None, sr=0, nr=None, t=np.float64): - return pandas.read_csv(f, usecols=c, skiprows=sr, nrows=nr, - delimiter=',', header=None, dtype=t) + return pandas.read_csv( + f, usecols=c, skiprows=sr, nrows=nr, delimiter=",", header=None, dtype=t + ) + except: # fall back to numpy loadtxt def read_csv(f, c=None, sr=0, nr=np.iinfo(np.int64).max, t=np.float64): - res = np.genfromtxt(f, usecols=c, delimiter=',', - skip_header=sr, max_rows=nr, dtype=t) + res = np.genfromtxt( + f, usecols=c, delimiter=",", skip_header=sr, max_rows=nr, dtype=t + ) if res.ndim == 1: return res[:, np.newaxis] return res def get_chunk_params(lines_count, chunks_count, chunk_number): - 'returns count of rows to skip from beginning of file and count of rows to read' + "returns count of rows to skip from beginning of file and count of rows to read" min_nrows = (int)(lines_count / chunks_count) rest_rows = lines_count - min_nrows * chunks_count is_tail = rest_rows > chunk_number diff --git a/examples/daal4py/stream.py b/examples/daal4py/stream.py index 4fec64d3a7..d8cf8da821 100644 --- a/examples/daal4py/stream.py +++ b/examples/daal4py/stream.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # Example showing daal4py's operation on streams using a generator -import daal4py as d4p -import numpy as np import os +import numpy as np + +import daal4py as d4p + try: import pandas def read_csv(f, c=None, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, - skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c=None, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: diff --git a/examples/daal4py/stump_classification_batch.py b/examples/daal4py/stump_classification_batch.py index f82467c524..eaca5a514b 100644 --- a/examples/daal4py/stump_classification_batch.py +++ b/examples/daal4py/stump_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,31 +12,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Stump classification example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/stump_train.csv" testfile = "./data/batch/stump_test.csv" # Configure a stump classification training object - train_algo = d4p.stump_classification_training(splitCriterion='gini') + train_algo = d4p.stump_classification_training(splitCriterion="gini") # Read data. Let's have 20 independent, # and 1 dependent variable (for each observation) @@ -54,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -64,6 +66,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Stump classification results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/stump_regression_batch.py b/examples/daal4py/stump_regression_batch.py index 9b8a31ba54..ab8ff1088a 100644 --- a/examples/daal4py/stump_regression_batch.py +++ b/examples/daal4py/stump_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Stump regression example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/stump_train.csv" testfile = "./data/batch/stump_test.csv" @@ -54,7 +56,7 @@ def main(readcsv=read_csv, method='defaultDense'): # The prediction result provides prediction assert predict_result.prediction.shape == (pdata.shape[0], dep_data.shape[1]) - ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=',', ndmin=2) + ptdata = np.loadtxt(testfile, usecols=range(20, 21), delimiter=",", ndmin=2) assert np.allclose(predict_result.prediction, ptdata) return (train_result, predict_result, ptdata) @@ -65,6 +67,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nGround truth (first 20 observations):\n", ptdata[:20]) print( "Stump regression results: (first 20 observations):\n", - predict_result.prediction[:20] + predict_result.prediction[:20], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svd_batch.py b/examples/daal4py/svd_batch.py index be2d33dafa..002d895787 100644 --- a/examples/daal4py/svd_batch.py +++ b/examples/daal4py/svd_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVD example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infile = "./data/batch/svd.csv" # configure a SVD object @@ -47,23 +49,23 @@ def main(readcsv=read_csv, method='defaultDense'): # SVD result objects provide leftSingularMatrix, # rightSingularMatrix and singularValues - assert np.allclose(result1.leftSingularMatrix, - result2.leftSingularMatrix, atol=1e-07) - assert np.allclose(result1.rightSingularMatrix, - result2.rightSingularMatrix, atol=1e-07) + assert np.allclose(result1.leftSingularMatrix, result2.leftSingularMatrix, atol=1e-07) + assert np.allclose( + result1.rightSingularMatrix, result2.rightSingularMatrix, atol=1e-07 + ) assert np.allclose(result1.singularValues, result2.singularValues, atol=1e-07) assert result1.singularValues.shape == (1, data.shape[1]) assert result1.rightSingularMatrix.shape == (data.shape[1], data.shape[1]) assert result1.leftSingularMatrix.shape == data.shape - if hasattr(data, 'toarray'): + if hasattr(data, "toarray"): data = data.toarray() # to make the next assertion work with scipy's csr_matrix assert np.allclose( data, np.matmul( np.matmul(result1.leftSingularMatrix, np.diag(result1.singularValues[0])), - result1.rightSingularMatrix - ) + result1.rightSingularMatrix, + ), ) return (data, result1) @@ -72,4 +74,4 @@ def main(readcsv=read_csv, method='defaultDense'): if __name__ == "__main__": (_, result) = main() print(result) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svd_spmd.py b/examples/daal4py/svd_spmd.py index ecbca37d36..607bb27c64 100644 --- a/examples/daal4py/svd_spmd.py +++ b/examples/daal4py/svd_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,14 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVD example for distributed memory systems; SPMD mode # run like this: # mpirun -n 4 python ./svd_spmd.py +from numpy import allclose, loadtxt + import daal4py as d4p -from numpy import loadtxt, allclose def main(): @@ -33,7 +34,7 @@ def main(): result1 = algo.compute(infile) # We can also load the data ourselfs and provide the numpy array - data = loadtxt(infile, delimiter=',') + data = loadtxt(infile, delimiter=",") result2 = algo.compute(data) # SVD result objects provide leftSingularMatrix, @@ -60,5 +61,5 @@ def main(): "but only his part of leftSingularMatrix:\n" ) print(result) - print('All looks good!') + print("All looks good!") d4p.daalfini() diff --git a/examples/daal4py/svd_streaming.py b/examples/daal4py/svd_streaming.py index cf40681871..f5760f7dd3 100644 --- a/examples/daal4py/svd_streaming.py +++ b/examples/daal4py/svd_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,24 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVD example for streaming on shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, s=0, n=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', - header=None, skiprows=s, nrows=n, dtype=t) + return pandas.read_csv( + f, usecols=c, delimiter=",", header=None, skiprows=s, nrows=n, dtype=t + ) + except: # fall back to numpy genfromtxt def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): - a = np.genfromtxt(f, usecols=c, delimiter=',', skip_header=s, max_rows=n) + a = np.genfromtxt(f, usecols=c, delimiter=",", skip_header=s, max_rows=n) if a.shape[0] == 0: raise Exception("done") if a.ndim == 1: @@ -37,7 +40,7 @@ def read_csv(f, c, s=0, n=np.iinfo(np.int64).max): return a -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): infiles = ["./data/distributed/svd_{}.csv".format(i) for i in range(1, 5)] # configure a SVD object @@ -61,7 +64,6 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nSingular values:\n", result.singularValues) print("\nRight orthogonal matrix V:\n", result.rightSingularMatrix) print( - "\nLeft orthogonal matrix U (first 10 rows):\n", - result.leftSingularMatrix[0:10] + "\nLeft orthogonal matrix U (first 10 rows):\n", result.leftSingularMatrix[0:10] ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svm_batch.py b/examples/daal4py/svm_batch.py index f282a23947..38e35e8677 100644 --- a/examples/daal4py/svm_batch.py +++ b/examples/daal4py/svm_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVM example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # input data file infile = "./data/batch/svm_two_class_train_dense.csv" testfile = "./data/batch/svm_two_class_test_dense.csv" @@ -39,7 +41,7 @@ def main(readcsv=read_csv, method='defaultDense'): # Configure a SVM object to use rbf kernel (and adjusting cachesize) kern = d4p.kernel_function_linear() # need an object that lives when creating train_algo - train_algo = d4p.svm_training(method='thunder', kernel=kern, cacheSize=600000000) + train_algo = d4p.svm_training(method="thunder", kernel=kern, cacheSize=600000000) # Read data. Let's use features per observation data = readcsv(infile, range(20)) @@ -69,8 +71,8 @@ def main(readcsv=read_csv, method='defaultDense'): print( "\nSVM classification decision function (first 20 observations):\n", - decision_function[0:20] + decision_function[0:20], ) print("\nSVM classification results (first 20 observations):\n", predict_labels[0:20]) print("\nGround truth (first 20 observations):\n", plabels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/svm_multiclass_batch.py b/examples/daal4py/svm_multiclass_batch.py index 324ccc2a91..f56c814f9d 100644 --- a/examples/daal4py/svm_multiclass_batch.py +++ b/examples/daal4py/svm_multiclass_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,40 +12,42 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py multi-class SVM example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 20 nClasses = 5 # read training data from file # with nFeatures features per observation and 1 class label - train_file = 'data/batch/svm_multi_class_train_dense.csv' + train_file = "data/batch/svm_multi_class_train_dense.csv" train_data = readcsv(train_file, range(nFeatures)) train_labels = readcsv(train_file, range(nFeatures, nFeatures + 1)) # Create and configure algorithm object algorithm = d4p.multi_class_classifier_training( nClasses=nClasses, - training=d4p.svm_training(method='thunder'), - prediction=d4p.svm_prediction() + training=d4p.svm_training(method="thunder"), + prediction=d4p.svm_prediction(), ) # Pass data to training. Training result provides model @@ -55,15 +57,15 @@ def main(readcsv=read_csv, method='defaultDense'): # Now the prediction stage # Read data - pred_file = 'data/batch/svm_multi_class_test_dense.csv' + pred_file = "data/batch/svm_multi_class_test_dense.csv" pred_data = readcsv(pred_file, range(nFeatures)) pred_labels = readcsv(pred_file, range(nFeatures, nFeatures + 1)) # Create an algorithm object to predict multi-class SVM values algorithm = d4p.multi_class_classifier_prediction( nClasses, - training=d4p.svm_training(method='thunder'), - prediction=d4p.svm_prediction() + training=d4p.svm_training(method="thunder"), + prediction=d4p.svm_prediction(), ) # Pass data to prediction. Prediction result provides prediction pred_result = algorithm.compute(pred_data, train_result.model) @@ -76,7 +78,7 @@ def main(readcsv=read_csv, method='defaultDense'): (pred_res, pred_labels) = main() print( "\nSVM classification results (first 20 observations):\n", - pred_res.prediction[0:20] + pred_res.prediction[0:20], ) print("\nGround truth (first 20 observations):\n", pred_labels[0:20]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/bf_knn_classification_batch.py b/examples/daal4py/sycl/bf_knn_classification_batch.py index ff05908749..9c9d8770f8 100644 --- a/examples/daal4py/sycl/bf_knn_classification_batch.py +++ b/examples/daal4py/sycl/bf_knn_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py BF KNN example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -44,12 +49,14 @@ def read_csv(f, c, t=np.float64): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -60,19 +67,19 @@ def to_numpy(data): # Common code for both CPU and GPU computations def compute(train_data, train_labels, predict_data, nClasses): # Create an algorithm object and call compute - train_algo = d4p.bf_knn_classification_training(nClasses=nClasses, fptype='float') + train_algo = d4p.bf_knn_classification_training(nClasses=nClasses, fptype="float") train_result = train_algo.compute(train_data, train_labels) # Create an algorithm object and call compute - predict_algo = d4p.bf_knn_classification_prediction(nClasses=nClasses, fptype='float') + predict_algo = d4p.bf_knn_classification_prediction(nClasses=nClasses, fptype="float") predict_result = predict_algo.compute(predict_data, train_result.model) return predict_result -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input data set parameters - train_file = os.path.join('..', 'data', 'batch', 'k_nearest_neighbors_train.csv') - predict_file = os.path.join('..', 'data', 'batch', 'k_nearest_neighbors_test.csv') + train_file = os.path.join("..", "data", "batch", "k_nearest_neighbors_train.csv") + predict_file = os.path.join("..", "data", "batch", "k_nearest_neighbors_test.csv") # Read data. Let's use 5 features per observation nFeatures = 5 @@ -92,25 +99,29 @@ def main(readcsv=read_csv, method='defaultDense'): predict_data = to_numpy(predict_data) if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - predict_result_gpu = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) - assert np.allclose(predict_result_gpu.prediction, - predict_result_classic.prediction) + predict_result_gpu = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) + assert np.allclose( + predict_result_gpu.prediction, predict_result_classic.prediction + ) - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - predict_result_cpu = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) - assert np.allclose(predict_result_cpu.prediction, - predict_result_classic.prediction) + predict_result_cpu = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) + assert np.allclose( + predict_result_cpu.prediction, predict_result_classic.prediction + ) return (predict_result_classic, predict_labels) @@ -120,6 +131,5 @@ def main(readcsv=read_csv, method='defaultDense'): print("BF based KNN classification results:") print("Ground truth(observations #30-34):\n", predict_labels[30:35]) print( - "Classification results(observations #30-34):\n", - predict_result.prediction[30:35] + "Classification results(observations #30-34):\n", predict_result.prediction[30:35] ) diff --git a/examples/daal4py/sycl/covariance_batch.py b/examples/daal4py/sycl/covariance_batch.py index cd28254e70..40a561a326 100644 --- a/examples/daal4py/sycl/covariance_batch.py +++ b/examples/daal4py/sycl/covariance_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,16 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -44,7 +48,7 @@ def read_csv(f, c, t=np.float64): # Common code for both CPU and GPU computations def compute(data, method): # configure a covariance object - algo = d4p.covariance(method=method, fptype='float') + algo = d4p.covariance(method=method, fptype="float") return algo.compute(data) @@ -52,12 +56,14 @@ def compute(data, method): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -65,8 +71,8 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): - infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv') +def main(readcsv=read_csv, method="defaultDense"): + infile = os.path.join("..", "data", "batch", "covcormoments_dense.csv") # Load the data data = readcsv(infile, range(10), t=np.float32) @@ -78,18 +84,18 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) - result_gpu = compute(sycl_data, 'defaultDense') + result_gpu = compute(sycl_data, "defaultDense") assert np.allclose(result_classic.covariance, result_gpu.covariance) assert np.allclose(result_classic.mean, result_gpu.mean) assert np.allclose(result_classic.correlation, result_gpu.correlation) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) - result_cpu = compute(sycl_data, 'defaultDense') + result_cpu = compute(sycl_data, "defaultDense") # covariance result objects provide correlation, covariance and mean assert np.allclose(result_classic.covariance, result_cpu.covariance) @@ -103,4 +109,4 @@ def main(readcsv=read_csv, method='defaultDense'): res = main() print("Covariance matrix:\n", res.covariance) print("Mean vector:\n", res.mean) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/covariance_streaming.py b/examples/daal4py/sycl/covariance_streaming.py index 4b71d3dbd6..1d7364d2e8 100644 --- a/examples/daal4py/sycl/covariance_streaming.py +++ b/examples/daal4py/sycl/covariance_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,23 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py covariance example for streaming on shared memory systems -import daal4py as d4p -import numpy as np import os -from daal4py.oneapi import sycl_buffer # let's use a generator for getting stream from file (defined in stream.py) import sys -sys.path.insert(0, '..') + +import numpy as np + +import daal4py as d4p +from daal4py.oneapi import sycl_buffer + +sys.path.insert(0, "..") from stream import read_next try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -38,12 +42,14 @@ def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -51,12 +57,12 @@ def to_numpy(data): return data -def main(readcsv=None, method='defaultDense'): - infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv') +def main(readcsv=None, method="defaultDense"): + infile = os.path.join("..", "data", "batch", "covcormoments_dense.csv") # Using of the classic way (computations on CPU) # configure a covariance object - algo = d4p.covariance(streaming=True, fptype='float') + algo = d4p.covariance(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 112, readcsv) # ... and iterate through chunks/stream @@ -67,9 +73,9 @@ def main(readcsv=None, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): # configure a covariance object - algo = d4p.covariance(streaming=True, fptype='float') + algo = d4p.covariance(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 112, readcsv) # ... and iterate through chunks/stream @@ -83,9 +89,9 @@ def main(readcsv=None, method='defaultDense'): assert np.allclose(result_classic.correlation, result_gpu.correlation) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): # configure a covariance object - algo = d4p.covariance(streaming=True, fptype='float') + algo = d4p.covariance(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 112, readcsv) # ... and iterate through chunks/stream @@ -108,4 +114,4 @@ def main(readcsv=None, method='defaultDense'): res = main() print("Covariance matrix:\n", res.covariance) print("Mean vector:\n", res.mean) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/dbscan_batch.py b/examples/daal4py/sycl/dbscan_batch.py index 99c40fcbb0..af6327ac4f 100644 --- a/examples/daal4py/sycl/dbscan_batch.py +++ b/examples/daal4py/sycl/dbscan_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py DBSCAN example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -44,12 +49,14 @@ def read_csv(f, c, t=np.float64): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -63,17 +70,17 @@ def compute(data, minObservations, epsilon): # we also request the indices and observations of cluster cores algo = d4p.dbscan( minObservations=minObservations, - fptype='float', + fptype="float", epsilon=epsilon, - resultsToCompute='computeCoreIndices|computeCoreObservations', - memorySavingMode=True + resultsToCompute="computeCoreIndices|computeCoreObservations", + memorySavingMode=True, ) # and compute return algo.compute(data) -def main(readcsv=read_csv, method='defaultDense'): - infile = os.path.join('..', 'data', 'batch', 'dbscan_dense.csv') +def main(readcsv=read_csv, method="defaultDense"): + infile = os.path.join("..", "data", "batch", "dbscan_dense.csv") epsilon = 0.04 minObservations = 45 @@ -86,16 +93,17 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) result_gpu = compute(sycl_data, minObservations, epsilon) assert np.allclose(result_classic.nClusters, result_gpu.nClusters) assert np.allclose(result_classic.assignments, result_gpu.assignments) assert np.allclose(result_classic.coreIndices, result_gpu.coreIndices) - assert np.allclose(result_classic.coreObservations, - result_gpu.coreObservations) + assert np.allclose( + result_classic.coreObservations, result_gpu.coreObservations + ) - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) result_cpu = compute(sycl_data, minObservations, epsilon) assert np.allclose(result_classic.nClusters, result_cpu.nClusters) @@ -112,4 +120,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nFirst 10 cluster core indices:\n", result.coreIndices[0:10]) print("\nFirst 10 cluster core observations:\n", result.coreObservations[0:10]) print("\nNumber of clusters:\n", result.nClusters) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/decision_forest_classification_batch.py b/examples/daal4py/sycl/decision_forest_classification_batch.py index 0fb6b328f4..647759ea74 100644 --- a/examples/daal4py/sycl/decision_forest_classification_batch.py +++ b/examples/daal4py/sycl/decision_forest_classification_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,34 +28,37 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except Exception: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except Exception: gpu_available = False # Commone code for both CPU and GPU computations -def compute(train_data, train_labels, predict_data, method='defaultDense'): +def compute(train_data, train_labels, predict_data, method="defaultDense"): # Configure a training object (5 classes) train_algo = d4p.decision_forest_classification_training( 5, - fptype='float', + fptype="float", nTrees=10, minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError', - method=method + resultsToCompute="computeOutOfBagError", + method=method, ) # Training result provides (depending on parameters) model, # outOfBagError, outOfBagErrorPerObservation and/or variableImportance @@ -62,9 +67,9 @@ def compute(train_data, train_labels, predict_data, method='defaultDense'): # now predict using the model from the training above predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, - fptype='float', + fptype="float", resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) predict_result = predict_algo.compute(predict_data, train_result.model) @@ -76,11 +81,13 @@ def compute(train_data, train_labels, predict_data, method='defaultDense'): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except Exception: try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except Exception: @@ -89,11 +96,11 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 3 # input data file - train_file = os.path.join('..', 'data', 'batch', 'df_classification_train.csv') - predict_file = os.path.join('..', 'data', 'batch', 'df_classification_test.csv') + train_file = os.path.join("..", "data", "batch", "df_classification_train.csv") + predict_file = os.path.join("..", "data", "batch", "df_classification_test.csv") # Read train data. Let's use 3 features per observation train_data = readcsv(train_file, range(nFeatures), t=np.float32) @@ -103,8 +110,9 @@ def main(readcsv=read_csv, method='defaultDense'): predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1), t=np.float32) # Using of the classic way (computations on CPU) - train_result, predict_result = compute(train_data, train_labels, - predict_data, "defaultDense") + train_result, predict_result = compute( + train_data, train_labels, predict_data, "defaultDense" + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert (np.mean(predict_result.prediction != predict_labels) < 0.03).any() @@ -114,12 +122,13 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - train_result, predict_result = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, 'hist') + train_result, predict_result = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, "hist" + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert (np.mean(predict_result.prediction != predict_labels) < 0.03).any() @@ -132,11 +141,11 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/decision_forest_classification_hist_batch.py b/examples/daal4py/sycl/decision_forest_classification_hist_batch.py index b1f6555c70..6b4d4affd8 100755 --- a/examples/daal4py/sycl/decision_forest_classification_hist_batch.py +++ b/examples/daal4py/sycl/decision_forest_classification_hist_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Classification example of Hist method for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except Exception: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except Exception: gpu_available = False @@ -45,17 +50,17 @@ def compute(train_data, train_labels, predict_data): # Configure a training object (5 classes) train_algo = d4p.decision_forest_classification_training( 5, - fptype='float', - method='hist', + fptype="float", + method="hist", maxBins=256, minBinSize=1, nTrees=10, minObservationsInLeafNode=8, featuresPerNode=3, engine=d4p.engines_mt19937(seed=777), - varImportance='MDI', + varImportance="MDI", bootstrap=True, - resultsToCompute='computeOutOfBagError' + resultsToCompute="computeOutOfBagError", ) # Training result provides (depending on parameters) model, @@ -65,9 +70,9 @@ def compute(train_data, train_labels, predict_data): # now predict using the model from the training above predict_algo = d4p.decision_forest_classification_prediction( nClasses=5, - fptype='float', + fptype="float", resultsToEvaluate="computeClassLabels|computeClassProbabilities", - votingMethod="unweighted" + votingMethod="unweighted", ) predict_result = predict_algo.compute(predict_data, train_result.model) @@ -79,11 +84,13 @@ def compute(train_data, train_labels, predict_data): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except Exception: try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except Exception: @@ -95,8 +102,8 @@ def to_numpy(data): def main(readcsv=read_csv): nFeatures = 3 # input data file - train_file = os.path.join('..', 'data', 'batch', 'df_classification_train.csv') - predict_file = os.path.join('..', 'data', 'batch', 'df_classification_test.csv') + train_file = os.path.join("..", "data", "batch", "df_classification_train.csv") + predict_file = os.path.join("..", "data", "batch", "df_classification_test.csv") # Read train data. Let's use 3 features per observation train_data = readcsv(train_file, range(nFeatures), t=np.float32) @@ -116,12 +123,13 @@ def main(readcsv=read_csv): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - train_result, predict_result = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data) + train_result, predict_result = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert (np.mean(predict_result.prediction != predict_labels) < 0.03).any() @@ -134,11 +142,11 @@ def main(readcsv=read_csv): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print( "\nDecision forest probabilities results (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/decision_forest_regression_batch.py b/examples/daal4py/sycl/decision_forest_regression_batch.py index 05be0cbcfa..7a68d90506 100644 --- a/examples/daal4py/sycl/decision_forest_regression_batch.py +++ b/examples/daal4py/sycl/decision_forest_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,38 +28,41 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except Exception: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except Exception: gpu_available = False # Commone code for both CPU and GPU computations -def compute(train_data, train_labels, predict_data, method='defaultDense'): +def compute(train_data, train_labels, predict_data, method="defaultDense"): # Configure a training object train_algo = d4p.decision_forest_regression_training( nTrees=100, - fptype='float', + fptype="float", engine=d4p.engines_mt2203(seed=777), - varImportance='MDA_Raw', + varImportance="MDA_Raw", bootstrap=True, - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation', - method=method + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", + method=method, ) # Training result provides (depending on parameters) model, # outOfBagError, outOfBagErrorPerObservation and/or variableImportance train_result = train_algo.compute(train_data, train_labels) # now predict using the model from the training above - predict_algo = d4p.decision_forest_regression_prediction(fptype='float') + predict_algo = d4p.decision_forest_regression_prediction(fptype="float") predict_result = predict_algo.compute(predict_data, train_result.model) @@ -68,11 +73,13 @@ def compute(train_data, train_labels, predict_data, method='defaultDense'): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except Exception: try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except Exception: @@ -81,11 +88,11 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nFeatures = 13 # input data file - train_file = os.path.join('..', 'data', 'batch', 'df_regression_train.csv') - predict_file = os.path.join('..', 'data', 'batch', 'df_regression_test.csv') + train_file = os.path.join("..", "data", "batch", "df_regression_train.csv") + predict_file = os.path.join("..", "data", "batch", "df_regression_test.csv") # Read train data. Let's use 3 features per observation train_data = readcsv(train_file, range(nFeatures), t=np.float32) @@ -95,8 +102,9 @@ def main(readcsv=read_csv, method='defaultDense'): predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1), t=np.float32) # Using of the classic way (computations on CPU) - train_result, predict_result = compute(train_data, train_labels, - predict_data, "defaultDense") + train_result, predict_result = compute( + train_data, train_labels, predict_data, "defaultDense" + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert (np.square(predict_result.prediction - predict_labels).mean() < 18).any() @@ -106,12 +114,13 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - train_result, predict_result = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, 'hist') + train_result, predict_result = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, "hist" + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert ( np.square(predict_result.prediction - predict_labels).mean() < 18 @@ -126,7 +135,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/decision_forest_regression_hist_batch.py b/examples/daal4py/sycl/decision_forest_regression_hist_batch.py index 5b4e44e328..3d06c35999 100755 --- a/examples/daal4py/sycl/decision_forest_regression_hist_batch.py +++ b/examples/daal4py/sycl/decision_forest_regression_hist_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2021 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Decision Forest Regression example of Hist method for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except Exception: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=t) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=t) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except Exception: gpu_available = False @@ -44,15 +49,15 @@ def read_csv(f, c, t=np.float64): def compute(train_data, train_labels, predict_data): # Configure a training object train_algo = d4p.decision_forest_regression_training( - method='hist', + method="hist", maxBins=256, minBinSize=1, nTrees=100, - fptype='float', - varImportance='MDA_Raw', + fptype="float", + varImportance="MDA_Raw", bootstrap=True, engine=d4p.engines_mt2203(seed=777), - resultsToCompute='computeOutOfBagError|computeOutOfBagErrorPerObservation' + resultsToCompute="computeOutOfBagError|computeOutOfBagErrorPerObservation", ) # Training result provides (depending on parameters) model, @@ -60,7 +65,7 @@ def compute(train_data, train_labels, predict_data): train_result = train_algo.compute(train_data, train_labels) # now predict using the model from the training above - predict_algo = d4p.decision_forest_regression_prediction(fptype='float') + predict_algo = d4p.decision_forest_regression_prediction(fptype="float") predict_result = predict_algo.compute(predict_data, train_result.model) @@ -71,11 +76,13 @@ def compute(train_data, train_labels, predict_data): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except Exception: try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except Exception: @@ -87,8 +94,8 @@ def to_numpy(data): def main(readcsv=read_csv): nFeatures = 13 # input data file - train_file = os.path.join('..', 'data', 'batch', 'df_regression_train.csv') - predict_file = os.path.join('..', 'data', 'batch', 'df_regression_test.csv') + train_file = os.path.join("..", "data", "batch", "df_regression_train.csv") + predict_file = os.path.join("..", "data", "batch", "df_regression_test.csv") # Read train data. Let's use 3 features per observation train_data = readcsv(train_file, range(nFeatures), t=np.float32) @@ -108,12 +115,13 @@ def main(readcsv=read_csv): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - train_result, predict_result = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data) + train_result, predict_result = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data + ) assert predict_result.prediction.shape == (predict_labels.shape[0], 1) assert ( np.square(predict_result.prediction - predict_labels).mean() < 18 @@ -128,7 +136,7 @@ def main(readcsv=read_csv): print("\nOOB error:\n", train_result.outOfBagError) print( "\nDecision forest prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", plabels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/gradient_boosted_regression_batch.py b/examples/daal4py/sycl/gradient_boosted_regression_batch.py index 33fbc7bf9d..682a37cba5 100644 --- a/examples/daal4py/sycl/gradient_boosted_regression_batch.py +++ b/examples/daal4py/sycl/gradient_boosted_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Gradient Bossting Regression example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=np.float32) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=np.float32) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2, dtype=np.float32) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2, dtype=np.float32) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,10 +48,10 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(train_indep_data, train_dep_data, test_indep_data, maxIterations): # Configure a training object - train_algo = d4p.gbt_regression_training(maxIterations=maxIterations, fptype='float') + train_algo = d4p.gbt_regression_training(maxIterations=maxIterations, fptype="float") train_result = train_algo.compute(train_indep_data, train_dep_data) # Now let's do some prediction - predict_algo = d4p.gbt_regression_prediction(fptype='float') + predict_algo = d4p.gbt_regression_prediction(fptype="float") # now predict using the model from the training above return predict_algo.compute(test_indep_data, train_result.model) @@ -55,12 +60,14 @@ def compute(train_indep_data, train_dep_data, test_indep_data, maxIterations): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -68,12 +75,12 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): maxIterations = 200 # input data file - infile = os.path.join('..', 'data', 'batch', 'df_regression_train.csv') - testfile = os.path.join('..', 'data', 'batch', 'df_regression_test.csv') + infile = os.path.join("..", "data", "batch", "df_regression_train.csv") + testfile = os.path.join("..", "data", "batch", "df_regression_test.csv") # Read data. Let's use 13 features per observation train_indep_data = readcsv(infile, range(13), t=np.float32) @@ -82,8 +89,9 @@ def main(readcsv=read_csv, method='defaultDense'): test_indep_data = readcsv(testfile, range(13), t=np.float32) # Using of the classic way (computations on CPU) - result_classic = compute(train_indep_data, train_dep_data, - test_indep_data, maxIterations) + result_classic = compute( + train_indep_data, train_dep_data, test_indep_data, maxIterations + ) train_indep_data = to_numpy(train_indep_data) train_dep_data = to_numpy(train_dep_data) @@ -91,15 +99,20 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_indep_data = sycl_buffer(train_indep_data) sycl_train_dep_data = sycl_buffer(train_dep_data) sycl_test_indep_data = sycl_buffer(test_indep_data) - _ = compute(sycl_train_indep_data, sycl_train_dep_data, - sycl_test_indep_data, maxIterations) - - test_dep_data = np.loadtxt(testfile, usecols=range(13, 14), delimiter=',', - ndmin=2, dtype=np.float32) + _ = compute( + sycl_train_indep_data, + sycl_train_dep_data, + sycl_test_indep_data, + maxIterations, + ) + + test_dep_data = np.loadtxt( + testfile, usecols=range(13, 14), delimiter=",", ndmin=2, dtype=np.float32 + ) return (result_classic, test_dep_data) @@ -108,7 +121,7 @@ def main(readcsv=read_csv, method='defaultDense'): (predict_result, test_dep_data) = main() print( "\nGradient boosted trees prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", test_dep_data[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/kmeans_batch.py b/examples/daal4py/sycl/kmeans_batch.py index 37c2c24c8b..bc093e295e 100644 --- a/examples/daal4py/sycl/kmeans_batch.py +++ b/examples/daal4py/sycl/kmeans_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py K-Means example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,12 +48,12 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(data, nClusters, maxIter, method): # configure kmeans init object - initrain_algo = d4p.kmeans_init(nClusters, method=method, fptype='float') + initrain_algo = d4p.kmeans_init(nClusters, method=method, fptype="float") # compute initial centroids initrain_result = initrain_algo.compute(data) # configure kmeans main object: we also request the cluster assignments - algo = d4p.kmeans(nClusters, maxIter, assignFlag=True, fptype='float') + algo = d4p.kmeans(nClusters, maxIter, assignFlag=True, fptype="float") # compute the clusters/centroids return algo.compute(data, initrain_result.centroids) @@ -62,12 +67,14 @@ def compute(data, nClusters, maxIter, method): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -75,8 +82,8 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='randomDense'): - infile = os.path.join('..', 'data', 'batch', 'kmeans_dense.csv') +def main(readcsv=read_csv, method="randomDense"): + infile = os.path.join("..", "data", "batch", "kmeans_dense.csv") nClusters = 20 maxIter = 5 @@ -90,16 +97,15 @@ def main(readcsv=read_csv, method='randomDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) result_gpu = compute(sycl_data, nClusters, maxIter, method) assert np.allclose(result_classic.centroids, result_gpu.centroids) assert np.allclose(result_classic.assignments, result_gpu.assignments) - assert np.isclose(result_classic.objectiveFunction, - result_gpu.objectiveFunction) + assert np.isclose(result_classic.objectiveFunction, result_gpu.objectiveFunction) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) result_cpu = compute(sycl_data, nClusters, maxIter, method) @@ -122,4 +128,4 @@ def main(readcsv=read_csv, method='randomDense'): print("\nFirst 10 cluster assignments:\n", result.assignments[0:10]) print("\nFirst 10 dimensions of centroids:\n", result.centroids[:, 0:10]) print("\nObjective function value:\n", result.objectiveFunction) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/linear_regression_batch.py b/examples/daal4py/sycl/linear_regression_batch.py index b63c99fa70..7c7a09aa36 100644 --- a/examples/daal4py/sycl/linear_regression_batch.py +++ b/examples/daal4py/sycl/linear_regression_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Linear Regression example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,11 +48,11 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(train_indep_data, train_dep_data, test_indep_data): # Configure a Linear regression training object - train_algo = d4p.linear_regression_training(interceptFlag=True, fptype='float') + train_algo = d4p.linear_regression_training(interceptFlag=True, fptype="float") # Now train/compute, the result provides the model for prediction train_result = train_algo.compute(train_indep_data, train_dep_data) # Now let's do some prediction - predict_algo = d4p.linear_regression_prediction(fptype='float') + predict_algo = d4p.linear_regression_prediction(fptype="float") # now predict using the model from the training above return predict_algo.compute(test_indep_data, train_result.model), train_result @@ -56,12 +61,14 @@ def compute(train_indep_data, train_dep_data, test_indep_data): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -69,21 +76,22 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # read training data. Let's have 10 independent, # and 2 dependent variables (for each observation) - trainfile = os.path.join('..', 'data', 'batch', 'linear_regression_train.csv') + trainfile = os.path.join("..", "data", "batch", "linear_regression_train.csv") train_indep_data = readcsv(trainfile, range(10), t=np.float32) train_dep_data = readcsv(trainfile, range(10, 12), t=np.float32) # read testing data - testfile = os.path.join('..', 'data', 'batch', 'linear_regression_test.csv') + testfile = os.path.join("..", "data", "batch", "linear_regression_test.csv") test_indep_data = readcsv(testfile, range(10), t=np.float32) test_dep_data = readcsv(testfile, range(10, 12), t=np.float32) # Using of the classic way (computations on CPU) - result_classic, train_result = \ - compute(train_indep_data, train_dep_data, test_indep_data) + result_classic, train_result = compute( + train_indep_data, train_dep_data, test_indep_data + ) train_indep_data = to_numpy(train_indep_data) train_dep_data = to_numpy(train_dep_data) @@ -91,25 +99,29 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_indep_data = sycl_buffer(train_indep_data) sycl_train_dep_data = sycl_buffer(train_dep_data) sycl_test_indep_data = sycl_buffer(test_indep_data) - result_gpu, _ = compute(sycl_train_indep_data, sycl_train_dep_data, - sycl_test_indep_data) + result_gpu, _ = compute( + sycl_train_indep_data, sycl_train_dep_data, sycl_test_indep_data + ) assert np.allclose(result_classic.prediction, result_gpu.prediction, atol=1e-1) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_train_indep_data = sycl_buffer(train_indep_data) sycl_train_dep_data = sycl_buffer(train_dep_data) sycl_test_indep_data = sycl_buffer(test_indep_data) - result_cpu, _ = compute(sycl_train_indep_data, sycl_train_dep_data, - sycl_test_indep_data) + result_cpu, _ = compute( + sycl_train_indep_data, sycl_train_dep_data, sycl_test_indep_data + ) # The prediction result provides prediction - assert result_classic.prediction.shape == (test_dep_data.shape[0], - test_dep_data.shape[1]) + assert result_classic.prediction.shape == ( + test_dep_data.shape[0], + test_dep_data.shape[1], + ) assert np.allclose(result_classic.prediction, result_cpu.prediction) @@ -121,7 +133,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLinear Regression coefficients:\n", train_result.model.Beta) print( "\nLinear Regression prediction results: (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", test_dep_data[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/log_reg_binary_dense_batch.py b/examples/daal4py/sycl/log_reg_binary_dense_batch.py index eb1f8db592..703378683c 100644 --- a/examples/daal4py/sycl/log_reg_binary_dense_batch.py +++ b/examples/daal4py/sycl/log_reg_binary_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,12 +48,12 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(train_data, train_labels, predict_data, nClasses): # set parameters and train - train_alg = d4p.logistic_regression_training(nClasses=nClasses, - interceptFlag=True, - fptype='float') + train_alg = d4p.logistic_regression_training( + nClasses=nClasses, interceptFlag=True, fptype="float" + ) train_result = train_alg.compute(train_data, train_labels) # set parameters and compute predictions - predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses, fptype='float') + predict_alg = d4p.logistic_regression_prediction(nClasses=nClasses, fptype="float") return predict_alg.compute(predict_data, train_result.model), train_result @@ -56,12 +61,14 @@ def compute(train_data, train_labels, predict_data, nClasses): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -69,23 +76,24 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 2 nFeatures = 20 # read training data from file with 20 features per observation and 1 class label - trainfile = os.path.join('..', 'data', 'batch', 'binary_cls_train.csv') + trainfile = os.path.join("..", "data", "batch", "binary_cls_train.csv") train_data = readcsv(trainfile, range(nFeatures), t=np.float32) train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1), t=np.float32) # read testing data from file with 20 features per observation - testfile = os.path.join('..', 'data', 'batch', 'binary_cls_test.csv') + testfile = os.path.join("..", "data", "batch", "binary_cls_test.csv") predict_data = readcsv(testfile, range(nFeatures), t=np.float32) predict_labels = readcsv(testfile, range(nFeatures, nFeatures + 1), t=np.float32) # Using of the classic way (computations on CPU) - result_classic, train_result = compute(train_data, train_labels, - predict_data, nClasses) + result_classic, train_result = compute( + train_data, train_labels, predict_data, nClasses + ) train_data = to_numpy(train_data) train_labels = to_numpy(train_labels) @@ -93,27 +101,31 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - result_gpu, _ = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) + result_gpu, _ = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) # TODO: When LogisticRegression run2run instability will be replace on np.equal assert np.mean(result_classic.prediction != result_gpu.prediction) < 0.2 # It is possible to specify to make the computations on GPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - result_cpu, _ = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) + result_cpu, _ = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) # the prediction result provides prediction - assert result_classic.prediction.shape == (predict_data.shape[0], - train_labels.shape[1]) + assert result_classic.prediction.shape == ( + predict_data.shape[0], + train_labels.shape[1], + ) # TODO: When LogisticRegression run2run instability will be replace on np.equal assert np.mean(result_classic.prediction != result_cpu.prediction) < 0.2 @@ -125,7 +137,7 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/log_reg_dense_batch.py b/examples/daal4py/sycl/log_reg_dense_batch.py index c640ce401c..735744fa6c 100644 --- a/examples/daal4py/sycl/log_reg_dense_batch.py +++ b/examples/daal4py/sycl/log_reg_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py logistic regression example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,17 +48,21 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(train_data, train_labels, predict_data, nClasses): # set parameters and train - train_alg = d4p.logistic_regression_training(nClasses=nClasses, - fptype='float', - penaltyL1=0.1, - penaltyL2=0.1, - interceptFlag=True) + train_alg = d4p.logistic_regression_training( + nClasses=nClasses, + fptype="float", + penaltyL1=0.1, + penaltyL2=0.1, + interceptFlag=True, + ) train_result = train_alg.compute(train_data, train_labels) # set parameters and compute predictions predict_alg = d4p.logistic_regression_prediction( - nClasses=nClasses, fptype='float', + nClasses=nClasses, + fptype="float", resultsToEvaluate="computeClassLabels|computeClassProbabilities|" - "computeClassLogProbabilities") + "computeClassLogProbabilities", + ) return predict_alg.compute(predict_data, train_result.model), train_result @@ -61,12 +70,14 @@ def compute(train_data, train_labels, predict_data, nClasses): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -74,22 +85,23 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): nClasses = 5 nFeatures = 6 # read training data from file with 6 features per observation and 1 class label - trainfile = os.path.join('..', 'data', 'batch', 'logreg_train.csv') + trainfile = os.path.join("..", "data", "batch", "logreg_train.csv") train_data = readcsv(trainfile, range(nFeatures), t=np.float32) train_labels = readcsv(trainfile, range(nFeatures, nFeatures + 1), t=np.float32) # read testing data from file with 6 features per observation - testfile = os.path.join('..', 'data', 'batch', 'logreg_test.csv') + testfile = os.path.join("..", "data", "batch", "logreg_test.csv") predict_data = readcsv(testfile, range(nFeatures), t=np.float32) # Using of the classic way (computations on CPU) - result_classic, train_result = \ - compute(train_data, train_labels, predict_data, nClasses) + result_classic, train_result = compute( + train_data, train_labels, predict_data, nClasses + ) train_data = to_numpy(train_data) train_labels = to_numpy(train_labels) @@ -97,33 +109,41 @@ def main(readcsv=read_csv, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - result_gpu, _ = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) + result_gpu, _ = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) assert np.allclose(result_classic.prediction, result_gpu.prediction) - assert np.allclose(result_classic.probabilities, - result_gpu.probabilities, atol=1e-3) - assert np.allclose(result_classic.logProbabilities, - result_gpu.logProbabilities, atol=1e-2) + assert np.allclose( + result_classic.probabilities, result_gpu.probabilities, atol=1e-3 + ) + assert np.allclose( + result_classic.logProbabilities, result_gpu.logProbabilities, atol=1e-2 + ) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - result_cpu, _ = compute(sycl_train_data, sycl_train_labels, - sycl_predict_data, nClasses) + result_cpu, _ = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, nClasses + ) # the prediction result provides prediction, probabilities and logProbabilities assert result_classic.probabilities.shape == (predict_data.shape[0], nClasses) assert result_classic.logProbabilities.shape == (predict_data.shape[0], nClasses) - predict_labels = np.loadtxt(testfile, usecols=range(nFeatures, nFeatures + 1), - delimiter=',', ndmin=2) - assert np.count_nonzero(result_classic.prediction - predict_labels) \ - / predict_labels.shape[0] < 0.025 + predict_labels = np.loadtxt( + testfile, usecols=range(nFeatures, nFeatures + 1), delimiter=",", ndmin=2 + ) + assert ( + np.count_nonzero(result_classic.prediction - predict_labels) + / predict_labels.shape[0] + < 0.025 + ) assert np.allclose(result_classic.prediction, result_cpu.prediction) assert np.allclose(result_classic.probabilities, result_cpu.probabilities) @@ -137,15 +157,15 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nLogistic Regression coefficients:\n", train_result.model.Beta) print( "\nLogistic regression prediction results (first 10 rows):\n", - predict_result.prediction[0:10] + predict_result.prediction[0:10], ) print("\nGround truth (first 10 rows):\n", predict_labels[0:10]) print( "\nLogistic regression prediction probabilities (first 10 rows):\n", - predict_result.probabilities[0:10] + predict_result.probabilities[0:10], ) print( "\nLogistic regression prediction log probabilities (first 10 rows):\n", - predict_result.logProbabilities[0:10] + predict_result.logProbabilities[0:10], ) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/low_order_moms_dense_batch.py b/examples/daal4py/sycl/low_order_moms_dense_batch.py index faafe8a7ee..050476184f 100644 --- a/examples/daal4py/sycl/low_order_moms_dense_batch.py +++ b/examples/daal4py/sycl/low_order_moms_dense_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -42,7 +47,7 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(data, method): - alg = d4p.low_order_moments(method=method, fptype='float') + alg = d4p.low_order_moments(method=method, fptype="float") return alg.compute(data) @@ -50,12 +55,14 @@ def compute(data, method): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -65,7 +72,7 @@ def to_numpy(data): def main(readcsv=read_csv, method="defaultDense"): # read data from file - file = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv') + file = os.path.join("..", "data", "batch", "covcormoments_dense.csv") data = readcsv(file, range(10), t=np.float32) # Using of the classic way (computations on CPU) @@ -75,27 +82,58 @@ def main(readcsv=read_csv, method="defaultDense"): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) result_gpu = compute(sycl_data, "defaultDense") - for name in ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', - 'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation', - 'variation']: + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ]: assert np.allclose(getattr(result_classic, name), getattr(result_gpu, name)) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) result_cpu = compute(sycl_data, "defaultDense") # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - assert all(getattr(result_classic, name).shape == (1, data.shape[1]) for name in - ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']) - - for name in ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']: + assert all( + getattr(result_classic, name).shape == (1, data.shape[1]) + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ] + ) + + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ]: assert np.allclose(getattr(result_classic, name), getattr(result_cpu, name)) return result_classic @@ -114,4 +152,4 @@ def main(readcsv=read_csv, method="defaultDense"): print("\nVariance:\n", res.variance) print("\nStandard deviation:\n", res.standardDeviation) print("\nVariation:\n", res.variation) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/low_order_moms_streaming.py b/examples/daal4py/sycl/low_order_moms_streaming.py index 7663b7d874..238bb2a2f5 100644 --- a/examples/daal4py/sycl/low_order_moms_streaming.py +++ b/examples/daal4py/sycl/low_order_moms_streaming.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,23 +12,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py low order moments example for streaming on shared memory systems -import daal4py as d4p -import numpy as np import os -from daal4py.oneapi import sycl_buffer # let's use a generator for getting stream from file (defined in stream.py) import sys -sys.path.insert(0, '..') + +import numpy as np + +import daal4py as d4p +from daal4py.oneapi import sycl_buffer + +sys.path.insert(0, "..") from stream import read_next try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -38,12 +42,14 @@ def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -51,13 +57,13 @@ def to_numpy(data): return data -def main(readcsv=None, method='defaultDense'): +def main(readcsv=None, method="defaultDense"): # read data from file - infile = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv') + infile = os.path.join("..", "data", "batch", "covcormoments_dense.csv") # Using of the classic way (computations on CPU) # Configure a low order moments object for streaming - algo = d4p.low_order_moments(streaming=True, fptype='float') + algo = d4p.low_order_moments(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 55, readcsv) # ... and iterate through chunks/stream @@ -68,9 +74,9 @@ def main(readcsv=None, method='defaultDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): # Configure a low order moments object for streaming - algo = d4p.low_order_moments(streaming=True, fptype='float') + algo = d4p.low_order_moments(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 55, readcsv) # ... and iterate through chunks/stream @@ -79,15 +85,24 @@ def main(readcsv=None, method='defaultDense'): algo.compute(sycl_chunk) # finalize computation result_gpu = algo.finalize() - for name in ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', - 'mean', 'secondOrderRawMoment', 'variance', 'standardDeviation', - 'variation']: + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ]: assert np.allclose(getattr(result_classic, name), getattr(result_gpu, name)) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): # Configure a low order moments object for streaming - algo = d4p.low_order_moments(streaming=True, fptype='float') + algo = d4p.low_order_moments(streaming=True, fptype="float") # get the generator (defined in stream.py)... rn = read_next(infile, 55, readcsv) # ... and iterate through chunks/stream @@ -99,8 +114,18 @@ def main(readcsv=None, method='defaultDense'): # result provides minimum, maximum, sum, sumSquares, sumSquaresCentered, # mean, secondOrderRawMoment, variance, standardDeviation, variation - for name in ['minimum', 'maximum', 'sum', 'sumSquares', 'sumSquaresCentered', 'mean', - 'secondOrderRawMoment', 'variance', 'standardDeviation', 'variation']: + for name in [ + "minimum", + "maximum", + "sum", + "sumSquares", + "sumSquaresCentered", + "mean", + "secondOrderRawMoment", + "variance", + "standardDeviation", + "variation", + ]: assert np.allclose(getattr(result_classic, name), getattr(result_cpu, name)) return result_classic @@ -119,4 +144,4 @@ def main(readcsv=None, method='defaultDense'): print("\nVariance:\n", res.variance) print("\nStandard deviation:\n", res.standardDeviation) print("\nVariation:\n", res.variation) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/pca_batch.py b/examples/daal4py/sycl/pca_batch.py index ca7368f362..b2c1a996db 100644 --- a/examples/daal4py/sycl/pca_batch.py +++ b/examples/daal4py/sycl/pca_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c=None, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c=None, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -46,8 +51,12 @@ def compute(data): # we use z-score which could be configured differently zscore = d4p.normalization_zscore(fptype="float") # configure a PCA object - algo = d4p.pca(fptype="float", resultsToCompute="mean|variance|eigenvalue", - isDeterministic=True, normalization=zscore) + algo = d4p.pca( + fptype="float", + resultsToCompute="mean|variance|eigenvalue", + isDeterministic=True, + normalization=zscore, + ) return algo.compute(data) @@ -55,12 +64,14 @@ def compute(data): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -68,8 +79,8 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='svdDense'): - infile = os.path.join('..', 'data', 'batch', 'pca_normalized.csv') +def main(readcsv=read_csv, method="svdDense"): + infile = os.path.join("..", "data", "batch", "pca_normalized.csv") # Load the data data = readcsv(infile, t=np.float32) @@ -81,17 +92,18 @@ def main(readcsv=read_csv, method='svdDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) result_gpu = compute(sycl_data) assert np.allclose(result_classic.eigenvalues, result_gpu.eigenvalues, atol=1e-5) - assert np.allclose(result_classic.eigenvectors, result_gpu.eigenvectors, - atol=1e-5) + assert np.allclose( + result_classic.eigenvectors, result_gpu.eigenvectors, atol=1e-5 + ) assert np.allclose(result_classic.means, result_gpu.means, atol=1e-5) assert np.allclose(result_classic.variances, result_gpu.variances, atol=1e-5) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) result_cpu = compute(sycl_data) @@ -115,4 +127,4 @@ def main(readcsv=read_csv, method='svdDense'): print("\nEigenvectors:\n", result.eigenvectors) print("\nMeans:\n", result.means) print("\nVariances:\n", result.variances) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/pca_transform_batch.py b/examples/daal4py/sycl/pca_transform_batch.py index 9cc380b676..1b1e55d4bc 100644 --- a/examples/daal4py/sycl/pca_transform_batch.py +++ b/examples/daal4py/sycl/pca_transform_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py PCA example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,15 +28,18 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False @@ -43,11 +48,12 @@ def read_csv(f, c, t=np.float64): # Commone code for both CPU and GPU computations def compute(data, nComponents): # configure a PCA object and perform PCA - pca_algo = d4p.pca(isDeterministic=True, fptype='float', - resultsToCompute="mean|variance|eigenvalue") + pca_algo = d4p.pca( + isDeterministic=True, fptype="float", resultsToCompute="mean|variance|eigenvalue" + ) pca_res = pca_algo.compute(data) # Apply transform with whitening because means and eigenvalues are provided - pcatrans_algo = d4p.pca_transform(fptype='float', nComponents=nComponents) + pcatrans_algo = d4p.pca_transform(fptype="float", nComponents=nComponents) return pcatrans_algo.compute(data, pca_res.eigenvectors, pca_res.dataForTransform) @@ -55,12 +61,14 @@ def compute(data, nComponents): def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -68,8 +76,8 @@ def to_numpy(data): return data -def main(readcsv=read_csv, method='svdDense'): - dataFileName = os.path.join('..', 'data', 'batch', 'pca_transform.csv') +def main(readcsv=read_csv, method="svdDense"): + dataFileName = os.path.join("..", "data", "batch", "pca_transform.csv") nComponents = 2 # read data @@ -82,24 +90,24 @@ def main(readcsv=read_csv, method='svdDense'): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_data = sycl_buffer(data) result_gpu = compute(sycl_data, nComponents) assert np.allclose(result_classic.transformedData, result_gpu.transformedData) # It is possible to specify to make the computations on CPU - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_data = sycl_buffer(data) result_cpu = compute(sycl_data, nComponents) # pca_transform_result objects provides transformedData assert np.allclose(result_classic.transformedData, result_cpu.transformedData) - return (result_classic) + return result_classic if __name__ == "__main__": pcatrans_res = main() # print results of tranform print(pcatrans_res) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/sklearn_sycl.py b/examples/daal4py/sycl/sklearn_sycl.py index 05db2cbdcf..8aa7a3ea3c 100644 --- a/examples/daal4py/sycl/sklearn_sycl.py +++ b/examples/daal4py/sycl/sklearn_sycl.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,29 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py Scikit-Learn examples for GPU # run like this: # python -m sklearnex ./sklearn_sycl.py import numpy as np - -from sklearn.cluster import KMeans -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.cluster import DBSCAN - +from sklearn.cluster import DBSCAN, KMeans from sklearn.datasets import load_iris +from sklearn.linear_model import LinearRegression, LogisticRegression dpctl_available = False try: import dpctl + from sklearnex._config import config_context + dpctl_available = True except ImportError: try: from daal4py.oneapi import sycl_context + print("*" * 80) print("\ndpctl package not found, switched to daal4py package\n") print("*" * 80) @@ -46,7 +45,7 @@ gpu_available = False if not dpctl_available: try: - with sycl_context('gpu'): + with sycl_context("gpu"): gpu_available = True except Exception: gpu_available = False @@ -54,8 +53,10 @@ def k_means_init_x(): print("KMeans init=X[:2]") - X = np.array([[1., 2.], [1., 4.], [1., 0.], - [10., 2.], [10., 4.], [10., 0.]], dtype=np.float32) + X = np.array( + [[1.0, 2.0], [1.0, 4.0], [1.0, 0.0], [10.0, 2.0], [10.0, 4.0], [10.0, 0.0]], + dtype=np.float32, + ) kmeans = KMeans(n_clusters=2, random_state=0, init=X[:2]).fit(X) print("kmeans.labels_") print(kmeans.labels_) @@ -67,9 +68,11 @@ def k_means_init_x(): def k_means_random(): print("KMeans init='random'") - X = np.array([[1., 2.], [1., 4.], [1., 0.], - [10., 2.], [10., 4.], [10., 0.]], dtype=np.float32) - kmeans = KMeans(n_clusters=2, random_state=0, init='random').fit(X) + X = np.array( + [[1.0, 2.0], [1.0, 4.0], [1.0, 0.0], [10.0, 2.0], [10.0, 4.0], [10.0, 0.0]], + dtype=np.float32, + ) + kmeans = KMeans(n_clusters=2, random_state=0, init="random").fit(X) print("kmeans.labels_") print(kmeans.labels_) print("kmeans.predict([[0, 0], [12, 3]])") @@ -80,7 +83,7 @@ def k_means_random(): def linear_regression(): print("LinearRegression") - X = np.array([[1., 1.], [1., 2.], [2., 2.], [2., 3.]], dtype=np.float32) + X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]], dtype=np.float32) # y = 1 * x_0 + 2 * x_1 + 3 y = np.dot(X, np.array([1, 2], dtype=np.float32)) + 3 reg = LinearRegression().fit(X, y) @@ -97,9 +100,9 @@ def linear_regression(): def logistic_regression_lbfgs(): print("LogisticRegression solver='lbfgs'") X, y = load_iris(return_X_y=True) - clf = LogisticRegression(random_state=0, solver='lbfgs').fit( - X.astype('float32'), - y.astype('float32')) + clf = LogisticRegression(random_state=0, solver="lbfgs").fit( + X.astype("float32"), y.astype("float32") + ) print("clf.predict(X[:2, :])") print(clf.predict(X[:2, :])) print("clf.predict_proba(X[:2, :])") @@ -111,9 +114,9 @@ def logistic_regression_lbfgs(): def logistic_regression_newton(): print("LogisticRegression solver='newton-cg'") X, y = load_iris(return_X_y=True) - clf = LogisticRegression(random_state=0, solver='newton-cg').fit( - X.astype('float32'), - y.astype('float32')) + clf = LogisticRegression(random_state=0, solver="newton-cg").fit( + X.astype("float32"), y.astype("float32") + ) print("clf.predict(X[:2, :])") print(clf.predict(X[:2, :])) print("clf.predict_proba(X[:2, :])") @@ -124,8 +127,10 @@ def logistic_regression_newton(): def dbscan(): print("DBSCAN") - X = np.array([[1., 2.], [2., 2.], [2., 3.], - [8., 7.], [8., 8.], [25., 80.]], dtype=np.float32) + X = np.array( + [[1.0, 2.0], [2.0, 2.0], [2.0, 3.0], [8.0, 7.0], [8.0, 8.0], [25.0, 80.0]], + dtype=np.float32, + ) clustering = DBSCAN(eps=3, min_samples=2).fit(X) print("clustering.labels_") print(clustering.labels_) @@ -141,14 +146,15 @@ def get_context(device): def device_type_to_str(queue): if queue is None: - return 'cpu' + return "cpu" from dpctl import device_type + if queue.sycl_device.device_type == device_type.cpu: - return 'cpu' + return "cpu" if queue.sycl_device.device_type == device_type.gpu: - return 'gpu' - return 'unknown' + return "gpu" + return "unknown" if __name__ == "__main__": @@ -164,14 +170,14 @@ def device_type_to_str(queue): if dpctl_available: devices.append(None) - devices.append(dpctl.SyclQueue('cpu')) + devices.append(dpctl.SyclQueue("cpu")) if dpctl.has_gpu_devices(): - devices.append(dpctl.SyclQueue('gpu')) + devices.append(dpctl.SyclQueue("gpu")) else: - devices.append('cpu') + devices.append("cpu") if gpu_available: - devices.append('gpu') + devices.append("gpu") for device in devices: for e in examples: @@ -184,4 +190,4 @@ def device_type_to_str(queue): e() print("*" * 80) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/sycl/svm_batch.py b/examples/daal4py/sycl/svm_batch.py index 0d09eaf873..a29fcbf784 100755 --- a/examples/daal4py/sycl/svm_batch.py +++ b/examples/daal4py/sycl/svm_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2020 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,13 +12,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py SVM example for shared memory systems -import daal4py as d4p -import numpy as np import os + +import numpy as np + +import daal4py as d4p from daal4py.oneapi import sycl_buffer # let's try to use pandas' fast csv reader @@ -26,40 +28,43 @@ import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) + try: from daal4py.oneapi import sycl_context - with sycl_context('gpu'): + + with sycl_context("gpu"): gpu_available = True except: gpu_available = False # Common code for both CPU and GPU computations -def compute(train_indep_data, train_dep_data, test_indep_data, method='defaultDense'): +def compute(train_indep_data, train_dep_data, test_indep_data, method="defaultDense"): # Configure a SVM object to use linear kernel kernel_function = d4p.kernel_function_linear( - fptype='float', method='defaultDense', k=1.0, b=0.0 + fptype="float", method="defaultDense", k=1.0, b=0.0 ) train_algo = d4p.svm_training( - fptype='float', + fptype="float", method=method, kernel=kernel_function, C=1.0, accuracyThreshold=1e-3, tau=1e-8, - cacheSize=600000000 + cacheSize=600000000, ) train_result = train_algo.compute(train_indep_data, train_dep_data) # Create an algorithm object and call compute - predict_algo = d4p.svm_prediction(fptype='float', kernel=kernel_function) + predict_algo = d4p.svm_prediction(fptype="float", kernel=kernel_function) predict_result = predict_algo.compute(test_indep_data, train_result.model) decision_result = predict_result.prediction predict_labels = np.where(decision_result >= 0, 1, -1) @@ -70,12 +75,14 @@ def compute(train_indep_data, train_dep_data, test_indep_data, method='defaultDe def to_numpy(data): try: from pandas import DataFrame + if isinstance(data, DataFrame): return np.ascontiguousarray(data.values) except ImportError: pass try: from scipy.sparse import csr_matrix + if isinstance(data, csr_matrix): return data.toarray() except ImportError: @@ -85,8 +92,8 @@ def to_numpy(data): def main(readcsv=read_csv): # input data file - train_file = os.path.join('..', 'data', 'batch', 'svm_two_class_train_dense.csv') - predict_file = os.path.join('..', 'data', 'batch', 'svm_two_class_test_dense.csv') + train_file = os.path.join("..", "data", "batch", "svm_two_class_train_dense.csv") + predict_file = os.path.join("..", "data", "batch", "svm_two_class_test_dense.csv") nFeatures = 20 train_data = readcsv(train_file, range(nFeatures), t=np.float32) @@ -94,8 +101,9 @@ def main(readcsv=read_csv): predict_data = readcsv(predict_file, range(nFeatures), t=np.float32) predict_labels = readcsv(predict_file, range(nFeatures, nFeatures + 1), t=np.float32) - predict_result_classic, decision_function_classic = \ - compute(train_data, train_labels, predict_data, 'boser') + predict_result_classic, decision_function_classic = compute( + train_data, train_labels, predict_data, "boser" + ) train_data = to_numpy(train_data) train_labels = to_numpy(train_labels) @@ -103,21 +111,23 @@ def main(readcsv=read_csv): # It is possible to specify to make the computations on GPU if gpu_available: - with sycl_context('gpu'): + with sycl_context("gpu"): sycl_train_data = sycl_buffer(train_data) sycl_train_labels = sycl_buffer(train_labels) sycl_predict_data = sycl_buffer(predict_data) - predict_result_gpu, decision_function_gpu = \ - compute(sycl_train_data, sycl_train_labels, sycl_predict_data, 'thunder') + predict_result_gpu, decision_function_gpu = compute( + sycl_train_data, sycl_train_labels, sycl_predict_data, "thunder" + ) # assert np.allclose(predict_result_gpu, predict_result_classic) - with sycl_context('cpu'): + with sycl_context("cpu"): sycl_train_data = sycl_buffer(train_data) sycl_predict_data = sycl_buffer(predict_data) - predict_result_cpu, decision_function_cpu = \ - compute(sycl_train_data, train_labels, sycl_predict_data, 'thunder') + predict_result_cpu, decision_function_cpu = compute( + sycl_train_data, train_labels, sycl_predict_data, "thunder" + ) assert np.allclose(predict_result_cpu, predict_result_classic) return predict_labels, predict_result_classic, decision_function_classic @@ -128,11 +138,11 @@ def main(readcsv=read_csv): np.set_printoptions(precision=0) print( "\nSVM classification decision function (first 10 observations):\n", - decision_function[0:10] + decision_function[0:10], ) print( "\nSVM classification predict result (first 10 observations):\n", - predict_result[0:10] + predict_result[0:10], ) print("\nGround truth (first 10 observations):\n", predict_labels[0:10]) - print('All looks good!') + print("All looks good!") diff --git a/examples/daal4py/univariate_outlier_batch.py b/examples/daal4py/univariate_outlier_batch.py index 40e14d645d..886e3268a8 100644 --- a/examples/daal4py/univariate_outlier_batch.py +++ b/examples/daal4py/univariate_outlier_batch.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2014 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,26 +12,28 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # daal4py outlier detection univariate example for shared memory systems -import daal4py as d4p import numpy as np +import daal4py as d4p + # let's try to use pandas' fast csv reader try: import pandas def read_csv(f, c, t=np.float64): - return pandas.read_csv(f, usecols=c, delimiter=',', header=None, dtype=t) + return pandas.read_csv(f, usecols=c, delimiter=",", header=None, dtype=t) + except ImportError: # fall back to numpy loadtxt def read_csv(f, c, t=np.float64): - return np.loadtxt(f, usecols=c, delimiter=',', ndmin=2) + return np.loadtxt(f, usecols=c, delimiter=",", ndmin=2) -def main(readcsv=read_csv, method='defaultDense'): +def main(readcsv=read_csv, method="defaultDense"): # Input file infile = "./data/batch/outlierdetection.csv" @@ -55,4 +57,4 @@ def main(readcsv=read_csv, method='defaultDense'): print("\nInput data\n", data) print("\nOutlier detection result (univariate) weights:\n", res.weights) - print('All looks good!') + print("All looks good!") diff --git a/examples/sklearnex/basic_statistics_spmd.py b/examples/sklearnex/basic_statistics_spmd.py index c64a583a82..c6edac986e 100644 --- a/examples/sklearnex/basic_statistics_spmd.py +++ b/examples/sklearnex/basic_statistics_spmd.py @@ -14,23 +14,22 @@ # limitations under the License. # =============================================================================== +import dpctl.tensor as dpt import numpy as np +from dpctl import SyclQueue from mpi4py import MPI -from dpctl import SyclQueue -import dpctl.tensor as dpt from sklearnex.spmd.basic_statistics import BasicStatistics as BasicStatisticsSpmd def generate_data(par, size, seed=777): - ns, nf = par['ns'], par['nf'] + ns, nf = par["ns"], par["nf"] data_blocks, weight_blocks = [], [] rng = np.random.default_rng(seed) for b in range(size): - data = rng.uniform(b, (b + 1) * (b + 1), - size=(ns, nf)) + data = rng.uniform(b, (b + 1) * (b + 1), size=(ns, nf)) weights = rng.uniform(1, (b + 1), size=ns) weight_blocks.append(weights) data_blocks.append(data) @@ -47,7 +46,7 @@ def generate_data(par, size, seed=777): rank = comm.Get_rank() size = comm.Get_size() -params_spmd = {'ns': 19, 'nf': 31} +params_spmd = {"ns": 19, "nf": 31} data, weights = generate_data(params_spmd, size) weighted_data = np.diag(weights) @ data diff --git a/examples/sklearnex/kmeans_spmd.py b/examples/sklearnex/kmeans_spmd.py index abaf314e79..c3747353bc 100644 --- a/examples/sklearnex/kmeans_spmd.py +++ b/examples/sklearnex/kmeans_spmd.py @@ -14,16 +14,16 @@ # limitations under the License. # =============================================================================== -import numpy as np from warnings import warn -from mpi4py import MPI -from dpctl import SyclQueue import dpctl.tensor as dpt -from onedal.spmd.cluster import KMeans - +import numpy as np +from dpctl import SyclQueue +from mpi4py import MPI from sklearn.datasets import load_digits +from onedal.spmd.cluster import KMeans + def get_data_slice(chunk, count): assert chunk < count diff --git a/examples/sklearnex/knn_bf_classification_spmd.py b/examples/sklearnex/knn_bf_classification_spmd.py index 501bec3242..bdb09545b6 100644 --- a/examples/sklearnex/knn_bf_classification_spmd.py +++ b/examples/sklearnex/knn_bf_classification_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,19 +12,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== -import numpy as np -from sklearn.metrics import accuracy_score from warnings import warn -from mpi4py import MPI + import dpctl import dpctl.tensor as dpt +import numpy as np +from mpi4py import MPI +from sklearn.metrics import accuracy_score + from sklearnex.spmd.neighbors import KNeighborsClassifier def generate_X_y(par, seed): - ns, nf = par['ns'], par['nf'] + ns, nf = par["ns"], par["nf"] drng = np.random.default_rng(seed) data = drng.uniform(-1, 1, size=(ns, nf)) @@ -40,11 +42,13 @@ def generate_X_y(par, seed): if dpctl.has_gpu_devices: q = dpctl.SyclQueue("gpu") else: - raise RuntimeError('GPU devices unavailable. Currently, ' - 'SPMD execution mode is implemented only for this device type.') + raise RuntimeError( + "GPU devices unavailable. Currently, " + "SPMD execution mode is implemented only for this device type." + ) -params_train = {'ns': 100000, 'nf': 8} -params_test = {'ns': 100, 'nf': 8} +params_train = {"ns": 100000, "nf": 8} +params_test = {"ns": 100, "nf": 8} X_train, y_train = generate_X_y(params_train, rank) X_test, y_test = generate_X_y(params_test, rank + 99) @@ -54,18 +58,22 @@ def generate_X_y(par, seed): dpt_X_test = dpt.asarray(X_test, usm_type="device", sycl_queue=q) dpt_y_test = dpt.asarray(y_test, usm_type="device", sycl_queue=q) -model_spmd = KNeighborsClassifier(algorithm='brute', - n_neighbors=20, - weights='uniform', - p=2, - metric='minkowski') +model_spmd = KNeighborsClassifier( + algorithm="brute", n_neighbors=20, weights="uniform", p=2, metric="minkowski" +) model_spmd.fit(dpt_X_train, dpt_y_train) y_predict = model_spmd.predict(dpt_X_test) print("Brute Force Distributed kNN classification results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) -print("Classification results (first 5 observations on rank {}):\n{}" - .format(rank, dpt.to_numpy(y_predict)[:5])) -print("Accuracy for entire rank {} (256 classes): {}\n" - .format(rank, accuracy_score(y_test, dpt.to_numpy(y_predict)))) +print( + "Classification results (first 5 observations on rank {}):\n{}".format( + rank, dpt.to_numpy(y_predict)[:5] + ) +) +print( + "Accuracy for entire rank {} (256 classes): {}\n".format( + rank, accuracy_score(y_test, dpt.to_numpy(y_predict)) + ) +) diff --git a/examples/sklearnex/knn_bf_regression_spmd.py b/examples/sklearnex/knn_bf_regression_spmd.py index ac0e8ad555..1ab54b2653 100644 --- a/examples/sklearnex/knn_bf_regression_spmd.py +++ b/examples/sklearnex/knn_bf_regression_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,20 +12,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== -import numpy as np -from sklearn.metrics import mean_squared_error from warnings import warn -from mpi4py import MPI + import dpctl import dpctl.tensor as dpt +import numpy as np +from mpi4py import MPI from numpy.testing import assert_allclose +from sklearn.metrics import mean_squared_error + from sklearnex.spmd.neighbors import KNeighborsRegressor def generate_X_y(par, coef_seed, data_seed): - ns, nf = par['ns'], par['nf'] + ns, nf = par["ns"], par["nf"] crng = np.random.default_rng(coef_seed) coef = crng.uniform(-10, 10, size=(nf,)) @@ -44,11 +46,13 @@ def generate_X_y(par, coef_seed, data_seed): if dpctl.has_gpu_devices: q = dpctl.SyclQueue("gpu") else: - raise RuntimeError('GPU devices unavailable. Currently, ' - 'SPMD execution mode is implemented only for this device type.') + raise RuntimeError( + "GPU devices unavailable. Currently, " + "SPMD execution mode is implemented only for this device type." + ) -params_train = {'ns': 1000000, 'nf': 3} -params_test = {'ns': 100, 'nf': 3} +params_train = {"ns": 1000000, "nf": 3} +params_test = {"ns": 100, "nf": 3} X_train, y_train, coef_train = generate_X_y(params_train, 10, rank) X_test, y_test, coef_test = generate_X_y(params_test, 10, rank + 99) @@ -60,18 +64,22 @@ def generate_X_y(par, coef_seed, data_seed): assert_allclose(coef_train, coef_test) -model_spmd = KNeighborsRegressor(algorithm='brute', - n_neighbors=5, - weights='uniform', - p=2, - metric='minkowski') +model_spmd = KNeighborsRegressor( + algorithm="brute", n_neighbors=5, weights="uniform", p=2, metric="minkowski" +) model_spmd.fit(dpt_X_train, dpt_y_train) y_predict = model_spmd.predict(dpt_X_test) print("Brute Force Distributed kNN regression results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(rank, y_test[:5])) -print("Regression results (first 5 observations on rank {}):\n{}" - .format(rank, dpt.to_numpy(y_predict)[:5])) -print("RMSE for entire rank {}: {}\n" - .format(rank, mean_squared_error(y_test, dpt.to_numpy(y_predict), squared=False))) +print( + "Regression results (first 5 observations on rank {}):\n{}".format( + rank, dpt.to_numpy(y_predict)[:5] + ) +) +print( + "RMSE for entire rank {}: {}\n".format( + rank, mean_squared_error(y_test, dpt.to_numpy(y_predict), squared=False) + ) +) diff --git a/examples/sklearnex/linear_regression_spmd.py b/examples/sklearnex/linear_regression_spmd.py index a37f7f2686..192d84c8d4 100755 --- a/examples/sklearnex/linear_regression_spmd.py +++ b/examples/sklearnex/linear_regression_spmd.py @@ -14,12 +14,13 @@ # limitations under the License. # =============================================================================== -import numpy as np from warnings import warn -from mpi4py import MPI -from dpctl import SyclQueue import dpctl.tensor as dpt +import numpy as np +from dpctl import SyclQueue +from mpi4py import MPI + from sklearnex.spmd.linear_model import LinearRegression @@ -28,7 +29,7 @@ def generate_X_y(ns, data_seed): crng = np.random.default_rng(777) coef = crng.uniform(-4, 1, size=(nr, nf)).T - intp = crng.uniform(-1, 9, size=(nr, )) + intp = crng.uniform(-1, 9, size=(nr,)) drng = np.random.default_rng(data_seed) data = drng.uniform(-7, 7, size=(ns, nf)) @@ -50,8 +51,10 @@ def get_test_data(rank): size = comm.Get_size() if size < 2: - warn("This example was intentionally " - "designed to run in distributed mode only", RuntimeWarning) + warn( + "This example was intentionally " "designed to run in distributed mode only", + RuntimeWarning, + ) X, y = get_train_data(rank) diff --git a/examples/sklearnex/n_jobs.py b/examples/sklearnex/n_jobs.py index fc36d95fa9..13a5c05777 100644 --- a/examples/sklearnex/n_jobs.py +++ b/examples/sklearnex/n_jobs.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,17 +12,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # sklearnex doesn't have interface for threading configuration and not following # scikit-learn n_jobs yet. Thus it's requered to use daal4py package to set this. # nthreads parameter define number of threads used by sklearnex. # Without this code sklearnex would be using all system cores import daal4py + daal4py.daalinit(nthreads=2) # Calling scikit-learn patch - this would enable acceleration on all enabled algorithms from sklearnex import patch_sklearn + patch_sklearn() # Remaining non modified scikit-learn code @@ -37,8 +39,8 @@ X = StandardScaler().fit_transform(X) -from sklearn.cluster import DBSCAN from sklearn import metrics +from sklearn.cluster import DBSCAN db = DBSCAN(eps=0.3, min_samples=10).fit(X) labels = db.labels_ diff --git a/examples/sklearnex/patch_sklearn.py b/examples/sklearnex/patch_sklearn.py index 5052952028..3a87702cbf 100644 --- a/examples/sklearnex/patch_sklearn.py +++ b/examples/sklearnex/patch_sklearn.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,28 +12,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # Calling scikit-learn patch - this would enable acceleration on all # enabled algorithms. This is most straight forward way of patching from sklearnex import patch_sklearn + patch_sklearn() # Function that can validate current state of patching from sklearnex import sklearn_is_patched + sklearn_is_patched() # Calling scikit-learn unpatch - this would revert patching for all algorithms from sklearnex import unpatch_sklearn + unpatch_sklearn() # Direct import of functions in way aligned with scikit-learn from sklearnex.neighbors import NearestNeighbors -#Patching can be enabled for selected algorithms/estimators only +# Patching can be enabled for selected algorithms/estimators only patch_sklearn(["DBSCAN"]) -#Remaining non modified scikit-learn codes +# Remaining non modified scikit-learn codes from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler diff --git a/examples/sklearnex/pca_spmd.py b/examples/sklearnex/pca_spmd.py index 64e61a5a19..51aae3400d 100644 --- a/examples/sklearnex/pca_spmd.py +++ b/examples/sklearnex/pca_spmd.py @@ -14,10 +14,11 @@ # limitations under the License. # =============================================================================== -import numpy as np -from mpi4py import MPI import dpctl import dpctl.tensor as dpt +import numpy as np +from mpi4py import MPI + from sklearnex.spmd.decomposition import PCA diff --git a/examples/sklearnex/random_forest_classifier_spmd.py b/examples/sklearnex/random_forest_classifier_spmd.py index 83e6539ec3..fbfad2c3ce 100644 --- a/examples/sklearnex/random_forest_classifier_spmd.py +++ b/examples/sklearnex/random_forest_classifier_spmd.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # sklearnex RF example for distributed systems; SPMD mode # run like this: @@ -20,16 +20,14 @@ import dpctl import dpctl.tensor as dpt - import numpy as np - from mpi4py import MPI from sklearnex.spmd.ensemble import RandomForestClassifier def generate_X_y(par, seed): - ns, nf = par['ns'], par['nf'] + ns, nf = par["ns"], par["nf"] drng = np.random.default_rng(seed) data = drng.uniform(-1, 1, size=(ns, nf)) @@ -38,8 +36,8 @@ def generate_X_y(par, seed): return data, resp -params_train = {'ns': 10000, 'nf': 8} -params_test = {'ns': 100, 'nf': 8} +params_train = {"ns": 10000, "nf": 8} +params_test = {"ns": 100, "nf": 8} comm = MPI.COMM_WORLD mpi_size = comm.Get_size() @@ -60,5 +58,8 @@ def generate_X_y(par, seed): print("Random Forest classification results:") print("Ground truth (first 5 observations on rank {}):\n{}".format(mpi_rank, y_test[:5])) -print("Classification results (first 5 observations on rank {}):\n{}" - .format(mpi_rank, dpt.to_numpy(pred)[:5])) +print( + "Classification results (first 5 observations on rank {}):\n{}".format( + mpi_rank, dpt.to_numpy(pred)[:5] + ) +) diff --git a/examples/sklearnex/random_forest_regressor_spmd.py b/examples/sklearnex/random_forest_regressor_spmd.py index 797415488b..3a5f10e240 100644 --- a/examples/sklearnex/random_forest_regressor_spmd.py +++ b/examples/sklearnex/random_forest_regressor_spmd.py @@ -21,6 +21,7 @@ import dpctl import dpctl.tensor as dpt import dpnp + import numpy as np from mpi4py import MPI from numpy.testing import assert_allclose diff --git a/examples/sklearnex/verbose_mode.py b/examples/sklearnex/verbose_mode.py index 4354847c7b..2cd610fc55 100644 --- a/examples/sklearnex/verbose_mode.py +++ b/examples/sklearnex/verbose_mode.py @@ -1,4 +1,4 @@ -#=============================================================================== +# =============================================================================== # Copyright 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,16 +12,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -#=============================================================================== +# =============================================================================== # sklearnex can help you debug your aplications by printing messages on it's invocation # to allow you to see if stock of accelerated version was used. # By setting sklearnex logger level to "INFO" you would enable this verbose mode import logging -logging.getLogger('sklearnex').setLevel(logging.INFO) + +logging.getLogger("sklearnex").setLevel(logging.INFO) # Calling scikit-learn patch - this would enable acceleration on all enabled algorithms from sklearnex import patch_sklearn + patch_sklearn() # Remaining non modified scikit-learn code