Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added compatibility with persistent StorageNumpy arrays from Hecuba #309

Open
wants to merge 315 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
315 commits
Select commit Hold shift + click to select a range
43ac05f
test
mbmiquel Mar 2, 2020
bdcbde4
test
mbmiquel Mar 2, 2020
abf47ad
test
mbmiquel Mar 2, 2020
6ee4813
test
mbmiquel Mar 2, 2020
041e4dc
test
mbmiquel Mar 2, 2020
bf56ff6
test
mbmiquel Mar 2, 2020
42d6796
test
mbmiquel Mar 2, 2020
68de457
test
mbmiquel Mar 2, 2020
becd5cc
test
mbmiquel Mar 2, 2020
5f0a319
test
mbmiquel Mar 2, 2020
ecf60dc
test
mbmiquel Mar 2, 2020
f6863eb
test
mbmiquel Mar 2, 2020
bc8c7e9
test
mbmiquel Mar 2, 2020
8e7f12e
test
mbmiquel Mar 2, 2020
8ee4124
test
mbmiquel Mar 2, 2020
280ecdb
test
mbmiquel Mar 2, 2020
7c69912
test
mbmiquel Mar 2, 2020
4c5a3e8
test
mbmiquel Mar 2, 2020
b389726
test
mbmiquel Mar 2, 2020
262b6c5
test
mbmiquel Mar 2, 2020
956a7b8
test
mbmiquel Mar 2, 2020
053c08c
test
mbmiquel Mar 2, 2020
3fa37d7
test
mbmiquel Mar 2, 2020
53a99ab
test
mbmiquel Mar 2, 2020
c5510a5
test
mbmiquel Mar 2, 2020
9f897e4
test
mbmiquel Mar 2, 2020
6403009
test
mbmiquel Mar 3, 2020
0b2a33f
test
mbmiquel Mar 3, 2020
737c350
test
mbmiquel Mar 3, 2020
4c02ced
test
mbmiquel Mar 3, 2020
489be00
test
mbmiquel Mar 3, 2020
2ba5547
test
mbmiquel Mar 3, 2020
526d88a
test
mbmiquel Mar 3, 2020
68c15c1
test
mbmiquel Mar 3, 2020
14f606f
test
mbmiquel Mar 3, 2020
295358c
test
mbmiquel Mar 3, 2020
59c97c3
test
mbmiquel Mar 3, 2020
7f81ebf
test
mbmiquel Mar 3, 2020
1f459f4
test
mbmiquel Mar 3, 2020
d8c4a32
test
mbmiquel Mar 3, 2020
05ffb5b
test
mbmiquel Mar 3, 2020
b0d4673
test
mbmiquel Mar 3, 2020
29cd744
test
mbmiquel Mar 3, 2020
f6d6212
test
mbmiquel Mar 3, 2020
40fb9b5
test
mbmiquel Mar 3, 2020
536cff8
test
mbmiquel Mar 3, 2020
b400ef2
test
mbmiquel Mar 3, 2020
cc33cc2
test
mbmiquel Mar 3, 2020
092de7c
test
mbmiquel Mar 3, 2020
8b01e9a
test
mbmiquel Mar 3, 2020
4e0871c
test
mbmiquel Mar 4, 2020
1c80159
test
mbmiquel Mar 4, 2020
c46e30a
test
mbmiquel Mar 11, 2020
eec9e69
test
mbmiquel Mar 11, 2020
ffcfc4c
test
mbmiquel Mar 11, 2020
46b2728
test
mbmiquel Mar 11, 2020
251d53b
test
mbmiquel Mar 11, 2020
6f9b10f
test
mbmiquel Mar 11, 2020
e1aaa0a
test
mbmiquel Mar 11, 2020
ed92f0e
test
mbmiquel Mar 11, 2020
910410f
test
mbmiquel Mar 11, 2020
8423c51
test
mbmiquel Mar 11, 2020
78ea8b7
test
mbmiquel Mar 11, 2020
75ac4ee
test
mbmiquel Mar 11, 2020
96cf85c
test
mbmiquel Mar 11, 2020
ee421ac
test
mbmiquel Mar 11, 2020
d0fe656
test
mbmiquel Mar 11, 2020
9fc645f
test
mbmiquel Mar 11, 2020
427bb32
test
mbmiquel Mar 11, 2020
f7914d7
test
mbmiquel Mar 11, 2020
7dd58de
test
mbmiquel Mar 11, 2020
6b21bb5
test
mbmiquel Mar 11, 2020
31de241
test
mbmiquel Mar 11, 2020
a79567a
test
mbmiquel Mar 11, 2020
503740c
test
mbmiquel Mar 11, 2020
df00c30
test
mbmiquel Mar 11, 2020
583765f
test
mbmiquel Mar 11, 2020
9ac6751
test
mbmiquel Mar 11, 2020
2a4aa7e
test
mbmiquel Mar 11, 2020
de6dc56
test
mbmiquel Mar 11, 2020
be17f93
test
mbmiquel Mar 11, 2020
e38cc3b
test
mbmiquel Mar 11, 2020
17b80de
test
mbmiquel Mar 11, 2020
480fc47
test
mbmiquel Mar 11, 2020
05d7229
test
mbmiquel Mar 11, 2020
20c0bbb
test
mbmiquel Mar 11, 2020
a7079d6
test
mbmiquel Mar 11, 2020
fb155ee
test
mbmiquel Mar 11, 2020
de9ba88
test
mbmiquel Mar 11, 2020
fe1ab1c
test
mbmiquel Mar 11, 2020
9ac1ddf
test
mbmiquel Mar 11, 2020
98c295f
test
mbmiquel Mar 11, 2020
3a4b298
test
mbmiquel Mar 11, 2020
589f05f
test
mbmiquel Mar 11, 2020
44f3cfd
test
mbmiquel Mar 11, 2020
3396b3d
test
mbmiquel Mar 12, 2020
a2db842
test
mbmiquel Mar 12, 2020
a4bd5f6
test
mbmiquel Mar 12, 2020
8a8cb98
test
mbmiquel Mar 12, 2020
7776b8c
test
mbmiquel Mar 12, 2020
38b81f2
test
mbmiquel Mar 12, 2020
8204e8f
test
mbmiquel Mar 12, 2020
ff0c959
test
mbmiquel Mar 13, 2020
1ba1b84
test
mbmiquel Mar 13, 2020
8f81e59
test
mbmiquel Mar 17, 2020
a2630dc
test
mbmiquel Mar 17, 2020
1c19dd3
test
mbmiquel Mar 17, 2020
f2a35cd
test
mbmiquel Mar 17, 2020
45b7288
test
mbmiquel Mar 17, 2020
9374a0f
test
mbmiquel Mar 17, 2020
8e56a97
test
mbmiquel Mar 17, 2020
0a57a47
test
mbmiquel Mar 17, 2020
d218de4
test
mbmiquel Mar 17, 2020
a29c6d5
test
mbmiquel Mar 17, 2020
0ee9c27
test
mbmiquel Mar 17, 2020
6e5c7e9
test
mbmiquel Mar 17, 2020
85b3aa9
test
mbmiquel Mar 17, 2020
e3930cc
test
mbmiquel Mar 17, 2020
6a6c996
test
mbmiquel Mar 17, 2020
e9e2b52
test
mbmiquel Mar 17, 2020
a634e4a
test
mbmiquel Mar 17, 2020
207eb63
test
mbmiquel Mar 17, 2020
f3291dc
test
mbmiquel Mar 17, 2020
2a9a272
test
mbmiquel Mar 17, 2020
c63759e
test
mbmiquel Mar 17, 2020
b42e8ad
test
mbmiquel Mar 17, 2020
3cc810b
test
mbmiquel Mar 17, 2020
1acdd13
test
mbmiquel Mar 17, 2020
018ad20
test
mbmiquel Mar 17, 2020
737465f
test
mbmiquel Mar 17, 2020
00a5c7d
test
mbmiquel Mar 17, 2020
3df0a70
test
mbmiquel Mar 17, 2020
6cb71df
test
mbmiquel Mar 17, 2020
b9b530e
test
mbmiquel Mar 17, 2020
86cc406
test
mbmiquel Mar 17, 2020
45d6b66
test
mbmiquel Mar 17, 2020
0be3d53
test
mbmiquel Mar 17, 2020
b6512cd
test
mbmiquel Mar 17, 2020
782cf3c
test
mbmiquel Mar 17, 2020
7314edd
test
mbmiquel Mar 17, 2020
5d26560
test
mbmiquel Mar 17, 2020
c8b58c4
test
mbmiquel Mar 17, 2020
775216d
test
mbmiquel Mar 17, 2020
6714db0
test
mbmiquel Mar 17, 2020
87c37a1
test
mbmiquel Mar 17, 2020
fea8e56
test
mbmiquel Mar 17, 2020
b0378f7
test
mbmiquel Mar 17, 2020
f4bc6a0
test
mbmiquel Mar 17, 2020
e3d7f04
test
mbmiquel Mar 17, 2020
0ce10da
test
mbmiquel Mar 17, 2020
66c3f1a
test
mbmiquel Mar 17, 2020
4b7c55b
test
mbmiquel Mar 17, 2020
f2e8a10
test
mbmiquel Mar 17, 2020
e48f7b3
test
mbmiquel Mar 17, 2020
922c10e
test
mbmiquel Mar 17, 2020
e292cd1
test
mbmiquel Mar 17, 2020
caa8875
test
mbmiquel Mar 17, 2020
697555a
test
mbmiquel Mar 17, 2020
dfa203d
test
mbmiquel Mar 17, 2020
c8295fb
test
mbmiquel Mar 17, 2020
90cc8bf
test
mbmiquel Mar 18, 2020
a49bcf3
test
mbmiquel Mar 18, 2020
65b4836
test
mbmiquel Mar 18, 2020
4aeadc8
test
mbmiquel Mar 18, 2020
926e925
test
mbmiquel Mar 18, 2020
905f050
test
mbmiquel Mar 18, 2020
7ab78b0
test
mbmiquel Mar 18, 2020
27355fe
test
mbmiquel Mar 18, 2020
b1161d3
test
mbmiquel Mar 18, 2020
1b85206
test
mbmiquel Mar 18, 2020
da651f0
test
mbmiquel Mar 18, 2020
f6f0501
test
mbmiquel Mar 18, 2020
708c6a1
test
mbmiquel Mar 18, 2020
8c640c0
test
mbmiquel Mar 18, 2020
5694c61
test
mbmiquel Mar 18, 2020
eb20fe1
test
mbmiquel Mar 18, 2020
96b1b95
test
mbmiquel Mar 18, 2020
a3eb480
test
mbmiquel Mar 18, 2020
13db148
test
mbmiquel Mar 18, 2020
c55d88f
test
mbmiquel Mar 18, 2020
0cb5628
test
mbmiquel Mar 18, 2020
2b08489
test
mbmiquel Mar 18, 2020
a3f3773
test
mbmiquel Mar 18, 2020
df35da7
test
mbmiquel Mar 18, 2020
c0809c0
test
mbmiquel Mar 18, 2020
9fbba1b
test
mbmiquel Mar 18, 2020
39bad81
test
mbmiquel Mar 18, 2020
82a7904
test
mbmiquel Mar 18, 2020
d70f62b
test
mbmiquel Mar 18, 2020
5838f63
test
mbmiquel Mar 18, 2020
f67314a
test
mbmiquel Mar 20, 2020
a42755b
test
mbmiquel Mar 20, 2020
085325b
test
mbmiquel Mar 20, 2020
680c31b
test
mbmiquel Mar 20, 2020
999e830
test
mbmiquel Mar 20, 2020
c686d7c
test
mbmiquel Mar 20, 2020
7a564e9
test
mbmiquel Mar 20, 2020
996c815
test
mbmiquel Mar 20, 2020
b838cf6
test
mbmiquel Mar 20, 2020
4336ca6
test
mbmiquel Mar 20, 2020
77faa78
test
mbmiquel Mar 20, 2020
25ddb50
test
mbmiquel Mar 20, 2020
9d51374
test
mbmiquel Mar 20, 2020
5a4b88e
test
mbmiquel Mar 20, 2020
83762a6
test
mbmiquel Mar 20, 2020
b947c57
test
mbmiquel Mar 20, 2020
8c14d65
test
mbmiquel Mar 20, 2020
b3bfb2f
test
mbmiquel Mar 20, 2020
a341413
test
mbmiquel Mar 20, 2020
f7fabfd
test
mbmiquel Mar 20, 2020
a8fdc71
test
mbmiquel Mar 20, 2020
57dad9c
test
mbmiquel Mar 20, 2020
c1ca51f
test
mbmiquel Mar 20, 2020
6b2b23e
test
mbmiquel Mar 20, 2020
cd609f6
test
mbmiquel Mar 23, 2020
81f7e2b
test
mbmiquel Mar 23, 2020
7a4ea33
test
mbmiquel Mar 23, 2020
e34d885
test
mbmiquel Mar 23, 2020
cb9470a
test
mbmiquel Mar 23, 2020
4f8e769
tested
mbmiquel Apr 24, 2020
77805e4
ready
mbmiquel Apr 24, 2020
af6955d
Merge branch 'test_compss' of https://github.com/bsc-dd/dislib into t…
mbmiquel Apr 24, 2020
2429c70
new yml
mbmiquel Apr 24, 2020
7fc02f8
final
mbmiquel May 12, 2020
d6acae4
Delete def _merge_blocks(blocks):.py
mbmiquel May 12, 2020
1f9a382
Delete classes.py
mbmiquel May 12, 2020
63a2ecf
Delete __init__.py
mbmiquel May 12, 2020
60b5c14
Delete hello_world.py
mbmiquel May 12, 2020
bf6d161
Delete test_merge.py
mbmiquel May 12, 2020
6fd9b69
Delete test_simple.py
mbmiquel May 12, 2020
5f14fc8
Delete test_test.py
mbmiquel May 12, 2020
34cc7fe
Delete test_test2.py
mbmiquel May 12, 2020
5be7493
Merge branch 'master' of https://github.com/bsc-wdc/dislib into bsc-w…
mbmiquel May 12, 2020
e686051
Merge branch 'bsc-wdc-master' into test_compss
mbmiquel May 12, 2020
179e255
Merge branch 'test_compss' of https://github.com/bsc-dd/dislib into t…
mbmiquel May 12, 2020
c62c7eb
run SH
mbmiquel May 12, 2020
09caa34
run
mbmiquel May 12, 2020
dec1616
implementation using hecuba dicts
mbmiquel Jul 31, 2020
b1a9332
their dislib changes, and with dicts
mbmiquel Sep 2, 2020
2f9f04a
changes
mbmiquel Sep 2, 2020
8e50f81
first merging step: dislib version from before July
mbmiquel Sep 15, 2020
7c09acb
Merge branch 'master' of https://github.com/bsc-wdc/dislib into disli…
mbmiquel Sep 15, 2020
7ac0ebd
new file
mbmiquel Sep 15, 2020
ef25418
test
mbmiquel Sep 15, 2020
248fa83
tests runnable (compss_wait_on issue solved)
mbmiquel Sep 17, 2020
3709418
removing unnecessary files
mbmiquel Sep 18, 2020
9801740
cleaning data
mbmiquel Sep 23, 2020
cf5f6cf
New merge with lasso
mbmiquel Sep 23, 2020
b7bcb60
Merge branch 'master' of https://github.com/bsc-wdc/dislib into disli…
mbmiquel Sep 23, 2020
c7a8a24
error merge checked
mbmiquel Sep 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 21 additions & 18 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ sudo: required

branches:
only:
- master
- test_compss
- /^release-.*/

services:
Expand All @@ -14,24 +14,27 @@ env:
global:
- REGISTRY_USER=compss
- secure: ""
- TEST_CASSANDRA_VERSION=3.11.4

before_script:
- docker build --tag bscwdc/dislib .
- docker run $(bash <(curl -s https://codecov.io/env)) -d --name dislib bscwdc/dislib

script: "docker exec dislib /dislib/run_ci_checks.sh"

after_script:
- docker images
- docker exec dislib /dislib/bin/print_tests_logs.sh

before_deploy:
- docker login -u "$REGISTRY_USER" -p "$REGISTRY_PASS"
- docker tag bscwdc/dislib bscwdc/dislib:latest
deploy:
provider: script
script: docker push bscwdc/dislib:latest
on:
branch: master
- source launch_cassandra.sh
- docker build --tag emebemb/dislib_hecuba_compss_production:0.2 .
- docker run -it --network cassandra_bridge -d --name dislib emebemb/dislib_hecuba_compss_production:0.2


script: "docker exec -e CONTACT_NAMES='cassandra_container' -e NODE_PORT=9042 dislib /dislib/run_tests.sh"

#after_script:
# - docker images
# - docker exec dislib /dislib/bin/print_tests_logs.sh
#
#before_deploy:
# - docker login -u "$REGISTRY_USER" -p "$REGISTRY_PASS"
# - docker tag bscwdc/dislib bscwdc/dislib:latest
#deploy:
# provider: script
# script: docker push bscwdc/dislib:latest
# on:
# branch: master


13 changes: 12 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
FROM bscwdc/dislib-base:latest
#FROM bscwdc/dislib-base:latest
FROM adrianespejo/dislib_hecuba:0.1
MAINTAINER COMPSs Support <[email protected]>

#RUN apt-get update -y && apt-get update
#RUN apt-get install -y cmake python3-dev libpython3-dev gcc-4.8 libtool python3-numpy python3-pip python3-setuptools
#RUN curl -L https://github.com/bsc-dd/hecuba/archive/NumpyWritePartitions.tar.gz | tar -xz

#WORKDIR hecuba-NumpyWritePartitions
#RUN python3 -m pip install -r requirements.txt
#RUN python3 setup.py install
WORKDIR /

#RUN rm -rf dislib/
COPY . dislib/

ENV PYTHONPATH=$PYTHONPATH:/dislib
Expand Down
1 change: 1 addition & 0 deletions counter
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
2 changes: 1 addition & 1 deletion dislib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

from dislib.data.array import random_array, apply_along_axis, array, zeros, \
full, identity
full, load_from_hecuba, identity
from dislib.data.io import load_svmlight_file, load_npy_file, load_txt_file
from dislib.math import kron, svd

Expand Down
20 changes: 5 additions & 15 deletions dislib/cluster/kmeans/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
from pycompss.api.api import compss_wait_on
from pycompss.api.parameter import COLLECTION_IN, Depth, Type
from pycompss.api.parameter import INOUT, COLLECTION_IN, Depth, Type
from pycompss.api.task import task
from scipy.sparse import csr_matrix
from sklearn.base import BaseEstimator
Expand All @@ -10,10 +10,8 @@

from dislib.data.array import Array


class KMeans(BaseEstimator):
""" Perform K-means clustering.

Parameters
----------
n_clusters : int, optional (default=8)
Expand All @@ -22,7 +20,6 @@ class KMeans(BaseEstimator):
init : {'random', nd-array or sparse matrix}, optional (default='random')
Method of initialization, defaults to 'random', which generates
random centers at the beginning.

If an nd-array or sparse matrix is passed, it should be of shape
(n_clusters, n_features) and gives the initial centers.
max_iter : int, optional (default=10)
Expand All @@ -37,14 +34,12 @@ class KMeans(BaseEstimator):
for centroid initialization.
verbose: boolean, optional (default=False)
Whether to print progress information.

Attributes
----------
centers : ndarray
Computed centroids.
n_iter : int
Number of iterations performed.

Examples
--------
>>> from dislib.cluster import KMeans
Expand Down Expand Up @@ -73,14 +68,12 @@ def __init__(self, n_clusters=8, init='random', max_iter=10, tol=1e-4,

def fit(self, x, y=None):
""" Compute K-means clustering.

Parameters
----------
x : ds-array
Samples to cluster.
y : ignored
Not used, present here for API consistency by convention.

Returns
-------
self : KMeans
Expand All @@ -95,6 +88,7 @@ def fit(self, x, y=None):
old_centers = self.centers.copy()
partials = []


for row in x._iterator(axis=0):
partial = _partial_sum(row._blocks, old_centers)
partials.append(partial)
Expand All @@ -108,31 +102,26 @@ def fit(self, x, y=None):

def fit_predict(self, x, y=None):
""" Compute cluster centers and predict cluster index for each sample.

Parameters
----------
x : ds-array
Samples to cluster.
y : ignored
Not used, present here for API consistency by convention.

Returns
-------
labels : ds-array, shape=(n_samples, 1)
Index of the cluster each sample belongs to.
"""

self.fit(x)
return self.predict(x)

def predict(self, x):
""" Predict the closest cluster each sample in the data belongs to.

Parameters
----------
x : ds-array
New data to predict.

Returns
-------
labels : ds-array, shape=(n_samples, 1)
Expand Down Expand Up @@ -193,7 +182,6 @@ def _init_centers(self, n_features, sparse):
def _partial_sum(blocks, centers):
partials = np.zeros((centers.shape[0], 2), dtype=object)
arr = Array._merge_blocks(blocks)

close_centers = pairwise_distances(arr, centers).argmin(axis=1)

for center_idx, _ in enumerate(centers):
Expand All @@ -204,6 +192,8 @@ def _partial_sum(blocks, centers):
return partials




@task(returns=dict)
def _merge(*data):
accum = data[0].copy()
Expand All @@ -217,4 +207,4 @@ def _merge(*data):
@task(blocks={Type: COLLECTION_IN, Depth: 2}, returns=np.array)
def _predict(blocks, centers):
arr = Array._merge_blocks(blocks)
return pairwise_distances(arr, centers).argmin(axis=1).reshape(-1, 1)
return pairwise_distances(arr, centers).argmin(axis=1).reshape(-1, 1)
4 changes: 2 additions & 2 deletions dislib/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dislib.data.array import array, random_array, apply_along_axis, zeros, \
full, identity
full, load_from_hecuba, identity
from dislib.data.io import load_txt_file, load_npy_file, load_svmlight_file

__all__ = ['load_txt_file', 'load_svmlight_file', 'array', 'random_array',
'apply_along_axis', 'load_npy_file', 'zeros', 'full', 'identity']
'apply_along_axis', 'load_from_hecuba', 'load_npy_file', 'zeros', 'full', 'identity']
Loading