Skip to content

Commit

Permalink
Optimize the cython implementation for finding components and generat…
Browse files Browse the repository at this point in the history
…ing the canonical ordering (#39)
  • Loading branch information
rappdw authored Mar 26, 2020
1 parent 3ac4853 commit 5a170c3
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 75 deletions.
36 changes: 17 additions & 19 deletions redblackgraph/reference/components.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import itertools as it
import numpy as np

from collections import defaultdict
from typing import Dict, Optional, Sequence
Expand All @@ -19,38 +20,35 @@ def find_components(A: Sequence[Sequence[int]], q:Optional[Dict[int, int]] = Non
# This is our algorithm:
#
# Allocate an array that will represent the component for each vertex
# Allocate a set that contains the vertices visited
# Iterate over each row not in the visited vertices:
# This is a new component, so increment the component id and assign the vertex of the current row to that id
# Allocate a set that holds vertices that will be added to this component
# In a given row, iterate over the columns not in the visited vertices (since this is a new component,
# no columns will be in the visited vertices):
# Any non-zero columns in that row will be assigned to the row component and added to the set of added vertices
#
# Allocate an array that will represent the vertices that have been visited
# Iterate over each vertex that hasn't been visited:
# This is a new component, so increment the component id
# Allocate a set to hold ids to be added to this component
# Add the current vertex to this set
# while the set is not empty
# pull a vertex from the set
# add it to the current component
# For each non-zero cell in the vertex's row and column add those vertices to the set for this component

n = len(A)
if q is None:
q = defaultdict(lambda: 0)
component_for_vertex = [0] * n
vertices = range(n)
visited_vertices = set()
component_for_vertex = np.zeros((n), dtype=np.uint32)
visited_vertices = np.zeros((n), dtype=np.bool_)
component_id = 0
for i in it.filterfalse(lambda x: x in visited_vertices, vertices):
for i in it.filterfalse(lambda x: visited_vertices[x], vertices):
vertices_added_to_component = set()
vertex_count = 0
vertices_added_to_component.add(i)
while vertices_added_to_component:
vertex = vertices_added_to_component.pop()
vertex_count += 1
visited_vertices.add(vertex)
visited_vertices[vertex] = True
component_for_vertex[vertex] = component_id
for j in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[vertex][x] == 0 or x == vertex, vertices):
vertices_added_to_component.add(j)
for k in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[x][j] == 0 or x == j, vertices):
vertices_added_to_component.add(k)
# now we need to iterate the vertex's column
for k in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[x][vertex] == 0 or x == vertex, vertices):
vertices_added_to_component.add(k)
for j in vertices:
if not ((A[vertex][j] == 0 and A[j][vertex] == 0) or visited_vertices[j] or j in vertices_added_to_component):
vertices_added_to_component.add(j)
q[component_id] = vertex_count
component_id += 1
return component_for_vertex
9 changes: 5 additions & 4 deletions redblackgraph/reference/ordering.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import itertools as it
import numpy as np

from collections import defaultdict
Expand All @@ -25,9 +24,11 @@ def _get_permutation(A: Sequence[Sequence[int]], q: Dict[int, int], ids: Sequenc
ancester_count_for_vertex = np.zeros((n), dtype=np.int32)
vertices = range(n)
for i in vertices:
for j in it.filterfalse(lambda x: (A[i][x] == 0 and A[x][i] == 0) or x == i, vertices):
max_rel_for_vertex[i] = max(max_rel_for_vertex[i], A[j][i])
ancester_count_for_vertex[i] += MSB(A[i][j])
for j in vertices:
if A[i][j]:
ancester_count_for_vertex[i] += MSB(A[i][j])
if A[j][i]:
max_rel_for_vertex[i] = max(max_rel_for_vertex[i], A[j][i])

basis = [i for i in range(len(ids))]
# sort descending on size of component and "ancestor count", ascending on all other elements
Expand Down
54 changes: 26 additions & 28 deletions redblackgraph/sparse/csgraph/_components.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import numpy as np
cimport numpy as np

import itertools as it

from redblackgraph.core.redblack import array as rb_array
from collections import defaultdict
from typing import Dict, Optional, Sequence

include 'parameters.pxi'
Expand All @@ -26,38 +23,39 @@ def find_components(A: rb_array, q:Optional[Dict[int, int]] = None) -> Sequence[
# This is our algorithm:
#
# Allocate an array that will represent the component for each vertex
# Allocate a set that contains the vertices visited
# Iterate over each row not in the visited vertices:
# This is a new component, so increment the component id and assign the vertex of the current row to that id
# Allocate a set that holds vertices that will be added to this component
# In a given row, iterate over the columns not in the visited vertices (since this is a new component,
# no columns will be in the visited vertices):
# Any non-zero columns in that row will be assigned to the row component and added to the set of added vertices
#
# Allocate an array that will represent the vertices that have been visited
# Iterate over each vertex that hasn't been visited:
# This is a new component, so increment the component id
# Allocate a set to hold ids to be added to this component
# Add the current vertex to this set
# while the set is not empty
# pull a vertex from the set
# add it to the current component
# For each non-zero cell in the vertex's row and column add those vertices to the set for this component

n = len(A)
if q is None:
q = defaultdict(lambda: 0)
component_for_vertex = [0] * n
cdef unsigned int n = len(A)
vertices = range(n)
visited_vertices = set()
component_id = 0
for i in it.filterfalse(lambda x: x in visited_vertices, vertices):
component_for_vertex_np = np.zeros((n), dtype=np.uint32)
cdef unsigned int[ : ] component_for_vertex = component_for_vertex_np
cdef unsigned char[ : ] visited_vertices = np.zeros((n), dtype=np.uint8)
cdef unsigned int component_id = 0
cdef unsigned int vertex_count
cdef DTYPE_t[:, :] Am = A
for i in vertices: # it.filterfalse(lambda x: visited_vertices[x], vertices):
if visited_vertices[i]:
continue
vertices_added_to_component = set()
vertex_count = 0
vertices_added_to_component.add(i)
while vertices_added_to_component:
vertex = vertices_added_to_component.pop()
vertex_count += 1
visited_vertices.add(vertex)
visited_vertices[vertex] = True
component_for_vertex[vertex] = component_id
for j in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[vertex][x] == 0 or x == vertex, vertices):
vertices_added_to_component.add(j)
for k in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[x][j] == 0 or x == j, vertices):
vertices_added_to_component.add(k)
# now we need to iterate the vertex's column
for k in it.filterfalse(lambda x: x in visited_vertices or x in vertices_added_to_component or A[x][vertex] == 0 or x == vertex, vertices):
vertices_added_to_component.add(k)
q[component_id] = vertex_count
for j in vertices:
if not ((Am[vertex][j] == 0 and Am[j][vertex] == 0) or visited_vertices[j] or j in vertices_added_to_component):
vertices_added_to_component.add(j)
if q is not None:
q[component_id] = vertex_count
component_id += 1
return component_for_vertex
return component_for_vertex_np
20 changes: 10 additions & 10 deletions redblackgraph/sparse/csgraph/_ordering.pyx
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import numpy as np
cimport numpy as np

import itertools as it

from collections import defaultdict
from typing import Dict, List, Sequence
from redblackgraph.types.ordering import Ordering
from ._components import find_components
Expand All @@ -21,14 +18,17 @@ def _get_permutation(A: Sequence[Sequence[int]], q: Dict[int, int], ids: Sequenc
# * max ancestor: ascending
# * color: descending
# * vertex_id: ascending
n = len(A)
max_rel_for_vertex = np.zeros((n), dtype=np.int32)
ancester_count_for_vertex = np.zeros((n), dtype=np.int32)
cdef unsigned int n = len(A)
cdef DTYPE_t[:] max_rel_for_vertex = np.zeros((n), dtype=np.int32)
cdef DTYPE_t[:] ancester_count_for_vertex = np.zeros((n), dtype=np.int32)
cdef DTYPE_t[:, :] Am = A
vertices = range(n)
for i in vertices:
for j in it.filterfalse(lambda x: (A[i][x] == 0 and A[x][i] == 0) or x == i, vertices):
max_rel_for_vertex[i] = max(max_rel_for_vertex[i], A[j][i])
ancester_count_for_vertex[i] += MSB(A[i][j])
for j in vertices:
if Am[i][j]:
ancester_count_for_vertex[i] += MSB(Am[i][j])
if Am[j][i]:
max_rel_for_vertex[i] = max(max_rel_for_vertex[i], Am[j][i])

basis = [i for i in range(len(ids))]
# sort descending on size of component and "ancestor count", ascending on all other elements
Expand All @@ -50,7 +50,7 @@ def avos_canonical_ordering(A: Sequence[Sequence[int]]) -> Ordering:
:return: an upper triangular matrix that is isomorphic to A
"""

q = defaultdict(lambda: 0) # dictionary keyed by component id, value is count of vertices in componenet
q = dict() # dictionary keyed by component id, value is count of vertices in component
components = find_components(A, q)
perumutation = np.array(_get_permutation(A, q, components), dtype=ITYPE)
return Ordering(permute(A, perumutation), perumutation)
5 changes: 4 additions & 1 deletion redblackgraph/sparse/csgraph/_permutation.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ cdef DTYPE_t _permute(np.ndarray[DTYPE_t, ndim=2, mode='c'] A, np.ndarray[DTYPE_
cdef unsigned int i, j, start, N = B.shape[0]
assert B.shape[1] == N
assert p.shape[0] == N
cdef DTYPE_t[:, :] Am = A
cdef DTYPE_t[:, :] Bm = B
cdef ITYPE_t[:] pm = p

for i in range(N):
if assume_upper_triangular:
Expand All @@ -26,4 +29,4 @@ cdef DTYPE_t _permute(np.ndarray[DTYPE_t, ndim=2, mode='c'] A, np.ndarray[DTYPE_
start = 0

for j in range(start, N):
B[i][j] = A[p[i]][p[j]]
Bm[i][j] = Am[pm[i]][pm[j]]
2 changes: 1 addition & 1 deletion scripts/rbgcf
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ if __name__ == '__main__':
if graph.shape[0] >= MAX_COLUMNS_EXCEL:
logger.error(f"Trying to ingest a graph that exceeds the size excel can handle. ({graph.shape[0]} vertices)")
if graph.shape[0] >= MAX_PRACTICAL_SIZE:
logger.warning(f"This graph is on the large size ({graph.shape[0]}). Processing times for graphs in excess of {MAX_PRACTICAL_SIZE} nodes are noticably slower.")
logger.warning(f"This graph is on the large size ({graph.shape[0]}). It will take a few seconds more to write the xlsx file.")

logger.info(f"Graph is created. Writing out simple format as xslx file to {outputfile}")

Expand Down
4 changes: 2 additions & 2 deletions tests/avos/test_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_find_components(transitive_closure, find_components):
assert_equal(A_star, expected_transitive_closure)

components = find_components(A_star)
assert components == [0, 0, 0, 1, 0, 0, 0, 1, 1, 0]
assert_equal(components, [0, 0, 0, 1, 0, 0, 0, 1, 1, 0])

@pytest.mark.parametrize(
"transitive_closure,find_components",
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_find_components_use_case_2(transitive_closure, find_components):
assert A_star.tolist() == expected_transitive_closure

components = find_components(A_star)
assert components == [0, 1, 0, 0, 1, 0, 1]
assert_equal(components, [0, 1, 0, 0, 1, 0, 1])

def test_find_components_dfs():
A = [[-1, 0, 0, 2, 0, 3, 0],
Expand Down
15 changes: 12 additions & 3 deletions tests/avos/test_ordering.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
import numpy as np
import redblackgraph.reference as ref
import redblackgraph.sparse.csgraph as sparse
import pytest

from numpy.testing import assert_equal

from collections import defaultdict

def find_components(A, q):
return sparse.find_components(np.array(A, dtype=np.int32), q)

def get_permutation(A, q, components):
return sparse._get_permutation(np.array(A, dtype=np.int32), q, components)

@pytest.mark.parametrize(
"transitive_closure,find_components,get_permutation",
[
(ref.transitive_closure, ref.find_components, ref.ordering._get_permutation),
(sparse.transitive_closure, sparse.find_components, sparse._get_permutation),
(sparse.transitive_closure, find_components, get_permutation),
]
)
def test_ordering(transitive_closure, find_components, get_permutation):
Expand Down Expand Up @@ -58,10 +67,10 @@ def test_ordering(transitive_closure, find_components, get_permutation):
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-1]
]
q = defaultdict(lambda: 0) # dictionary keyed by component id, value is count of vertices in componenet
q = dict() # dictionary keyed by component id, value is count of vertices in componenet
components = find_components(A, q)
#vertices = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21]
assert components == [ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0] # which component does a vertex belong to
assert_equal(components, [ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]) # which component does a vertex belong to
assert q[0] == 19
assert q[1] == 3

Expand Down
29 changes: 22 additions & 7 deletions tests/reference/test_triangularization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,28 @@ def test_triangularize_via_topological_sort():
assert_equal(A_triangle.A, expected)

def test_triangularization():
A = [[-1, 0, 0, 2, 0, 3, 0],
[ 0,-1, 0, 0, 0, 0, 0],
[ 2, 0, 1, 0, 0, 0, 0],
[ 0, 0, 0,-1, 0, 0, 0],
[ 0, 2, 0, 0,-1, 0, 3],
[ 0, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 0, 0, 0, 1]]
A = [
[-1, 0, 0, 2, 0, 3, 0],
[ 0,-1, 0, 0, 0, 0, 0],
[ 2, 0, 1, 0, 0, 0, 0],
[ 0, 0, 0,-1, 0, 0, 0],
[ 0, 2, 0, 0,-1, 0, 3],
[ 0, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 0, 0, 0, 1]
]
A_star = transitive_closure(A).W

expected_A_star = [
[-1, 0, 0, 2, 0, 3, 0],
[ 0,-1, 0, 0, 0, 0, 0],
[ 2, 0, 1, 4, 0, 5, 0],
[ 0, 0, 0,-1, 0, 0, 0],
[ 0, 2, 0, 0,-1, 0, 3],
[ 0, 0, 0, 0, 0, 1, 0],
[ 0, 0, 0, 0, 0, 0, 1]
]
assert_equal(A_star, expected_A_star)

A_star_canonical = avos_canonical_ordering(A_star)
expected_canonical = [[ 1, 2, 4, 5, 0, 0, 0],
[ 0,-1, 2, 3, 0, 0, 0],
Expand All @@ -40,5 +54,6 @@ def test_triangularization():
[ 0, 0, 0, 0, 0,-1, 0],
[ 0, 0, 0, 0, 0, 0, 1]]

# print(capture(A_star_canonical.A))
assert_equal(A_star_canonical.A, expected_canonical)
assert_equal(A_star_canonical.label_permutation, [2, 0, 3, 5, 4, 1, 6])

0 comments on commit 5a170c3

Please sign in to comment.