diff --git a/doc/sphinx/algorithms-conf.json b/doc/sphinx/algorithms-conf.json new file mode 100644 index 000000000..5d9ef51ac --- /dev/null +++ b/doc/sphinx/algorithms-conf.json @@ -0,0 +1,1895 @@ +{ + "modes": { + "stream": { + "name": "stream", + "config": [ + { + "name": "nodeLabels", + "type": "List of String", + "default": ["*"], + "optional": true, + "description": "Filter the named graph using the given node labels. Nodes with any of the given labels will be included." + }, + { + "name": "relationshipTypes", + "type": "List of String", + "default": ["*"], + "optional": true, + "description": "Filter the named graph using the given relationship types. Relationships with any of the given types will be included." + }, + { + "name": "concurrency", + "type": "Integer", + "default": 4, + "optional": true, + "description": "The number of concurrent threads used for running the algorithm." + }, + { + "name": "jobId", + "type": "String", + "default": null, + "default_placeholder": "Generated internally", + "optional": true, + "description": "An ID that can be provided to more easily track the algorithm’s progress." + }, + { + "name": "logProgress", + "type": "Boolean", + "default": true, + "optional": true, + "description": "If disabled the progress percentage will not be logged." + } + ] + } + }, + "algorithms": [ + { + "name": "Article Rank", + "procedure": "gds.articleRank", + "config": [ + { + "name": "dampingFactor", + "type": "Float", + "default": "0.85", + "optional": true, + "description": "The damping factor of the Page Rank calculation. Must be in [0, 1)." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "20", + "optional": true, + "description": "The maximum number of iterations of Article Rank to run." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0000001", + "optional": true, + "description": "Minimum change in scores between iterations. 
If all scores change less than the tolerance value the result is considered stable, and the algorithm returns." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "sourceNodes", + "type": "List or Node or Number", + "default": "[]", + "optional": true, + "description": "The nodes or node ids to use for computing Personalized Page Rank." + }, + { + "name": "scaler", + "type": "String or Map", + "default": "None", + "optional": true, + "description": "The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`." + } + ], + "page_path": "algorithms/article-rank/" + }, + { + "name": "Articulation Points", + "procedure": "gds.articulationPoints", + "config": [], + "page_path": "algorithms/articulation-points/" + }, + { + "name": "Betweenness Centrality", + "procedure": "gds.betweenness", + "config": [ + { + "name": "samplingSize", + "type": "Integer", + "default": "node count", + "optional": true, + "description": "The number of source nodes to consider for computing centrality scores." + }, + { + "name": "samplingSeed", + "type": "Integer", + "default": "null", + "optional": true, + "description": "The seed value for the random number generator that selects start nodes." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." 
+ } + ], + "page_path": "algorithms/betweenness-centrality/" + }, + { + "name": "Bridges", + "procedure": "gds.bridges", + "config": [], + "page_path": "algorithms/bridges/" + }, + { + "name": "CELF", + "procedure": "gds.influenceMaximization.celf", + "config": [ + { + "name": "seedSetSize", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The number of nodes that maximize the expected spread in the network." + }, + { + "name": "monteCarloSimulations", + "type": "Integer", + "default": "100", + "optional": true, + "description": "The number of Monte-Carlo simulations." + }, + { + "name": "propagationProbability", + "type": "Float", + "default": "0.1", + "optional": true, + "description": "The probability of a node being activated by an active neighbour node." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "The seed value to control the randomness of the algorithm." + } + ], + "page_path": "algorithms/celf/" + }, + { + "name": "Closeness Centrality", + "procedure": "gds.closeness", + "config": [ + { + "name": "useWassermanFaust", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Use the improved Wasserman-Faust formula for closeness computation." + } + ], + "page_path": "algorithms/closeness-centrality/" + }, + { + "name": "Degree Centrality", + "procedure": "gds.degree", + "config": [ + { + "name": "orientation", + "type": "String", + "default": "NATURAL", + "optional": true, + "description": "The orientation used to compute node degrees. Supported orientations are `NATURAL`, `REVERSE` and `UNDIRECTED`." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use for weighted degree computation. If unspecified, the algorithm runs unweighted." 
+ } + ], + "page_path": "algorithms/degree-centrality/" + }, + { + "name": "Eigenvector Centrality", + "procedure": "gds.eigenvector", + "config": [ + { + "name": "maxIterations", + "type": "Integer", + "default": "20", + "optional": true, + "description": "The maximum number of iterations of Eigenvector Centrality to run." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0000001", + "optional": true, + "description": "Minimum change in scores between iterations. If all scores change less than the tolerance value the result is considered stable and the algorithm returns." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "sourceNodes", + "type": "List or Node or Number", + "default": "[]", + "optional": true, + "description": "The nodes or node ids to use for computing Personalized Page Rank." + }, + { + "name": "scaler", + "type": "String or Map", + "default": "None", + "optional": true, + "description": "The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`." + } + ], + "page_path": "algorithms/eigenvector-centrality/" + }, + { + "name": "PageRank", + "procedure": "gds.pageRank", + "config": [ + { + "name": "dampingFactor", + "type": "Float", + "default": "0.85", + "optional": true, + "description": "The damping factor of the Page Rank calculation. Must be in [0, 1)." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "20", + "optional": true, + "description": "The maximum number of iterations of Page Rank to run." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0000001", + "optional": true, + "description": "Minimum change in scores between iterations. 
If all scores change less than the tolerance value the result is considered stable and the algorithm returns." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "sourceNodes", + "type": "List of Node or Number", + "default": "[]", + "optional": true, + "description": "The nodes or node ids to use for computing Personalized Page Rank." + }, + { + "name": "scaler", + "type": "String or Map", + "default": "None", + "optional": true, + "description": "The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`." + } + ], + "page_path": "algorithms/page-rank/" + }, + { + "name": "Harmonic Centrality", + "procedure": "gds.closeness.harmonic", + "config": [], + "page_path": "algorithms/harmonic-centrality/" + }, + { + "name": "HITS", + "procedure": "gds.hits", + "config": [ + { + "name": "hitsIterations", + "type": "Integer", + "default": "20", + "optional": true, + "description": "The number of hits iterations to run. The number of pregel iterations will be equal to hitsIterations * 4" + }, + { + "name": "authProperty", + "type": "String", + "default": "\"auth\"", + "optional": true, + "description": "The name that is used for the auth property when using STREAM, MUTATE or WRITE modes." + }, + { + "name": "hubProperty", + "type": "String", + "default": "\"hub\"", + "optional": true, + "description": "The name that is used for the hub property when using STREAM, MUTATE or WRITE modes." + }, + { + "name": "partitioning", + "type": "String", + "default": "\"AUTO\"", + "optional": true, + "description": "The partitioning scheme used to divide the work between threads. Available options are AUTO, RANGE, DEGREE." 
+ } + ], + "page_path": "algorithms/hits/" + }, + { + "name": "Conductance metric", + "procedure": "gds.conductance", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "communityProperty", + "type": "String", + "default": "n/a", + "optional": false, + "description": "The node property that holds the community ID as an integer for each node. Note that only non-negative community IDs are considered valid and will have their conductance computed." + } + ], + "page_path": "algorithms/conductance/" + }, + { + "name": "K-Core Decomposition", + "procedure": "gds.kcore", + "config": [], + "page_path": "algorithms/k-core/" + }, + { + "name": "K-1 Coloring", + "procedure": "gds.k1coloring", + "config": [ + { + "name": "maxIterations", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of iterations of K-1 Coloring to run." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." + } + ], + "page_path": "algorithms/k1coloring/" + }, + { + "name": "K-Means Clustering", + "procedure": "gds.kmeans", + "config": [ + { + "name": "nodeProperty", + "type": "String", + "default": "n/a", + "optional": false, + "description": "A node property corresponding to an array of floats used by K-Means to cluster nodes into communities." + }, + { + "name": "k", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Number of desired clusters." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of iterations of K-Means to run." 
+ }, + { + "name": "deltaThreshold", + "type": "Float", + "default": "0.05", + "optional": true, + "description": "Value as a percentage to determine when to stop early. If fewer than `deltaThreshold * |nodes|` nodes change their cluster, the algorithm stops. Value must be between 0 (exclusive) and 1 (inclusive)." + }, + { + "name": "numberOfRestarts", + "type": "Integer", + "default": "1", + "optional": true, + "description": "Number of times to execute K-Means with different initial centers. The communities returned are those minimizing the average node-center distances." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "The seed value to control the initial centroid assignment." + }, + { + "name": "initialSampler", + "type": "String", + "default": "\"uniform\"", + "optional": true, + "description": "The method used to sample the first k centroids. \"uniform\" and \"kmeans++\", both case-insensitive, are valid inputs." + }, + { + "name": "seedCentroids", + "type": "List of List of Float", + "default": "[]", + "optional": true, + "description": "Parameter to explicitly give the initial centroids. It cannot be enabled together with a non-default value of the numberOfRestarts parameter." + }, + { + "name": "computeSilhouette", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "If set to true, the silhouette scores are computed once the clustering has been determined. Silhouette is a metric on how well the nodes have been clustered." + } + ], + "page_path": "algorithms/kmeans/" + }, + { + "name": "Label Propagation", + "procedure": "gds.labelPropagation", + "config": [ + { + "name": "maxIterations", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of iterations to run." 
+ }, + { + "name": "nodeWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "The name of a node property that contains node weights." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "seedProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "The name of a node property that defines an initial numeric label." + }, + { + "name": "consecutiveIds", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Flag to decide whether component identifiers are mapped into a consecutive id space (requires additional memory)." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." + } + ], + "page_path": "algorithms/label-propagation/" + }, + { + "name": "Leiden", + "procedure": "gds.leiden", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "maxLevels", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of levels in which the graph is clustered and then condensed." + }, + { + "name": "gamma", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Resolution parameter used when computing the modularity. Internally the value is divided by the number of relationships for an unweighted graph, or the sum of weights of all relationships otherwise. 
" + }, + { + "name": "theta", + "type": "Float", + "default": "0.01", + "optional": true, + "description": "Controls the randomness while breaking a community into smaller ones." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0001", + "optional": true, + "description": "Minimum change in modularity between iterations. If the modularity changes less than the tolerance value, the result is considered stable and the algorithm returns." + }, + { + "name": "includeIntermediateCommunities", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Indicates whether to write intermediate communities. If set to false, only the final community is persisted." + }, + { + "name": "seedProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "Used to set the initial community for a node. The property value needs to be a non-negative number." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." + } + ], + "page_path": "algorithms/leiden/" + }, + { + "name": "Local Clustering Coefficient", + "procedure": "gds.localClusteringCoefficient", + "config": [ + { + "name": "triangleCountProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "Node property that contains pre-computed triangle count." + } + ], + "page_path": "algorithms/local-clustering-coefficient/" + }, + { + "name": "Louvain", + "procedure": "gds.louvain", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "seedProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "Used to set the initial community for a node. 
The property value needs to be a non-negative number." + }, + { + "name": "maxLevels", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of levels in which the graph is clustered and then condensed." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of iterations that the modularity optimization will run for each level." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0001", + "optional": true, + "description": "Minimum change in modularity between iterations. If the modularity changes less than the tolerance value, the result is considered stable and the algorithm returns." + }, + { + "name": "includeIntermediateCommunities", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Indicates whether to write intermediate communities. If set to false, only the final community is persisted." + }, + { + "name": "consecutiveIds", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Flag to decide whether component identifiers are mapped into a consecutive id space (requires additional memory). Cannot be used in combination with the includeIntermediateCommunities flag." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." + } + ], + "page_path": "algorithms/louvain/" + }, + { + "name": "Modularity metric", + "procedure": "gds.modularity", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." 
+ }, + { + "name": "communityProperty", + "type": "String", + "default": "n/a", + "optional": false, + "description": "The node property that holds the community ID as an integer for each node. Note that only non-negative community IDs are considered valid and will have their modularity score computed." + } + ], + "page_path": "algorithms/modularity/" + }, + { + "name": "Modularity Optimization", + "procedure": "gds.modularityOptimization", + "config": [ + { + "name": "maxIterations", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The maximum number of iterations to run." + }, + { + "name": "tolerance", + "type": "Float", + "default": "0.0001", + "optional": true, + "description": "Minimum change in modularity between iterations. If the modularity changes less than the tolerance value, the result is considered stable and the algorithm returns." + }, + { + "name": "seedProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "Used to define the initial set of labels (must be a non-negative number)." + }, + { + "name": "consecutiveIds", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Flag to decide whether component identifiers are mapped into a consecutive id space (requires additional memory)." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." 
+ } + ], + "page_path": "algorithms/modularity-optimization/" + }, + { + "name": "Strongly Connected Components", + "procedure": "gds.scc", + "config": [ + { + "name": "consecutiveIds", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Flag to decide whether component identifiers are mapped into a consecutive id space (requires additional memory)." + } + ], + "page_path": "algorithms/strongly-connected-components/" + }, + { + "name": "Triangle Count", + "procedure": "gds.triangleCount", + "config": [ + { + "name": "maxDegree", + "type": "Integer", + "default": "2^63^ - 1", + "optional": true, + "description": "If a node has a degree higher than this it will not be considered by the algorithm. The triangle count for these nodes will be `-1`." + } + ], + "page_path": "algorithms/triangle-count/" + }, + { + "name": "Weakly Connected Components", + "procedure": "gds.wcc", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "seedProperty", + "type": "String", + "default": "n/a", + "optional": true, + "description": "Used to set the initial component for a node. The property value needs to be a number." + }, + { + "name": "threshold", + "type": "Float", + "default": "null", + "optional": true, + "description": "The value of the weight above which the relationship is considered in the computation." + }, + { + "name": "consecutiveIds", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Flag to decide whether component identifiers are mapped into a consecutive id space (requires additional memory)." + }, + { + "name": "minComponentSize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside components larger than or equal to the given value are returned." 
+ } + ], + "page_path": "algorithms/wcc/" + }, + { + "name": "Approximate Maximum k-cut", + "procedure": "gds.maxkcut", + "config": [ + { + "name": "k", + "type": "Integer", + "default": "2", + "optional": true, + "description": "The number of disjoint communities the nodes will be divided into." + }, + { + "name": "iterations", + "type": "Integer", + "default": "8", + "optional": true, + "description": "The number of iterations the algorithm will run before returning the best solution among all the iterations." + }, + { + "name": "vnsMaxNeighborhoodOrder", + "type": "Integer", + "default": "0 (VNS off)", + "optional": true, + "description": "The maximum number of nodes VNS will swap when perturbing solutions." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "A random seed which is used for all randomness in the computation. Requires concurrency = 1." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "If set, the values stored at the given property are used as relationship weights during the computation. If not set, the graph is considered unweighted." + }, + { + "name": "minCommunitySize", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Only nodes inside communities larger than or equal to the given value are returned." + } + ], + "page_path": "algorithms/approx-max-k-cut/" + }, + { + "name": "Speaker-Listener Label Propagation", + "procedure": "gds.sllpa", + "config": [ + { + "name": "maxIterations", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "Maximum number of iterations to run." + }, + { + "name": "minAssociationStrength", + "type": "Float", + "default": "0.2", + "optional": true, + "description": "Minimum influence required for a community to retain a node." 
+ }, + { + "name": "partitioning", + "type": "String", + "default": "\"RANGE\"", + "optional": true, + "description": "The partitioning scheme used to divide the work between threads. Available options are AUTO, RANGE, DEGREE." + } + ], + "page_path": "algorithms/sllpa/" + }, + { + "name": "Node Similarity", + "procedure": "gds.nodeSimilarity", + "config": [ + { + "name": "similarityCutoff", + "type": "Float", + "default": "1e-42", + "optional": true, + "description": "Lower limit for the similarity score to be present in the result.\nValues must be between 0 and 1." + }, + { + "name": "degreeCutoff", + "type": "Integer", + "default": "1", + "optional": true, + "description": "Inclusive lower bound on the node degree for a node to be considered in the comparisons.\nThis value can not be lower than 1." + }, + { + "name": "upperDegreeCutoff", + "type": "Integer", + "default": "2147483647", + "optional": true, + "description": "Inclusive upper bound on the node degree for a node to be considered in the comparisons.\nThis value can not be lower than 1." + }, + { + "name": "topK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Limit on the number of scores per node.\nThe K largest results are returned.\nThis value cannot be lower than 1." + }, + { + "name": "bottomK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Limit on the number of scores per node.\nThe K smallest results are returned.\nThis value cannot be lower than 1." + }, + { + "name": "topN", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Global limit on the number of scores computed.\nThe N largest total results are returned.\nThis value cannot be negative, a value of 0 means no global limit." 
+ }, + { + "name": "bottomN", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Global limit on the number of scores computed.\nThe N smallest total results are returned.\nThis value cannot be negative, a value of 0 means no global limit." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights.\nIf unspecified, the algorithm runs unweighted." + }, + { + "name": "similarityMetric", + "type": "String", + "default": "JACCARD", + "optional": true, + "description": "The metric used to compute similarity.\nCan be either `JACCARD`, `OVERLAP` or `COSINE`." + }, + { + "name": "useComponents", + "type": "Boolean or String", + "default": "false", + "optional": true, + "description": "If enabled, Node Similarity will use components to improve the performance of the computation, skipping comparisons of nodes in different components.\nSet to `false` (Default): the algorithm does not use components, but computes similarity across the entire graph.\nSet to `true`: the algorithm uses components, and will compute these components before computing similarity.\nSet to *String*: use pre-computed components stored in graph, *String* is the key for a node property representing components." + } + ], + "page_path": "algorithms/node-similarity/" + }, + { + "name": "Filtered Node Similarity", + "procedure": "gds.nodeSimilarity.filtered", + "config": [ + { + "name": "similarityCutoff", + "type": "Float", + "default": "1e-42", + "optional": true, + "description": "Lower limit for the similarity score to be present in the result.\nValues must be between 0 and 1." + }, + { + "name": "degreeCutoff", + "type": "Integer", + "default": "1", + "optional": true, + "description": "Inclusive lower bound on the node degree for a node to be considered in the comparisons.\nThis value can not be lower than 1." 
+ }, + { + "name": "upperDegreeCutoff", + "type": "Integer", + "default": "2147483647", + "optional": true, + "description": "Inclusive upper bound on the node degree for a node to be considered in the comparisons.\nThis value can not be lower than 1." + }, + { + "name": "topK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Limit on the number of scores per node.\nThe K largest results are returned.\nThis value cannot be lower than 1." + }, + { + "name": "bottomK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Limit on the number of scores per node.\nThe K smallest results are returned.\nThis value cannot be lower than 1." + }, + { + "name": "topN", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Global limit on the number of scores computed.\nThe N largest total results are returned.\nThis value cannot be negative, a value of 0 means no global limit." + }, + { + "name": "bottomN", + "type": "Integer", + "default": "0", + "optional": true, + "description": "Global limit on the number of scores computed.\nThe N smallest total results are returned.\nThis value cannot be negative, a value of 0 means no global limit." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights.\nIf unspecified, the algorithm runs unweighted." + }, + { + "name": "similarityMetric", + "type": "String", + "default": "JACCARD", + "optional": true, + "description": "The metric used to compute similarity.\nCan be either JACCARD, OVERLAP or COSINE." 
+ }, + { + "name": "useComponents", + "type": "Boolean or String", + "default": "false", + "optional": true, + "description": "If enabled, Node Similarity will use components to improve the performance of the computation, skipping comparisons of nodes in different components.\nSet to false (Default): the algorithm does not use components, but computes similarity across the entire graph.\nSet to true: the algorithm uses components, and will compute these components before computing similarity.\nSet to String: use pre-computed components stored in the graph, with String as the key for a node property representing components." + }, + { + "name": "sourceNodeFilter", + "type": "Integer or List of Integer or String", + "default": "n/a", + "optional": false, + "description": "The source node filter to apply.\nAccepts a single node id,\na List of node ids,\nor a single label." + }, + { + "name": "targetNodeFilter", + "type": "Integer or List of Integer or String", + "default": "n/a", + "optional": false, + "description": "The target node filter to apply.\nAccepts a single node id,\na List of node ids,\nor a single label." + } + ], + "page_path": "algorithms/filtered-node-similarity/" + }, + { + "name": "K-Nearest Neighbors", + "procedure": "gds.knn", + "config": [ + { + "name": "nodeProperties", + "type": "String or Map or List of Strings / Maps", + "default": "n/a", + "optional": false, + "description": "The node properties to use for similarity computation along with their selected similarity metrics.\nAccepts a single property key,\na Map of property keys to metrics,\nor a List of property keys and/or Maps, as above.\nSee Node properties and metrics configuration for details." + }, + { + "name": "topK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of neighbors to find for each node.\nThe K-nearest neighbors are returned.\nThis value cannot be lower than 1." 
+ }, + { + "name": "sampleRate", + "type": "Float", + "default": "0.5", + "optional": true, + "description": "Sample rate to limit the number of comparisons per node.\nValue must be between 0 (exclusive) and 1 (inclusive)." + }, + { + "name": "deltaThreshold", + "type": "Float", + "default": "0.001", + "optional": true, + "description": "Value as a percentage to determine when to stop early.\nIf fewer updates than the configured value happen, the algorithm stops.\nValue must be between 0 (exclusive) and 1 (inclusive)." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "100", + "optional": true, + "description": "Hard limit to stop the algorithm after that many iterations." + }, + { + "name": "randomJoins", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of random attempts per node to connect new node neighbors based on random selection, for each iteration." + }, + { + "name": "initialSampler", + "type": "String", + "default": "\"uniform\"", + "optional": true, + "description": "The method used to sample the first k random neighbors for each node. \"uniform\" and \"randomWalk\", both case-insensitive, are valid inputs." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "The seed value to control the randomness of the algorithm.\nNote that concurrency must be set to 1 when setting this parameter." + }, + { + "name": "similarityCutoff", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "Filter out from the list of K-nearest neighbors nodes with similarity below this threshold." + }, + { + "name": "perturbationRate", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "The probability of replacing the least similar known neighbor with an encountered neighbor of equal similarity." 
+ } + ], + "page_path": "algorithms/knn/" + }, + { + "name": "Filtered K-Nearest Neighbors", + "procedure": "gds.knn.filtered", + "config": [ + { + "name": "nodeProperties", + "type": "String or Map or List of Strings / Maps", + "default": "n/a", + "optional": false, + "description": "The node properties to use for similarity computation along with their selected similarity metrics.\nAccepts a single property key,\na Map of property keys to metrics,\nor a List of property keys and/or Maps, as above.\nSee Node properties and metrics configuration for details." + }, + { + "name": "topK", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of neighbors to find for each node.\nThe K-nearest neighbors are returned.\nThis value cannot be lower than 1." + }, + { + "name": "sampleRate", + "type": "Float", + "default": "0.5", + "optional": true, + "description": "Sample rate to limit the number of comparisons per node.\nValue must be between 0 (exclusive) and 1 (inclusive)." + }, + { + "name": "deltaThreshold", + "type": "Float", + "default": "0.001", + "optional": true, + "description": "Value as a percentage to determine when to stop early.\nIf fewer updates than the configured value happen, the algorithm stops.\nValue must be between 0 (exclusive) and 1 (inclusive)." + }, + { + "name": "maxIterations", + "type": "Integer", + "default": "100", + "optional": true, + "description": "Hard limit to stop the algorithm after that many iterations." + }, + { + "name": "randomJoins", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of random attempts per node to connect new node neighbors based on random selection, for each iteration." + }, + { + "name": "initialSampler", + "type": "String", + "default": "\"uniform\"", + "optional": true, + "description": "The method used to sample the first k random neighbors for each node. \"uniform\" and \"randomWalk\", both case-insensitive, are valid inputs." 
+ }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "The seed value to control the randomness of the algorithm.\nNote that concurrency must be set to 1 when setting this parameter." + }, + { + "name": "similarityCutoff", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "Filter out from the list of K-nearest neighbors nodes with similarity below this threshold." + }, + { + "name": "perturbationRate", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "The probability of replacing the least similar known neighbor with an encountered neighbor of equal similarity." + }, + { + "name": "sourceNodeFilter", + "type": "Integer or List of Integer or String", + "default": "n/a", + "optional": false, + "description": "The source node filter to apply.\nAccepts a single node id,\na List of node ids,\nor a single label." + }, + { + "name": "targetNodeFilter", + "type": "Integer or List of Integer or String", + "default": "n/a", + "optional": false, + "description": "The target node filter to apply.\nAccepts a single node id,\na List of node ids,\nor a single label." + }, + { + "name": "seedTargetNodes", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Enable seeding of target nodes." + } + ], + "page_path": "algorithms/filtered-knn/" + }, + { + "name": "Delta-Stepping Single-Source Shortest Path", + "procedure": "gds.allShortestPaths.delta", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "delta", + "type": "Float", + "default": "2.0", + "optional": true, + "description": "The bucket width for grouping nodes with the same tentative distance to the source node." 
+ }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/delta-single-source/" + }, + { + "name": "Dijkstra Source-Target Shortest Path", + "procedure": "gds.shortestPath.dijkstra", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "targetNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j target node or node id." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/dijkstra-source-target/" + }, + { + "name": "Dijkstra Single-Source Shortest Path", + "procedure": "gds.allShortestPaths.dijkstra", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/dijkstra-single-source/" + }, + { + "name": "A* Shortest Path", + "procedure": "gds.shortestPath.astar", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "targetNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j target node or node id." 
+ }, + { + "name": "latitudeProperty", + "type": "Float", + "default": "n/a", + "optional": false, + "description": "The node property that stores the latitude value." + }, + { + "name": "longitudeProperty", + "type": "Float", + "default": "n/a", + "optional": false, + "description": "The node property that stores the longitude value." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/astar/" + }, + { + "name": "Yen's Shortest Path algorithm", + "procedure": "gds.shortestPath.yens", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "targetNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j target node or node id." + }, + { + "name": "k", + "type": "Integer", + "default": "1", + "optional": true, + "description": "The number of shortest paths to compute between source and target node." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/yens/" + }, + { + "name": "Minimum Weight Spanning Tree", + "procedure": "gds.spanningTree", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "null", + "optional": false, + "description": "The starting source node ID." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." 
+ }, + { + "name": "objective", + "type": "String", + "default": "'minimum'", + "optional": true, + "description": "If specified, the parameter dictates whether to find the minimum or the maximum weight spanning tree. By default, a minimum weight spanning tree is returned. Permitted values are 'minimum' and 'maximum'." + } + ], + "page_path": "algorithms/minimum-weight-spanning-tree/" + }, + { + "name": "Minimum Directed Steiner Tree", + "procedure": "gds.steinerTree", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "null", + "optional": false, + "description": "The starting source node ID." + }, + { + "name": "targetNodes", + "type": "List of Integer", + "default": "null", + "optional": false, + "description": "The list of target nodes" + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + }, + { + "name": "delta", + "type": "Float", + "default": "2.0", + "optional": true, + "description": "The bucket width for grouping nodes with the same tentative distance to the source node. Look into the Delta-Stepping documentation for more information." + }, + { + "name": "applyRerouting", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "If specified, the algorithm will try to improve the outcome via an additional post-processing heuristic." + } + ], + "page_path": "algorithms/directed-steiner-tree/" + }, + { + "name": "Random Walk", + "procedure": "gds.randomWalk", + "config": [ + { + "name": "sourceNodes", + "type": "List of Integer", + "default": "List of all nodes", + "optional": true, + "description": "The list of nodes from which to do a random walk." + }, + { + "name": "walkLength", + "type": "Integer", + "default": "80", + "optional": true, + "description": "The number of steps in a single random walk." 
+ }, + { + "name": "walksPerNode", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of random walks generated for each node." + }, + { + "name": "inOutFactor", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Tendency of the random walk to stay close to the start node or fan out in the graph. Higher value means stay local." + }, + { + "name": "returnFactor", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Tendency of the random walk to return to the last visited node. A value below 1.0 means a higher tendency." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights to influence the probabilities of the random walks. The weights need to be >= 0. If unspecified, the algorithm runs unweighted." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "random", + "optional": true, + "description": "Seed value for the random number generator used to generate the random walks." + }, + { + "name": "walkBufferSize", + "type": "Integer", + "default": "1000", + "optional": true, + "description": "The number of random walks to complete before starting training." + } + ], + "page_path": "algorithms/random-walk/" + }, + { + "name": "Breadth First Search", + "procedure": "gds.bfs", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The node id of the node where to start the traversal." + }, + { + "name": "targetNodes", + "type": "List of Integer", + "default": "empty list", + "optional": true, + "description": "Ids for target nodes. Traversal terminates when any target node is visited." + }, + { + "name": "maxDepth", + "type": "Integer", + "default": "-1", + "optional": true, + "description": "The maximum distance from the source node at which nodes are visited." 
+ } + ], + "page_path": "algorithms/bfs/" + }, + { + "name": "Depth First Search", + "procedure": "gds.dfs", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The node id of the node where to start the traversal." + }, + { + "name": "targetNodes", + "type": "List of Integer", + "default": "empty list", + "optional": true, + "description": "Ids for target nodes. Traversal terminates when any target node is visited." + }, + { + "name": "maxDepth", + "type": "Integer", + "default": "-1", + "optional": true, + "description": "The maximum distance from the source node at which nodes are visited." + } + ], + "page_path": "algorithms/dfs/" + }, + { + "name": "Bellman-Ford Single-Source Shortest Path", + "procedure": "gds.bellmanFord", + "config": [ + { + "name": "sourceNode", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The Neo4j source node or node id." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/bellman-ford-single-source/" + }, + { + "name": "Longest Path for DAG", + "procedure": "gds.dag.longestPath", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/dag/longest-path/" + }, + { + "name": "All Pairs Shortest Path", + "procedure": "gds.allShortestPaths", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." 
+ } + ], + "page_path": "algorithms/all-pairs-shortest-path/" + }, + { + "name": "Topological Sort", + "procedure": "gds.dag.topologicalSort", + "config": [ + { + "name": "computeMaxDistanceFromSource", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Whether to enable computation of the maximal distance from source" + } + ], + "page_path": "algorithms/dag/topological-sort/" + }, + { + "name": "Longest Path for DAG", + "procedure": "gds.dag.longestPath", + "config": [ + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted." + } + ], + "page_path": "algorithms/dag/longest-path/" + }, + { + "name": "Fast Random Projection", + "procedure": "gds.fastRP", + "config": [ + { + "name": "propertyRatio", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "The desired ratio of the property embedding dimension to the total `embeddingDimension`. A positive value requires `featureProperties` to be non-empty." + }, + { + "name": "featureProperties", + "type": "List of String", + "default": "[]", + "optional": true, + "description": "The names of the node properties that should be used as input features. All property names must exist in the projected graph and be of type Float or List of Float." + }, + { + "name": "embeddingDimension", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The dimension of the computed node embeddings. Minimum value is 1." + }, + { + "name": "iterationWeights", + "type": "List of Float", + "default": "[0.0, 1.0, 1.0]", + "optional": true, + "description": "Contains a weight for each iteration. The weight controls how much the intermediate embedding from the iteration contributes to the final embedding." 
+ }, + { + "name": "nodeSelfInfluence", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "Controls for each node how much its initial random vector contributes to its final embedding." + }, + { + "name": "normalizationStrength", + "type": "Float", + "default": "0.0", + "optional": true, + "description": "The initial random vector for each node is scaled by its degree to the power of `normalizationStrength`." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "A random seed which is used for all randomness in computing the embeddings." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use for weighted random projection. If unspecified, the algorithm runs unweighted." + } + ], + "config_notes": [ + "The number of iterations is equal to the length of `iterationWeights`.", + "It is required that `iterationWeights` is non-empty or `nodeSelfInfluence` is non-zero." + ], + "page_path": "machine-learning/node-embeddings/fastrp/" + }, + { + "name": "GraphSAGE", + "procedure": "gds.beta.graphSage", + "config": [ + { + "name": "batchSize", + "type": "Integer", + "default": "100", + "optional": true, + "description": "The number of nodes per batch." + } + ], + "page_path": "machine-learning/node-embeddings/graph-sage/" + }, + { + "name": "Node2Vec", + "procedure": "gds.node2vec", + "config": [ + { + "name": "walkLength", + "type": "Integer", + "default": "80", + "optional": true, + "description": "The number of steps in a single random walk." + }, + { + "name": "walksPerNode", + "type": "Integer", + "default": "10", + "optional": true, + "description": "The number of random walks generated for each node." 
+ }, + { + "name": "inOutFactor", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Tendency of the random walk to stay close to the start node or fan out in the graph. Higher value means stay local." + }, + { + "name": "returnFactor", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Tendency of the random walk to return to the last visited node. A value below 1.0 means a higher tendency." + }, + { + "name": "relationshipWeightProperty", + "type": "String", + "default": "null", + "optional": true, + "description": "Name of the relationship property to use as weights to influence the probabilities of the random walks. The weights need to be >= 0. If unspecified, the algorithm runs unweighted." + }, + { + "name": "windowSize", + "type": "Integer", + "default": "10", + "optional": true, + "description": "Size of the context window when training the neural network." + }, + { + "name": "negativeSamplingRate", + "type": "Integer", + "default": "5", + "optional": true, + "description": "Number of negative samples to produce for each positive sample." + }, + { + "name": "positiveSamplingFactor", + "type": "Float", + "default": "0.001", + "optional": true, + "description": "Factor for influencing the distribution for positive samples. A higher value increases the probability that frequent nodes are down-sampled." + }, + { + "name": "negativeSamplingExponent", + "type": "Float", + "default": "0.75", + "optional": true, + "description": "Exponent applied to the node frequency to obtain the negative sampling distribution. A value of 1.0 samples proportionally to the frequency. A value of 0.0 samples each node equally." + }, + { + "name": "embeddingDimension", + "type": "Integer", + "default": "128", + "optional": true, + "description": "Size of the computed node embeddings." 
+ }, + { + "name": "embeddingInitializer", + "type": "String", + "default": "NORMALIZED", + "optional": true, + "description": "Method to initialize embeddings. Values are sampled uniformly from a range `[-a, a]`. With `NORMALIZED`, `a=0.5/embeddingDimension` and with `UNIFORM` instead `a=1`." + }, + { + "name": "iterations", + "type": "Integer", + "default": "1", + "optional": true, + "description": "Number of training iterations." + }, + { + "name": "initialLearningRate", + "type": "Float", + "default": "0.01", + "optional": true, + "description": "Learning rate used initially for training the neural network. The learning rate decreases after each training iteration." + }, + { + "name": "minLearningRate", + "type": "Float", + "default": "0.0001", + "optional": true, + "description": "Lower bound for learning rate as it is decreased during training." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "random", + "optional": true, + "description": "Seed value used to generate the random walks, which are used as the training set of the neural network. Note, that the generated embeddings are still nondeterministic." + }, + { + "name": "walkBufferSize", + "type": "Integer", + "default": "1000", + "optional": true, + "description": "The number of random walks to complete before starting training." + } + ], + "page_path": "machine-learning/node-embeddings/node2vec/" + }, + { + "name": "HashGNN", + "procedure": "gds.hashgnn", + "config": [ + { + "name": "featureProperties", + "type": "List of String", + "default": "[]", + "optional": true, + "description": "The names of the node properties that should be used as input features. All property names must exist in the projected graph and be of type Float or List of Float." + }, + { + "name": "iterations", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The number of iterations to run HashGNN. Must be at least 1." 
+ }, + { + "name": "embeddingDensity", + "type": "Integer", + "default": "n/a", + "optional": false, + "description": "The number of features to sample per node in each iteration. Called `K` in the original paper. Must be at least 1." + }, + { + "name": "heterogeneous", + "type": "Boolean", + "default": "false", + "optional": true, + "description": "Whether different relationship types should be treated differently." + }, + { + "name": "neighborInfluence", + "type": "Float", + "default": "1.0", + "optional": true, + "description": "Controls how often neighbors' features are sampled in each iteration relative to sampling the node's own features. Must be non-negative." + }, + { + "name": "binarizeFeatures", + "type": "Map", + "default": "n/a", + "optional": true, + "description": "A map with keys `dimension` and `threshold`. If given, features are transformed into `dimension` binary features via hyperplane rounding. Increasing `threshold` makes the output more sparse, and it defaults to `0`. The value of `dimension` must be at least 1." + }, + { + "name": "generateFeatures", + "type": "Map", + "default": "n/a", + "optional": true, + "description": "A map with keys `dimension` and `densityLevel`. Should be given if and only if `featureProperties` is empty. If given, `dimension` binary features are generated with approximately `densityLevel` active features per node. Both must be at least 1 and `densityLevel` at most `dimension`." + }, + { + "name": "outputDimension", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "If given, the embeddings are projected randomly into `outputDimension` dense features. Must be at least 1." + }, + { + "name": "randomSeed", + "type": "Integer", + "default": "n/a", + "optional": true, + "description": "A random seed which is used for all randomness in computing the embeddings." 
+ } + ], + "page_path": "machine-learning/node-embeddings/hashgnn/" + } + ] +} \ No newline at end of file diff --git a/doc/sphinx/create_algorithms_rst.py b/doc/sphinx/create_algorithms_rst.py index 1dc8c9b10..a3c5bd4cf 100644 --- a/doc/sphinx/create_algorithms_rst.py +++ b/doc/sphinx/create_algorithms_rst.py @@ -1,23 +1,128 @@ import json from textwrap import dedent -with open("algorithms.json") as f, open("source/algorithms.rst", "w") as fw: - functions = json.load(f) +INCLUDED_ALGORITHMS = { + "Article Rank", + "Articulation Points", + "Betweenness Centrality", + "Bridges", + # "CELF", + # "Closeness Centrality", + "Degree Centrality", + "Eigenvector Centrality", + "PageRank", + # "Harmonic Centrality", + # "HITS", + "Conductance metric", + # "K-Core Decomposition", + # "K-1 Coloring", + # "K-Means Clustering", + # "Label Propagation", + # "Leiden", + "Local Clustering Coefficient", + # "Louvain", + "Modularity metric", + # "Modularity Optimization", + # "Strongly Connected Components", + "Triangle Count", + # "Weakly Connected Components", + # "Approximate Maximum k-cut", + # "Speaker-Listener Label Propagation", + "Node Similarity", + # "Filtered Node Similarity", + # "K-Nearest Neighbors", + # "Filtered K-Nearest Neighbors", + "Delta-Stepping Single-Source Shortest Path", + # "Dijkstra Source-Target Shortest Path", + # "Dijkstra Single-Source Shortest Path", + # "A* Shortest Path", + # "Yen's Shortest Path algorithm", + # "Minimum Weight Spanning Tree", + # "Minimum Directed Steiner Tree", + # "Random Walk", + "Breadth First Search", + "Depth First Search", + # "Bellman-Ford Single-Source Shortest Path", + # "Longest Path for DAG", + # "All Pairs Shortest Path", + # "Topological Sort", + # "Longest Path for DAG", + "Fast Random Projection", + "GraphSAGE", + "Node2Vec", + "HashGNN", +} - fw.write( - dedent( - """\ - .. 
- DO NOT EDIT - File generated automatically - Algorithms procedures - ---------------------- - Listing of all algorithm procedures in the Neo4j Graph Data Science Python Client API. - These all assume that an object of :class:`.GraphDataScience` is available as `gds`. +def write_param(param, optional): + """Format one config entry (a dict with "name", "description", "default" and an optional "default_placeholder") as an RST bullet; when `optional` is true the bullet is marked *(Optional)* and ends with the default value.""" + name, description, default = param["name"], param["description"], param["default"] + default_placeholder = f' ({param["default_placeholder"]})' if "default_placeholder" in param else "" + description = description.replace("\n", "\n ") - """ - ) - ) + if optional: + return f" * **{name}** - *(Optional)* {description} *Default*: {default}{default_placeholder}." + else: + return f" * **{name}** - {description}" + + +def get_required_conf(config): + return [conf for conf in config if not conf["optional"]] + + +def get_optional_conf(config): + return [conf for conf in config if conf["optional"]] + + +def enrich_signature(sig, required, optional): + conf_string = [] + + for conf in required: + conf_string.append(conf["name"]) + + for conf in optional: + conf_name, conf_type, conf_default = conf["name"], conf["type"], conf["default"] + # if conf_type == "Float": + # try: + # conf_default = float(conf_default) + # except: + # print(f"{conf_default} not a float") + # elif conf_type == "Integer": + # try: + # conf_default = int(conf_default) + # except: + # print(f"{conf_default} not an int") + + if conf_default == "null": + conf_default = None + + conf_string.append(f"{conf_name}={conf_default}") + + if conf_string: + return sig.replace("**config: Any", f"*, {', '.join(conf_string)}") + else: + return sig + + +with open("algorithms-conf.json") as f: + j = json.load(f) + modes = j["modes"] + algorithms = {algo["procedure"]: algo for algo in j["algorithms"] if algo["name"] in INCLUDED_ALGORITHMS} + +PREAMBLE = """\ + .. 
+ DO NOT EDIT - File generated automatically + + Algorithms procedures + ---------------------- + Listing of all algorithm procedures in the Neo4j Graph Data Science Python Client API. + These all assume that an object of :class:`.GraphDataScience` is available as `gds`. + +""" + +with open("algorithms.json") as f, open("source/algorithms.rst", "w") as fw: + functions = json.load(f) + + fw.write(dedent(PREAMBLE)) for function in functions: name, sig, ret_type = ( @@ -25,7 +130,20 @@ function["function"]["signature"], function["function"]["return_type"], ) - fw.write(f".. py:function:: {name}({sig}) -> {ret_type}\n\n") + + # Example: gds.triangleCount.stream -> (gds.triangleCount, stream) + proc_name, proc_mode = name.rsplit(".", maxsplit=1) + required = [] + optional = [] + + if proc_name in algorithms and proc_mode == "stream": + mode_config = modes[proc_mode]["config"] + config = algorithms[proc_name]["config"] + + required = get_required_conf(mode_config) + get_required_conf(config) + optional = get_optional_conf(mode_config) + get_optional_conf(config) + + fw.write(f".. py:function:: {name}({enrich_signature(sig, required, optional)}) -> {ret_type}\n\n") if "description" in function: description = function["description"].strip() @@ -38,3 +156,23 @@ version, message = function["deprecated"]["version"], function["deprecated"]["message"] fw.write(f".. 
deprecated:: {version}\n") fw.write(f" {message}\n\n") + + if required or optional: + fw.write(" |\n\n") + fw.write(" **Parameters:**\n\n") + + for param in sig.split(","): + param_name, param_type = param.split(":") + param_name = param_name.strip() + param_type = param_type.strip() + + if param_name != "**config": + fw.write(f" * **{param_name}** - {param_type}\n\n") + else: + for conf in required: + fw.write(write_param(conf, False) + "\n\n") + + for conf in optional: + fw.write(write_param(conf, True) + "\n\n") + + fw.write("\n\n") diff --git a/doc/sphinx/source/algorithms.rst b/doc/sphinx/source/algorithms.rst index c6cf4c59b..fcd4f10b2 100644 --- a/doc/sphinx/source/algorithms.rst +++ b/doc/sphinx/source/algorithms.rst @@ -24,11 +24,35 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for allShortestPaths.delta.stats. -.. py:function:: gds.allShortestPaths.delta.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.allShortestPaths.delta.stream(G: Graph, *, sourceNode, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, delta=2.0, relationshipWeightProperty=None) -> DataFrame The Delta Stepping shortest path algorithm computes the shortest (weighted) path between one node and any other node in the graph. The computation is run multi-threaded. + | + + **Parameters:** + + * **G** - Graph + + * **sourceNode** - The Neo4j source node or node id. + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. 
+ + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **delta** - *(Optional)* The bucket width for grouping nodes with the same tentative distance to the source node. *Default*: 2.0. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + + .. py:function:: gds.allShortestPaths.delta.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for allShortestPaths.delta.strema. @@ -372,10 +396,40 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.articleRank.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.articleRank.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, dampingFactor=0.85, maxIterations=20, tolerance=0.0000001, relationshipWeightProperty=None, sourceNodes=[], scaler=None) -> DataFrame Article Rank is a variant of the Page Rank algorithm, which measures the transitive influence or connectivity of nodes. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. 
*Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **dampingFactor** - *(Optional)* The damping factor of the Page Rank calculation. Must be in [0, 1). *Default*: 0.85. + + * **maxIterations** - *(Optional)* The maximum number of iterations of Article Rank to run. *Default*: 20. + + * **tolerance** - *(Optional)* Minimum change in scores between iterations. If all scores change less than the tolerance value the result is considered stable, and the algorithm returns. *Default*: 0.0000001. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + * **sourceNodes** - *(Optional)* The nodes or node ids to use for computing Personalized Page Rank. *Default*: []. + + * **scaler** - *(Optional)* The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`. *Default*: None. + + + .. py:function:: gds.articleRank.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -404,10 +458,28 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.articulationPoints.stream(G: Graph, **config: Any) -> Series[Any] +.. py:function:: gds.articulationPoints.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True) -> Series[Any] Articulation Points is an algorithm that finds nodes that disconnect components if removed. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. 
Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + + .. py:function:: gds.articulationPoints.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -800,10 +872,34 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Betweenness centrality measures the relative information flow that passes through a node. -.. py:function:: gds.betweenness.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.betweenness.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, samplingSize=node count, samplingSeed=None, relationshipWeightProperty=None) -> DataFrame Betweenness centrality measures the relative information flow that passes through a node. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. 
*Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **samplingSize** - *(Optional)* The number of source nodes to consider for computing centrality scores. *Default*: node count. + + * **samplingSeed** - *(Optional)* The seed value for the random number generator that selects start nodes. *Default*: null. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + + .. py:function:: gds.betweenness.stream.estimate(G: Graph, **config: Any) -> Series[Any] Betweenness centrality measures the relative information flow that passes through a node. @@ -834,20 +930,62 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.bfs.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.bfs.stream(G: Graph, *, sourceNode, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, targetNodes=empty list, maxDepth=-1) -> DataFrame BFS is a traversal algorithm, which explores all of the neighbor nodes at the present depth prior to moving on to the nodes at the next depth level. + | + + **Parameters:** + + * **G** - Graph + + * **sourceNode** - The node id of the node where to start the traversal. + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. 
+ + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **targetNodes** - *(Optional)* Ids for target nodes. Traversal terminates when any target node is visited. *Default*: empty list. + + * **maxDepth** - *(Optional)* The maximum distance from the source node at which nodes are visited. *Default*: -1. + + + .. py:function:: gds.bfs.stream.estimate(G: Graph, **config: Any) -> Series[Any] BFS is a traversal algorithm, which explores all of the neighbor nodes at the present depth prior to moving on to the nodes at the next depth level. -.. py:function:: gds.bridges.stream(G: Graph, **config: Any) -> Series[Any] +.. py:function:: gds.bridges.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True) -> Series[Any] An algorithm to find Bridge edges in a graph. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + + .. py:function:: gds.bridges.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. 
@@ -893,11 +1031,33 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Collapse Path algorithm is a traversal algorithm capable of creating relationships between the start and end nodes of a traversal -.. py:function:: gds.conductance.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.conductance.stream(G: Graph, *, communityProperty, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, relationshipWeightProperty=None) -> DataFrame Evaluates a division of nodes into communities based on the proportion of relationships that cross community boundaries. + | + + **Parameters:** + + * **G** - Graph + + * **communityProperty** - The node property that holds the community ID as an integer for each node. Note that only non-negative community IDs are considered valid and will have their conductance computed. + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + + .. py:function:: gds.dag.topologicalSort.stream(G: Graph, **config: Any) -> DataFrame Returns a topological ordering of the nodes in a directed acyclic graph (DAG). 
@@ -922,10 +1082,32 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Degree centrality measures the number of incoming and outgoing relationships from a node. -.. py:function:: gds.degree.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.degree.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, orientation=NATURAL, relationshipWeightProperty=None) -> DataFrame Degree centrality measures the number of incoming and outgoing relationships from a node. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **orientation** - *(Optional)* The orientation used to compute node degrees. Supported orientations are `NATURAL`, `REVERSE` and `UNDIRECTED`. *Default*: NATURAL. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use for weighted degree computation. If unspecified, the algorithm runs unweighted. *Default*: null. + + + .. py:function:: gds.degree.stream.estimate(G: Graph, **config: Any) -> Series[Any] Degree centrality measures the number of incoming and outgoing relationships from a node. 
@@ -948,12 +1130,36 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.dfs.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.dfs.stream(G: Graph, *, sourceNode, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, targetNodes=empty list, maxDepth=-1) -> DataFrame Depth-first search (DFS) is an algorithm for traversing or searching tree or graph data structures. The algorithm starts at the root node (selecting some arbitrary node as the root node in the case of a graph) and explores as far as possible along each branch before backtracking. + | + + **Parameters:** + + * **G** - Graph + + * **sourceNode** - The node id of the node where to start the traversal. + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **targetNodes** - *(Optional)* Ids for target nodes. Traversal terminates when any target node is visited. *Default*: empty list. + + * **maxDepth** - *(Optional)* The maximum distance from the source node at which nodes are visited. *Default*: -1. + + + .. py:function:: gds.dfs.stream.estimate(G: Graph, **config: Any) -> Series[Any] Depth-first search (DFS) is an algorithm for traversing or searching tree or graph data structures. 
@@ -976,10 +1182,38 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.eigenvector.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.eigenvector.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, maxIterations=20, tolerance=0.0000001, relationshipWeightProperty=None, sourceNodes=[], scaler=None) -> DataFrame Eigenvector Centrality is an algorithm that measures the transitive influence or connectivity of nodes. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **maxIterations** - *(Optional)* The maximum number of iterations of Eigenvector Centrality to run. *Default*: 20. + + * **tolerance** - *(Optional)* Minimum change in scores between iterations. If all scores change less than the tolerance value the result is considered stable and the algorithm returns. *Default*: 0.0000001. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + * **sourceNodes** - *(Optional)* The nodes or node ids to use for computing Personalized Page Rank. *Default*: []. 
+ + * **scaler** - *(Optional)* The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`. *Default*: None. + + + .. py:function:: gds.eigenvector.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -1336,10 +1570,30 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.localClusteringCoefficient.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.localClusteringCoefficient.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, triangleCountProperty=n/a) -> DataFrame The local clustering coefficient is a metric quantifying how connected the neighborhood of a node is. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **triangleCountProperty** - *(Optional)* Node property that contains pre-computed triangle count. *Default*: n/a. + + + .. 
py:function:: gds.localClusteringCoefficient.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -1408,7 +1662,29 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g .. py:function:: gds.modularity.stats.estimate(G: Graph, **config: Any) -> Series[Any] -.. py:function:: gds.modularity.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.modularity.stream(G: Graph, *, communityProperty, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, relationshipWeightProperty=None) -> DataFrame + + | + + **Parameters:** + + * **G** - Graph + + * **communityProperty** - The node property that holds the community ID as an integer for each node. Note that only non-negative community IDs are considered valid and will have their modularity score computed. + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + .. 
py:function:: gds.modularity.stream.estimate(G: Graph, **config: Any) -> Series[Any] @@ -1464,12 +1740,66 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.nodeSimilarity.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.nodeSimilarity.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, similarityCutoff=1e-42, degreeCutoff=1, upperDegreeCutoff=2147483647, topK=10, bottomK=10, topN=0, bottomN=0, relationshipWeightProperty=None, similarityMetric=JACCARD, useComponents=false) -> DataFrame The Node Similarity algorithm compares a set of nodes based on the nodes they are connected to. Two nodes are considered similar if they share many of the same neighbors. Node Similarity computes pair-wise similarities based on the Jaccard metric. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **similarityCutoff** - *(Optional)* Lower limit for the similarity score to be present in the result. + Values must be between 0 and 1. *Default*: 1e-42. + + * **degreeCutoff** - *(Optional)* Inclusive lower bound on the node degree for a node to be considered in the comparisons. 
+ This value can not be lower than 1. *Default*: 1. + + * **upperDegreeCutoff** - *(Optional)* Inclusive upper bound on the node degree for a node to be considered in the comparisons. + This value can not be lower than 1. *Default*: 2147483647. + + * **topK** - *(Optional)* Limit on the number of scores per node. + The K largest results are returned. + This value cannot be lower than 1. *Default*: 10. + + * **bottomK** - *(Optional)* Limit on the number of scores per node. + The K smallest results are returned. + This value cannot be lower than 1. *Default*: 10. + + * **topN** - *(Optional)* Global limit on the number of scores computed. + The N largest total results are returned. + This value cannot be negative, a value of 0 means no global limit. *Default*: 0. + + * **bottomN** - *(Optional)* Global limit on the number of scores computed. + The N smallest total results are returned. + This value cannot be negative, a value of 0 means no global limit. *Default*: 0. + + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. + If unspecified, the algorithm runs unweighted. *Default*: null. + + * **similarityMetric** - *(Optional)* The metric used to compute similarity. + Can be either `JACCARD`, `OVERLAP` or `COSINE`. *Default*: JACCARD. + + * **useComponents** - *(Optional)* If enabled, Node Similarity will use components to improve the performance of the computation, skipping comparisons of nodes in different components. + Set to `false` (Default): the algorithm does not use components, but computes similarity across the entire graph. + Set to `true`: the algorithm uses components, and will compute these components before computing similarity. + Set to *String*: use pre-computed components stored in graph, *String* is the key for a node property representing components. *Default*: false. + + + ..
py:function:: gds.nodeSimilarity.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -1544,10 +1874,40 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.pageRank.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.pageRank.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, dampingFactor=0.85, maxIterations=20, tolerance=0.0000001, relationshipWeightProperty=None, sourceNodes=[], scaler=None) -> DataFrame Page Rank is an algorithm that measures the transitive influence or connectivity of nodes. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally). + + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **dampingFactor** - *(Optional)* The damping factor of the Page Rank calculation. Must be in [0, 1). *Default*: 0.85. + + * **maxIterations** - *(Optional)* The maximum number of iterations of Page Rank to run. *Default*: 20. + + * **tolerance** - *(Optional)* Minimum change in scores between iterations. If all scores change less than the tolerance value the result is considered stable and the algorithm returns. *Default*: 0.0000001. 
+ + * **relationshipWeightProperty** - *(Optional)* Name of the relationship property to use as weights. If unspecified, the algorithm runs unweighted. *Default*: null. + + * **sourceNodes** - *(Optional)* The nodes or node ids to use for computing Personalized Page Rank. *Default*: []. + + * **scaler** - *(Optional)* The name of the scaler applied for the final scores. Supported values are `None`, `MinMax`, `Max`, `Mean`, `Log`, and `StdScore`. To apply scaler-specific configuration, use the Map syntax: `{scaler: 'name', ...}`. *Default*: None. + + + .. py:function:: gds.pageRank.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure. @@ -1783,11 +2143,31 @@ These all assume that an object of :class:`.GraphDataScience` is available as `g Returns an estimation of the memory consumption for that procedure. -.. py:function:: gds.triangleCount.stream(G: Graph, **config: Any) -> DataFrame +.. py:function:: gds.triangleCount.stream(G: Graph, *, nodeLabels=['*'], relationshipTypes=['*'], concurrency=4, jobId=None, logProgress=True, maxDegree=2**63 - 1) -> DataFrame Triangle counting is a community detection graph algorithm that is used to determine the number of triangles passing through each node in the graph. + | + + **Parameters:** + + * **G** - Graph + + * **nodeLabels** - *(Optional)* Filter the named graph using the given node labels. Nodes with any of the given labels will be included. *Default*: ['*']. + + * **relationshipTypes** - *(Optional)* Filter the named graph using the given relationship types. Relationships with any of the given types will be included. *Default*: ['*']. + + * **concurrency** - *(Optional)* The number of concurrent threads used for running the algorithm. *Default*: 4. + + * **jobId** - *(Optional)* An ID that can be provided to more easily track the algorithm’s progress. *Default*: None (Generated internally).
+ + * **logProgress** - *(Optional)* If disabled the progress percentage will not be logged. *Default*: True. + + * **maxDegree** - *(Optional)* If a node has a degree higher than this it will not be considered by the algorithm. The triangle count for these nodes will be `-1`. *Default*: 2**63 - 1. + + + .. py:function:: gds.triangleCount.stream.estimate(G: Graph, **config: Any) -> Series[Any] Returns an estimation of the memory consumption for that procedure.