Skip to content

Commit

Permalink
Merge pull request #6 from VIDA-NYU/trimmer
Browse files Browse the repository at this point in the history
Refactor signature trimmer and robust signature blocks generator
  • Loading branch information
heikomuller authored Dec 11, 2020
2 parents 4f7caa8 + c89a5d3 commit a659e7c
Show file tree
Hide file tree
Showing 92 changed files with 4,231 additions and 1,609 deletions.
6 changes: 5 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,14 @@
<manifest>
<addClasspath>true</addClasspath>
<mainClass>org.opendata.curation.d4.D4</mainClass>
<!--mainClass>org.opendata.curation.d4.signature.RobustSignatureGenerator</mainClass-->
<!--mainClass>org.opendata.db.tools.Dataset2ColumnsConverter</mainClass-->
<!--mainClass>org.opendata.db.tools.EQIndexGenerator</mainClass-->
<!--mainClass>org.opendata.db.tools.TermIndexGenerator</mainClass-->
<!--mainClass>org.opendata.curation.d4.export.PrimaryDomainWriter</mainClass-->
<!--mainClass>org.opendata.db.eq.EQColumnCountHistorgramWriter</mainClass-->
<!--mainClass>org.opendata.curation.d4.experiments.SignatureDropStatsExperiment</mainClass-->
<!--mainClass>org.opendata.curation.d4.explore.GTDisjointTermsPrinter</mainClass-->
<!--mainClass>org.opendata.curation.d4.export.ExportStrongDomains</mainClass-->
</manifest>
</archive>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,33 +76,10 @@ public synchronized boolean contains(int nodeId) {
}
}

/**
* Get the identifier of the connected component that contains the node with
* the given identifier.
*
* The lookup is a direct array access into _componentMap; no check is made
* that the node identifier is known.
*
* @param nodeId Identifier of the node; used as the index into _componentMap.
* @return The component identifier stored in _componentMap for the node.
*/
private int getComponentForNode(int nodeId) {

//if (!_nodes.contains(nodeId)) {
//    throw new RuntimeException("Unknown node identifier: " + nodeId);
//}

// NOTE(review): the comment and disabled code below describe an earlier,
// map-based lookup. The active code reads the array entry unconditionally.
// If the nodeId is not contained in the component map then the node
// is in the component that has the same identifier as the nodeId
//if (_componentMap.containsKey(nodeId)) {
//    return _componentMap.get(nodeId);
//} else {
//    return nodeId;
//}
return _componentMap[nodeId];
}

public synchronized void edge(int sourceId, int targetId) {

int sourceCompId = this.getComponentForNode(sourceId);
int targetCompId = this.getComponentForNode(targetId);
int sourceCompId = _componentMap[sourceId];
int targetCompId = _componentMap[targetId];

if (sourceCompId != targetCompId) {
// The respective components may not have been instantiated yet.
Expand Down Expand Up @@ -165,6 +142,20 @@ public synchronized IdentifiableObjectSet<IdentifiableIDSet> getComponents() {

return result;
}

/**
 * Test if all nodes belong to the same single component.
 *
 * The method is synchronized, like the other public methods of this class
 * that access the component state, so that the size check and the read of
 * the single remaining component are not interleaved with a concurrent
 * call that modifies the components.
 *
 * @return true if there is exactly one component and the size of that
 * component equals the number of nodes; false otherwise.
 */
public synchronized boolean isComplete() {

    if (_components.size() == 1) {
        // Exactly one component exists; it is complete only if it contains
        // as many members as there are nodes.
        int compSize = _components.values().iterator().next().size();
        return (compSize == _nodes.length());
    }
    return false;
}

private void merge(
HashSet<Integer> target,
Expand All @@ -180,11 +171,4 @@ private void merge(

_components.remove(sourceCompId);
}

/**
 * Test whether two nodes are assigned to the same component.
 *
 * @param node1 Identifier of the first node.
 * @param node2 Identifier of the second node.
 * @return true if the component looked up for both nodes is identical.
 */
public synchronized boolean nodesAreInSameComponent(int node1, int node2) {

    return this.getComponentForNode(node1) == this.getComponentForNode(node2);
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/opendata/core/io/EntitySetReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void read(

public void read(EntityConsumer consumer) throws java.io.IOException {

this.read(new AnyObjectFilter<Integer>(), consumer);
this.read(new AnyObjectFilter<>(), consumer);
}

public EntitySet readEntities(ObjectFilter<Integer> filter) throws java.io.IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opendata.curation.d4.column;

import java.util.HashMap;
import org.opendata.core.set.HashIDSet;
package org.opendata.core.io.prov;

/**
* Factory pattern for column consumers.
* Simple interface to manage the information source for different types of
* object collections. These collections may either be read from disk or
* generated in memory.
*
* @author Heiko Mueller <[email protected]>
*/
public interface ExpandedColumnConsumerFactory {
public interface DataCollection {

public ExpandedColumnConsumer getConsumer(HashMap<Integer, HashIDSet> groups);
public String source();
}
28 changes: 28 additions & 0 deletions src/main/java/org/opendata/core/io/prov/DataSink.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* This file is part of the Data-Driven Domain Discovery Tool (D4).
*
* Copyright (c) 2018-2020 New York University.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opendata.core.io.prov;

/**
* Simple interface to manage the data sink name for data generators.
*
* @author Heiko Mueller <[email protected]>
*/
public interface DataSink {

public String target();
}
188 changes: 0 additions & 188 deletions src/main/java/org/opendata/core/prune/CandidateSetFinder.java

This file was deleted.

Loading

0 comments on commit a659e7c

Please sign in to comment.