Skip to content

Commit

Permalink
Another fix for removeFeatures that properly deals with examples that contain duplicate features (which could occur if the method is called before the example has been validated).
Browse files Browse the repository at this point in the history
  • Loading branch information
Craigacp committed Aug 13, 2020
1 parent cf013e7 commit 42694f8
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 15 deletions.
12 changes: 7 additions & 5 deletions Core/src/main/java/org/tribuo/impl/ArrayExample.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.tribuo.transform.TransformerMap;
import org.tribuo.util.Merger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
Expand Down Expand Up @@ -293,18 +294,19 @@ public int size() {

@Override
public void removeFeatures(List<Feature> featureList) {
Map<String,Integer> map = new HashMap<>();
Map<String,List<Integer>> map = new HashMap<>();
for (int i = 0; i < featureNames.length; i++) {
map.put(featureNames[i],i);
List<Integer> list = map.computeIfAbsent(featureNames[i],(k) -> new ArrayList<>());
list.add(i);
}

PriorityQueue<Integer> removeQueue = new PriorityQueue<>();
for (Feature f : featureList) {
Integer i = map.get(f.getName());
List<Integer> i = map.get(f.getName());
if (i != null) {
// If we've found this feature ID remove it from the map to prevent double counting
// If we've found this feature remove it from the map to prevent double counting
map.remove(f.getName());
removeQueue.add(i);
removeQueue.addAll(i);
}
}

Expand Down
12 changes: 7 additions & 5 deletions Core/src/main/java/org/tribuo/impl/BinaryFeaturesExample.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package org.tribuo.impl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
Expand Down Expand Up @@ -312,18 +313,19 @@ public int size() {

@Override
public void removeFeatures(List<Feature> featureList) {
Map<String,Integer> map = new HashMap<>();
Map<String,List<Integer>> map = new HashMap<>();
for (int i = 0; i < featureNames.length; i++) {
map.put(featureNames[i],i);
List<Integer> list = map.computeIfAbsent(featureNames[i],(k) -> new ArrayList<>());
list.add(i);
}

PriorityQueue<Integer> removeQueue = new PriorityQueue<>();
for (Feature f : featureList) {
Integer i = map.get(f.getName());
List<Integer> i = map.get(f.getName());
if (i != null) {
// If we've found this feature ID remove it from the map to prevent double counting
// If we've found this feature remove it from the map to prevent double counting
map.remove(f.getName());
removeQueue.add(i);
removeQueue.addAll(i);
}
}

Expand Down
12 changes: 7 additions & 5 deletions Core/src/main/java/org/tribuo/impl/IndexedArrayExample.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.tribuo.Output;
import org.tribuo.util.Merger;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
Expand Down Expand Up @@ -200,18 +201,19 @@ public void reduceByName(Merger merger) {

@Override
public void removeFeatures(List<Feature> featureList) {
Map<String,Integer> map = new HashMap<>();
Map<String,List<Integer>> map = new HashMap<>();
for (int i = 0; i < featureNames.length; i++) {
map.put(featureNames[i],i);
List<Integer> list = map.computeIfAbsent(featureNames[i],(k) -> new ArrayList<>());
list.add(i);
}

PriorityQueue<Integer> removeQueue = new PriorityQueue<>();
for (Feature f : featureList) {
Integer i = map.get(f.getName());
List<Integer> i = map.get(f.getName());
if (i != null) {
// If we've found this feature ID remove it from the map to prevent double counting
// If we've found this feature remove it from the map to prevent double counting
map.remove(f.getName());
removeQueue.add(i);
removeQueue.addAll(i);
}
}

Expand Down
13 changes: 13 additions & 0 deletions Core/src/test/java/org/tribuo/ExampleTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,21 @@ public void testArrayExampleRemove() {
assertEquals(2,example.size());
assertEquals("A",example.lookup("A").name);
assertEquals("C",example.lookup("C").name);

example = new ArrayExample<>(output,new String[]{"A","B","C","D","E","A","C","E"},new double[]{1,1,1,1,1,1,1,1});
featureList = new ArrayList<>();
featureList.add(new Feature("D",1.0));
featureList.add(new Feature("D",1.0));
featureList.add(new Feature("B",1.0));
featureList.add(new Feature("E",1.0));
example.removeFeatures(featureList);
assertEquals(4,example.size());
assertEquals("A",example.lookup("A").name);
assertEquals("C",example.lookup("C").name);
}



public static void checkDenseExample(Example<MockOutput> expected, Example<MockOutput> actual) {
assertEquals(expected.size(),actual.size());
Iterator<Feature> expectedItr = expected.iterator();
Expand Down

0 comments on commit 42694f8

Please sign in to comment.