diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e3a9b41 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# Binary files +*.class + +# Eclipse files +.classpath +.project diff --git a/Classification.java b/src/main/java/de/daslaboratorium/machinelearning/classifier/Classification.java similarity index 100% rename from Classification.java rename to src/main/java/de/daslaboratorium/machinelearning/classifier/Classification.java diff --git a/Classifier.java b/src/main/java/de/daslaboratorium/machinelearning/classifier/Classifier.java similarity index 100% rename from Classifier.java rename to src/main/java/de/daslaboratorium/machinelearning/classifier/Classifier.java diff --git a/IFeatureProbability.java b/src/main/java/de/daslaboratorium/machinelearning/classifier/IFeatureProbability.java similarity index 100% rename from IFeatureProbability.java rename to src/main/java/de/daslaboratorium/machinelearning/classifier/IFeatureProbability.java diff --git a/BayesClassifier.java b/src/main/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifier.java similarity index 95% rename from BayesClassifier.java rename to src/main/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifier.java index cbdf520..9e37d6b 100644 --- a/BayesClassifier.java +++ b/src/main/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifier.java @@ -1,10 +1,13 @@ -package de.daslaboratorium.machinelearning.classifier; +package de.daslaboratorium.machinelearning.classifier.bayes; import java.util.Collection; import java.util.Comparator; import java.util.SortedSet; import java.util.TreeSet; +import de.daslaboratorium.machinelearning.classifier.Classification; +import de.daslaboratorium.machinelearning.classifier.Classifier; + /** * A concrete implementation of the abstract Classifier class. 
The Bayes
  * classifier implements a naive Bayes approach to classifying a given set of
diff --git a/src/test/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifierTest.java b/src/test/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifierTest.java
new file mode 100644
index 0000000..0a849ee
--- /dev/null
+++ b/src/test/java/de/daslaboratorium/machinelearning/classifier/bayes/BayesClassifierTest.java
@@ -0,0 +1,90 @@
+package de.daslaboratorium.machinelearning.classifier.bayes;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import de.daslaboratorium.machinelearning.classifier.Classification;
+import de.daslaboratorium.machinelearning.classifier.Classifier;
+
+/**
+ * Unit tests for {@link BayesClassifier}, using whitespace-tokenized sentences
+ * as String features and "positive"/"negative" as String categories.
+ */
+public class BayesClassifierTest {
+
+    /** Tolerance used when comparing expected and computed probabilities. */
+    private static final double EPSILON = 0.001;
+    private static final String CATEGORY_NEGATIVE = "negative";
+    private static final String CATEGORY_POSITIVE = "positive";
+
+    /** Classifier under test; re-created and re-trained before every test. */
+    private Classifier<String, String> bayes;
+
+    /**
+     * Creates a fresh classifier and trains it with one positive and one
+     * negative example sentence.
+     */
+    @Before
+    public void setUp() {
+        /*
+         * Create a new classifier instance. The context features are
+         * Strings and the context will be classified with a String according
+         * to the feature set of the context.
+         */
+        bayes = new BayesClassifier<String, String>();
+
+        /*
+         * The classifier can learn from classifications that are handed over
+         * to the learn methods. Imagine a tokenized text as follows. The tokens
+         * are the text's features. The category of the text will either be
+         * positive or negative.
+         */
+        final String[] positiveText = "I love sunny days".split("\\s");
+        bayes.learn(CATEGORY_POSITIVE, Arrays.asList(positiveText));
+
+        final String[] negativeText = "I hate rain".split("\\s");
+        bayes.learn(CATEGORY_NEGATIVE, Arrays.asList(negativeText));
+    }
+
+    /**
+     * Verifies that an unseen sentence sharing a token with a training example
+     * is assigned that example's category.
+     */
+    @Test
+    public void testStringClassification() {
+        final String[] unknownText1 = "today is a sunny day".split("\\s");
+        final String[] unknownText2 = "there will be rain".split("\\s");
+
+        Assert.assertEquals(CATEGORY_POSITIVE, bayes.classify(Arrays.asList(unknownText1)).getCategory());
+        Assert.assertEquals(CATEGORY_NEGATIVE, bayes.classify(Arrays.asList(unknownText2)).getCategory());
+    }
+
+    /**
+     * Verifies the per-category probabilities returned by
+     * {@code classifyDetailed}. The assertions expect the less probable
+     * category (negative) first, followed by the more probable one (positive).
+     */
+    @Test
+    public void testStringClassificationInDetails() {
+        final String[] unknownText1 = "today is a sunny day".split("\\s");
+
+        Collection<Classification<String, String>> classifications =
+                ((BayesClassifier<String, String>) bayes).classifyDetailed(Arrays.asList(unknownText1));
+
+        List<Classification<String, String>> list =
+                new ArrayList<Classification<String, String>>(classifications);
+
+        Assert.assertEquals(CATEGORY_NEGATIVE, list.get(0).getCategory());
+        Assert.assertEquals(0.0078125, list.get(0).getProbability(), EPSILON);
+
+        Assert.assertEquals(CATEGORY_POSITIVE, list.get(1).getCategory());
+        Assert.assertEquals(0.0234375, list.get(1).getProbability(), EPSILON);
+    }
+
+}