From ec16ad2656d9a5847ad1b89c49bea9ff6d0af0d4 Mon Sep 17 00:00:00 2001 From: Tomas Barton Date: Tue, 9 Jun 2015 20:58:17 +0200 Subject: [PATCH] import --- .gitignore | 14 ++ README.md | 5 + evolve-sc | 3 + nb-configuration.xml | 21 ++ pom.xml | 204 ++++++++++++++++++ run | 9 + .../clustering/benchmark/AbsParams.java | 24 +++ .../clueminer/clustering/benchmark/Bench.java | 133 ++++++++++++ .../clustering/benchmark/BenchParams.java | 26 +++ .../clustering/benchmark/Container.java | 42 ++++ .../clustering/benchmark/Experiment.java | 80 +++++++ .../clustering/benchmark/GnuplotReporter.java | 185 ++++++++++++++++ .../clustering/benchmark/HclustBenchmark.java | 41 ++++ .../clueminer/clustering/benchmark/Main.java | 72 +++++++ .../benchmark/consensus/ConsensusExp.java | 69 ++++++ .../benchmark/consensus/ConsensusParams.java | 34 +++ .../benchmark/consensus/ConsensusRun.java | 152 +++++++++++++ .../benchmark/evolve/EvolveExp.java | 101 +++++++++ .../benchmark/evolve/EvolveParams.java | 24 +++ .../clustering/benchmark/exp/Data.java | 171 +++++++++++++++ .../benchmark/exp/EvolveScores.java | 61 ++++++ .../clustering/benchmark/exp/HclusPar.java | 42 ++++ .../clustering/benchmark/exp/HclusPar2.java | 43 ++++ .../clustering/benchmark/exp/Hclust.java | 84 ++++++++ .../clustering/benchmark/gen/NsgaGen.java | 74 +++++++ .../clustering/benchmark/gen/NsgaGenExp.java | 131 +++++++++++ .../benchmark/gen/NsgaGenParams.java | 49 +++++ .../clustering/benchmark/nsga/NsgaExp.java | 134 ++++++++++++ .../clustering/benchmark/nsga/NsgaParams.java | 55 +++++ .../clustering/benchmark/nsga/NsgaScore.java | 81 +++++++ src/main/nbm/manifest.mf | 2 + .../clustering/benchmark/Bundle.properties | 5 + .../clustering/benchmark/ExperimentTest.java | 50 +++++ .../benchmark/HclustBenchmarkTest.java | 128 +++++++++++ 34 files changed, 2349 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100755 evolve-sc create mode 100644 nb-configuration.xml create mode 100644 pom.xml create mode 100755 run create mode 100644 src/main/java/org/clueminer/clustering/benchmark/AbsParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/Bench.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/BenchParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/Container.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/Experiment.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/GnuplotReporter.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/HclustBenchmark.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/Main.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusExp.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusRun.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveExp.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/exp/Data.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/exp/EvolveScores.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar2.java create mode 100644 
src/main/java/org/clueminer/clustering/benchmark/exp/Hclust.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGen.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenExp.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaExp.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaParams.java create mode 100644 src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaScore.java create mode 100644 src/main/nbm/manifest.mf create mode 100644 src/main/resources/org/clueminer/clustering/benchmark/Bundle.properties create mode 100644 src/test/java/org/clueminer/clustering/benchmark/ExperimentTest.java create mode 100644 src/test/java/org/clueminer/clustering/benchmark/HclustBenchmarkTest.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1558415 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +*.class + +# Package Files # +*.jar +*.war +*.ear +*~ +.classpath +.project +.settings/ +target/ +logs/ +/nbproject/private/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..72acee2 --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +# Clustering benchmarks + +in order to run benchmark compile dependencies into a single JAR file: + + mvn assembly:assembly diff --git a/evolve-sc b/evolve-sc new file mode 100755 index 0000000..985cce3 --- /dev/null +++ b/evolve-sc @@ -0,0 +1,3 @@ +#!/bin/bash +ARGS="evolve-sc --test --generations 20 --population 50 $@" +`./run $ARGS` diff --git a/nb-configuration.xml b/nb-configuration.xml new file mode 100644 index 0000000..496e312 --- /dev/null +++ b/nb-configuration.xml @@ -0,0 +1,21 @@ + + + + + unsupervised + + + + ${project.basedir}/../../license.txt + + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..f42db0e --- /dev/null +++ b/pom.xml @@ -0,0 +1,204 @@ + + + 4.0.0 + + clueminer-parent + org.clueminer + 0.1-SNAPSHOT + ../.. 
+ + + org.clueminer + clustering-benchmark + 0.1-SNAPSHOT + nbm + + clustering-benchmark + + + UTF-8 + + + + + + netbeans + NetBeans + http://bits.netbeans.org/maven2/ + + false + + + + + + + org.netbeans.api + org-netbeans-api-annotations-common + RELEASE80 + + + ${project.groupId} + dataset-api + ${project.version} + + + ${project.groupId} + clustering-impl + ${project.version} + + + ${project.groupId} + clustering-api + ${project.version} + + + ${project.groupId} + dataset-impl + ${project.version} + + + ${project.groupId} + dataset-io + ${project.version} + + + ${project.groupId} + fixtures + ${project.version} + + + ${project.groupId} + dataset-benchmark + ${project.version} + + + org.netbeans.api + org-openide-util-lookup + RELEASE80 + + + org.netbeans.api + org-openide-util + RELEASE80 + + + ${project.groupId} + utils + ${project.version} + + + ${project.groupId} + clustering-evolution + ${project.version} + + + ${project.groupId} + clustering-eval + ${project.version} + + + ${project.groupId} + clustering-dist + ${project.version} + + + ${project.groupId} + fixtures-clustering + ${project.version} + + + junit + junit + 4.10 + test + + + com.beust + jcommander + 1.32 + + + ${project.groupId} + guava + ${project.version} + + + ${project.groupId} + fastutil + ${project.version} + + + ${project.groupId} + evolution-api + ${project.version} + + + + + + + org.codehaus.mojo + nbm-maven-plugin + true + + + org.clueminer.clustering.benchmark + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven.compiler} + + 1.7 + 1.7 + + + org.clueminer.clustering.benchmark.Main + org.clueminer.clustering.benchmark + true + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + true + + + org.clueminer.clustering.benchmark.Main + org.clueminer.clustering.benchmark + true + + + + + + maven-assembly-plugin + + + jar-with-dependencies + + + + org.clueminer.clustering.benchmark.Main + + + + + + + diff --git a/run b/run new file mode 100755 index 0000000..72c793b --- /dev/null +++ b/run @@ -0,0 +1,9 @@ +#!/bin/bash +ARGS="$@" +MAIN="org.clueminer.clustering.benchmark.Main" +jarfile="$(ls -t target/*jar-with-dependencies.jar | head -1)" +if [[ -f "$jarfile" ]]; then + java -jar $jarfile $ARGS +else + mvn "-Dexec.args=-classpath %classpath $MAIN $ARGS" -Dexec.executable=java -Dexec.classpathScope=runtime org.codehaus.mojo:exec-maven-plugin:1.2.1:exec +fi diff --git a/src/main/java/org/clueminer/clustering/benchmark/AbsParams.java b/src/main/java/org/clueminer/clustering/benchmark/AbsParams.java new file mode 100644 index 0000000..651f058 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/AbsParams.java @@ -0,0 +1,24 @@ +package org.clueminer.clustering.benchmark; + +import com.beust.jcommander.Parameter; +import java.io.File; +import org.clueminer.utils.FileUtils; +import org.openide.util.NbBundle; + +/** + * + * @author Tomas Barton + */ +public class AbsParams { + + @Parameter(names = "--dir", description = "directory for results", required = false) + public String home = System.getProperty("user.home") + File.separatorChar + + NbBundle.getMessage(FileUtils.class, "FOLDER_Home"); + + @Parameter(names = "--repeat", description = "number of repetitions of each experiment") + public int repeat = 5; + + @Parameter(names = "--log", description = "java log level") + public String log = "INFO"; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/Bench.java b/src/main/java/org/clueminer/clustering/benchmark/Bench.java new file mode 100644 index 
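The command-line flags above (--dir, --repeat, --log in AbsParams, plus the BenchParams flags that follow) are bound by JCommander annotations. A minimal, self-contained sketch of that pattern; DemoParams is a hypothetical class used only for illustration and is not part of this patch:

    import com.beust.jcommander.JCommander;
    import com.beust.jcommander.Parameter;
    import com.beust.jcommander.ParameterException;

    public class DemoParams {

        @Parameter(names = "--repeat", description = "number of repetitions of each experiment")
        public int repeat = 5;

        @Parameter(names = "--log", description = "java log level")
        public String log = "INFO";

        public static void main(String[] args) {
            DemoParams params = new DemoParams();
            JCommander cmd = new JCommander(params);
            try {
                // e.g. args = {"--repeat", "10", "--log", "FINE"} fills the annotated fields
                cmd.parse(args);
            } catch (ParameterException ex) {
                // unknown flag or unparsable value: print the generated help, as Bench.printUsage() does
                System.out.println(ex.getMessage());
                cmd.usage();
                System.exit(0);
            }
            System.out.println("repeat=" + params.repeat + ", log=" + params.log);
        }
    }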
0000000..6e224e5 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/Bench.java @@ -0,0 +1,133 @@ +package org.clueminer.clustering.benchmark; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.ParameterException; +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Formatter; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.LogManager; +import java.util.logging.Logger; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.DatasetFixture; +import org.clueminer.log.ClmFormatter; + +/** + * + * @author Tomas Barton + */ +public abstract class Bench { + + protected static String benchmarkFolder; + protected HashMap, Integer>> availableDatasets = new HashMap<>(); + + public Bench() { + //constructor without arguments + } + + public static void ensureFolder(String folder) { + File file = new File(folder); + if (!file.exists()) { + if (file.mkdirs()) { + System.out.println("Directory " + folder + " created!"); + } else { + System.out.println("Failed to create " + folder + "directory!"); + } + } + } + + public abstract void main(String[] args); + + public static void printUsage(String[] args, JCommander cmd, AbsParams params) { + + try { + cmd.parse(args); + + } catch (ParameterException ex) { + System.out.println(ex.getMessage()); + cmd.usage(); + System.exit(0); + } + } + + protected void loadDatasets() { + Map, Integer> datasets = DatasetFixture.allDatasets(); + for (Map.Entry, Integer> entry : datasets.entrySet()) { + Dataset d = entry.getKey(); + availableDatasets.put(d.getName(), entry); + } + } + + /** + * Load specific dataset by name + * + * @param name + */ + protected void load(String name) { + Map, Integer> datasets = DatasetFixture.allDatasets(); + for (Map.Entry, Integer> entry : datasets.entrySet()) { + Dataset d = entry.getKey(); + if (d.getName().equalsIgnoreCase(name)) { + availableDatasets.put(d.getName(), entry); + } + } + } + + public static String safeName(String name) { + return name.toLowerCase().replace(" ", "_"); + } + + public void setupLogging(AbsParams params) { + Logger log = LogManager.getLogManager().getLogger(""); + Formatter formater = new ClmFormatter(); + Level level; + + switch (params.log.toUpperCase()) { + case "INFO": + level = Level.INFO; + break; + case "SEVERE": + level = Level.SEVERE; + break; + case "WARNING": + level = Level.WARNING; + break; + case "ALL": + level = Level.ALL; + break; + case "FINE": + level = Level.FINE; + break; + case "FINER": + level = Level.FINER; + break; + case "FINEST": + level = Level.FINEST; + break; + default: + throw new RuntimeException("log level " + log + " is not supported"); + } + setupHandlers(log, level, formater); + + //remove date line from logger + log.setUseParentHandlers(false); + } + + private void setupHandlers(Logger logger, Level level, Formatter formater) { + for (Handler handler : logger.getHandlers()) { + handler.setLevel(level); + handler.setFormatter(formater); + } + Logger parentLogger = logger.getParent(); + if (null != parentLogger) { + for (Handler handler : parentLogger.getHandlers()) { + handler.setLevel(level); + handler.setFormatter(formater); + } + } + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/BenchParams.java b/src/main/java/org/clueminer/clustering/benchmark/BenchParams.java new file mode 100644 index 0000000..86a2c08 --- /dev/null +++ 
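Bench.setupLogging() above maps the --log string onto a java.util.logging Level and pushes that level plus a ClmFormatter onto the existing handlers. A JDK-only sketch of the same idea; ClmFormatter is clueminer-specific, so SimpleFormatter stands in here and the class name is hypothetical:

    import java.util.logging.Handler;
    import java.util.logging.Level;
    import java.util.logging.LogManager;
    import java.util.logging.Logger;
    import java.util.logging.SimpleFormatter;

    public class LoggingSetupSketch {

        // roughly what setupLogging()/setupHandlers() do: adjust the root logger
        // and every attached handler so the requested level is actually emitted
        public static void configure(String levelName) {
            Level level = Level.parse(levelName.toUpperCase()); // INFO, WARNING, FINE, FINEST, ALL, ...
            Logger root = LogManager.getLogManager().getLogger("");
            root.setLevel(level);
            for (Handler handler : root.getHandlers()) {
                handler.setLevel(level);
                handler.setFormatter(new SimpleFormatter());
            }
        }

        public static void main(String[] args) {
            configure(args.length > 0 ? args[0] : "FINE");
            Logger.getLogger(LoggingSetupSketch.class.getName()).fine("fine-level message is now visible");
        }
    }

Level.parse() accepts the same names as the switch statement above and throws IllegalArgumentException for anything else, which would be one way to report an unsupported value of params.log.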
b/src/main/java/org/clueminer/clustering/benchmark/BenchParams.java @@ -0,0 +1,26 @@ +package org.clueminer.clustering.benchmark; + +import com.beust.jcommander.Parameter; + +/** + * + * @author Tomas Barton + */ +public class BenchParams extends AbsParams { + + @Parameter(names = "--n", description = "size of biggest dataset", required = false) + public int n = 20; + + @Parameter(names = "--n-small", description = "size of smallest", required = false) + public int nSmall = 5; + + @Parameter(names = "--steps", description = "number of datasets which will be generated") + public int steps = 4; + + @Parameter(names = "--dimension", description = "number of attributes of each dataset") + public int dimension = 5; + + @Parameter(names = "--linkage", description = "linkage method") + public String linkage = "Single Linkage"; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/Container.java b/src/main/java/org/clueminer/clustering/benchmark/Container.java new file mode 100644 index 0000000..926c8a9 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/Container.java @@ -0,0 +1,42 @@ +package org.clueminer.clustering.benchmark; + +import java.util.logging.Logger; +import org.clueminer.clustering.TreeDiff; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.clustering.api.HierarchicalResult; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.utils.Props; + +/** + * + * @author Tomas Barton + */ +public abstract class Container implements Runnable { + + private HierarchicalResult result; + private final AgglomerativeClustering algorithm; + private final Dataset dataset; + private static final Logger logger = Logger.getLogger(Container.class.getName()); + + public Container(AgglomerativeClustering algorithm, Dataset dataset) { + this.algorithm = algorithm; + this.dataset = dataset; + } + + public abstract HierarchicalResult hierarchical(AgglomerativeClustering algorithm, Dataset dataset, Props params); + + @Override + public void run() { + Props params = new Props(); + this.result = hierarchical(algorithm, dataset, params); + } + + public boolean equals(Container other) { + if (this.result == null || other.result == null) { + throw new RuntimeException("got null result. 
this = " + result + " other = " + other); + } + return TreeDiff.compare(this.result, other.result); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/Experiment.java b/src/main/java/org/clueminer/clustering/benchmark/Experiment.java new file mode 100644 index 0000000..befcbab --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/Experiment.java @@ -0,0 +1,80 @@ +package org.clueminer.clustering.benchmark; + +import java.util.Random; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.plugin.ArrayDataset; +import org.clueminer.report.NanoBench; +import org.openide.util.Exceptions; + +/** + * + * @author Tomas Barton + */ +public class Experiment implements Runnable { + + protected final Random rand; + protected final BenchParams params; + protected final AgglomerativeClustering[] algorithms; + protected final String results; + + public Experiment(BenchParams params, String results, AgglomerativeClustering[] algorithms) { + rand = new Random(); + this.params = params; + this.results = results; + this.algorithms = algorithms; + } + + @Override + public void run() { + int inc = (params.n - params.nSmall) / params.steps; + + GnuplotReporter reporter = new GnuplotReporter(results, new String[]{"algorithm", "linkage", "n"}, algorithms, params.nSmall + "-" + params.n); + System.out.println("increment = " + inc); + for (int i = params.nSmall; i <= params.n; i += inc) { + Dataset dataset = generateData(i, params.dimension); + for (AgglomerativeClustering alg : algorithms) { + String[] opts = new String[]{alg.getName(), params.linkage, String.valueOf(dataset.size())}; + NanoBench.create().measurements(params.repeat).collect(reporter, opts).measure( + alg.getName() + " - " + params.linkage + " - " + dataset.size(), + new HclustBenchmark().hclust(alg, dataset, params.linkage) + ); + // Get the Java runtime + Runtime runtime = Runtime.getRuntime(); + // Run the garbage collector + runtime.gc(); + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + Exceptions.printStackTrace(ex); + } + } + } + reporter.finish(); + } + + /** + * Generate random dataset of doubles with given dimensions + * + * @param size + * @param dim + * @return + */ + protected Dataset generateData(int size, int dim) { + System.out.println("generating data: " + size + " x " + dim); + Dataset dataset = new ArrayDataset<>(size, dim); + for (int i = 0; i < dim; i++) { + dataset.attributeBuilder().create("attr-" + i, "NUMERIC"); + } + for (int i = 0; i < size; i++) { + dataset.instance(i).setName(String.valueOf(i)); + for (int j = 0; j < dim; j++) { + dataset.set(i, j, rand.nextDouble()); + } + } + + return dataset; + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/GnuplotReporter.java b/src/main/java/org/clueminer/clustering/benchmark/GnuplotReporter.java new file mode 100644 index 0000000..406539d --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/GnuplotReporter.java @@ -0,0 +1,185 @@ +package org.clueminer.clustering.benchmark; + +import com.google.common.collect.ObjectArrays; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.util.LinkedList; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.dataset.benchmark.GnuplotHelper; +import 
org.clueminer.dataset.benchmark.PointTypeIterator; +import org.clueminer.report.BigORes; +import org.clueminer.report.Reporter; +import org.openide.util.Exceptions; + +/** + * + * @author Tomas Barton + */ +public class GnuplotReporter extends GnuplotHelper implements Reporter { + + private final String dataDir; + private final File dataFile; + private final String folder; + private final AgglomerativeClustering[] algorithms; + private final LinkedList plots; + + public GnuplotReporter(String folder, String[] opts, AgglomerativeClustering[] algorithms, String suffix) { + this.dataDir = folder + File.separatorChar + "data"; + mkdir(dataDir); + this.dataFile = new File(dataDir + File.separatorChar + "results-" + suffix + ".csv"); + this.algorithms = algorithms; + this.folder = folder; + this.plots = new LinkedList<>(); + writeHeader(opts); + + String memPath = dataDir + File.separatorChar + "mem" + suffix + ".gpt"; + String cpuPath = dataDir + File.separatorChar + "cpu" + suffix + ".gpt"; + String cpu2Path = dataDir + File.separatorChar + "cpu2" + suffix + ".gpt"; + String tpsPath = dataDir + File.separatorChar + "tps" + suffix + ".gpt"; + + writePlotScript(new File(memPath), + plotComplexity(8, "memory (kB)", 10, 7, dataFile.getName(), algorithms, "Memory usage of hierarchical clustering algorithms - " + opts[1], false)); + writePlotScript(new File(cpuPath), + plotCpu(8, "CPU", 10, 2, dataFile.getName(), algorithms, "CPU usage of hierarchical clustering algorithms - " + opts[1], false)); + writePlotScript(new File(cpu2Path), + plotComplexity(8, "CPU time", 10, 2, dataFile.getName(), algorithms, "CPU usage of hierarchical clustering algorithms - " + opts[1], false)); + writePlotScript(new File(tpsPath), + plotComplexity(8, "tps", 10, 5, dataFile.getName(), algorithms, "Transactuion per second - " + opts[1], true)); + + writeBashScript(folder); + } + + private void writeHeader(String[] opts) { + String[] head = new String[]{"label", "avg time (ms)", "memory (MB)", "total time (s)", "tps", "repeats", "memory (kB)"}; + String[] line = ObjectArrays.concat(head, opts, String.class); + writeCsvLine(dataFile, line, false); + } + + /** + * + * @param result + */ + @Override + public void finalResult(BigORes result) { + String[] res = new String[]{result.getLabel(), result.avgTimeMs(), + result.totalMemoryInMb(), result.totalTimeInS(), result.tps(), + result.measurements(), result.totalMemoryInKb() + }; + String[] line = ObjectArrays.concat(res, result.getOpts(), String.class); + writeCsvLine(dataFile, line, true); + } + + /** + * + * @param file to write Gnuplot script + * @param dataFile + * @param labelPos column of label which is used for data rows in chart + * @param type + * @param x + * @param y + */ + private void writePlotScript(File file, String script) { + PrintWriter template; + try { + template = new PrintWriter(file, "UTF-8"); + template.write(script); + template.close(); + } catch (FileNotFoundException | UnsupportedEncodingException ex) { + Exceptions.printStackTrace(ex); + } + plots.add(withoutExtension(file)); + } + + private String plotCpu(int labelPos, String yLabel, int x, int y, String dataFile, AgglomerativeClustering[] algorithms, String title, boolean logscale) { + String res = "set datafile separator \",\"\n" + + "set key outside bottom horizontal box\n" + + "set title \"" + title + "\"\n" + + "set xlabel \"data size\" font \"Times,12\"\n" + + "set ylabel \"" + yLabel + "\" font \"Times,12\"\n" + // + "set xtics 0,0.5 nomirror\n" + // + "set ytics 0,0.5 nomirror\n" + + "set 
mytics 2\n" + + "set mx2tics 2\n" + + "set grid\n" + + "set pointsize 0.5\n" + + "f(x) = 0.5 * x**2\n"; + if (logscale) { + res += "set logscale y 2\n"; + } + int i = 0; + PointTypeIterator pti = new PointTypeIterator(); + for (AgglomerativeClustering alg : algorithms) { + if (i == 0) { + res += "plot "; + } + res += "\"< awk -F\\\",\\\" '{if($" + labelPos + " == \\\"" + alg.getName() + + "\\\") print}' " + dataFile + "\" u " + x + ":" + y + + " t \"" + alg.getName() + "\" w linespoints pt " + pti.next(); + res += ", \\\n"; + i++; + } + res += "f(x) title 'x^2' with lines linestyle 18\n"; + return res; + } + + private String plotComplexity(int labelPos, String yLabel, int x, int y, String dataFile, AgglomerativeClustering[] algorithms, String title, boolean logscale) { + String res = "set datafile separator \",\"\n" + + "set key outside bottom horizontal box\n" + + "set title \"" + title + "\"\n" + + "set xlabel \"data size\" font \"Times,12\"\n" + + "set ylabel \"" + yLabel + "\" font \"Times,12\"\n" + // + "set xtics 0,0.5 nomirror\n" + // + "set ytics 0,0.5 nomirror\n" + + "set mytics 2\n" + + "set mx2tics 2\n" + + "set grid\n" + + "set pointsize 0.5\n"; + if (logscale) { + res += "set logscale y 2\n"; + } + int i = 0; + int last = algorithms.length - 1; + PointTypeIterator pti = new PointTypeIterator(); + for (AgglomerativeClustering alg : algorithms) { + if (i == 0) { + res += "plot "; + } + res += "\"< awk -F\\\",\\\" '{if($" + labelPos + " == \\\"" + alg.getName() + + "\\\") print}' " + dataFile + "\" u " + x + ":" + y + + " t \"" + alg.getName() + "\" w linespoints pt " + pti.next(); + if (i != last) { + res += ", \\\n"; + } else { + res += "\n"; + } + + i++; + } + return res; + } + + /** + * Should be called when all plot files are written + */ + public void finish() { + //TODO maybe some cleanup? 
+ } + + private void writeBashScript(String dataDir) { + try { + bashPlotScript(plots.toArray(new String[plots.size()]), dataDir, "set term pdf font 'Times-New-Roman,8'", "pdf"); + bashPlotScript(plots.toArray(new String[plots.size()]), dataDir, "set terminal pngcairo size 1024,768 enhanced font 'Verdana,10'", "png"); + + } catch (FileNotFoundException ex) { + Exceptions.printStackTrace(ex); + } catch (UnsupportedEncodingException ex) { + Exceptions.printStackTrace(ex); + } catch (IOException ex) { + Exceptions.printStackTrace(ex); + } + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/HclustBenchmark.java b/src/main/java/org/clueminer/clustering/benchmark/HclustBenchmark.java new file mode 100644 index 0000000..fdeefe7 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/HclustBenchmark.java @@ -0,0 +1,41 @@ +package org.clueminer.clustering.benchmark; + +import org.clueminer.clustering.aggl.linkage.CompleteLinkage; +import org.clueminer.clustering.aggl.linkage.SingleLinkage; +import org.clueminer.clustering.api.AgglParams; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.clustering.api.HierarchicalResult; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.utils.Props; + +/** + * + * @author Tomas Barton + */ +public class HclustBenchmark { + + public Container hclust(final AgglomerativeClustering algorithm, final Dataset dataset, final String linkage) { + + final Container runnable = new Container(algorithm, dataset) { + + @Override + public HierarchicalResult hierarchical(AgglomerativeClustering algorithm, Dataset dataset, Props params) { + params.putBoolean(AgglParams.CLUSTER_ROWS, true); + params.put(AgglParams.LINKAGE, linkage); + + return algorithm.hierarchy(dataset, params); + } + }; + return runnable; + } + + public Container singleLinkage(final AgglomerativeClustering algorithm, final Dataset dataset) { + return hclust(algorithm, dataset, SingleLinkage.name); + } + + public Container completeLinkage(final AgglomerativeClustering algorithm, final Dataset dataset) { + return hclust(algorithm, dataset, CompleteLinkage.name); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/Main.java b/src/main/java/org/clueminer/clustering/benchmark/Main.java new file mode 100644 index 0000000..1d6076d --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/Main.java @@ -0,0 +1,72 @@ +package org.clueminer.clustering.benchmark; + +import org.clueminer.clustering.benchmark.exp.Data; +import org.clueminer.clustering.benchmark.exp.HclusPar; +import org.clueminer.clustering.benchmark.exp.HclusPar2; +import org.clueminer.clustering.benchmark.exp.Hclust; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import org.clueminer.clustering.benchmark.consensus.ConsensusExp; +import org.clueminer.clustering.benchmark.exp.EvolveScores; +import org.clueminer.clustering.benchmark.gen.NsgaGen; +import org.clueminer.clustering.benchmark.nsga.NsgaScore; + +/** + * + * @author deric + */ +public class Main { + + private static final Map map = new HashMap<>(); + private static Main instance; + + public Main() { + map.put("hclust", new Hclust()); + map.put("data", new Data()); + map.put("hclust-par", new HclusPar()); + map.put("hclust-par2", new HclusPar2()); + map.put("evolve-sc", new EvolveScores()); + map.put("nsga", new NsgaScore()); + map.put("nsga-gen", new NsgaGen()); + map.put("consensus", new ConsensusExp()); + } + + /** + * 
Entrypoint to all experiments + * + * @param args the command line arguments + */ + public static void main(String[] args) { + if (instance == null) { + instance = new Main(); + } + if (args.length == 0) { + usage(); + } + String exp = args[0]; + if (!Main.map.containsKey(exp)) { + usage(); + } + + String[] other = Arrays.copyOfRange(args, 1, args.length); + Bench bench = Main.map.get(exp); + //run it + bench.main(other); + } + + private static void usage() { + System.out.println("Usage: java -jar {jar name} [experiment name] [[optional arguments]]"); + System.out.println("Valid experiment names are:"); + for (String key : map.keySet()) { + for (int i = 0; i < 5; i++) { + System.out.print(" "); + } + System.out.print("- " + key + "\n"); + } + System.out.println("use '[experiment] --help' to find out more about optional arguments"); + System.out.println("--------------------------"); + System.exit(1); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusExp.java b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusExp.java new file mode 100644 index 0000000..aad7462 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusExp.java @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2011-2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see .
+ */ +package org.clueminer.clustering.benchmark.consensus; + +import com.beust.jcommander.JCommander; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.benchmark.Bench; +import static org.clueminer.clustering.benchmark.Bench.ensureFolder; +import static org.clueminer.clustering.benchmark.Bench.printUsage; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; + +/** + * + * @author deric + */ +public class ConsensusExp extends Bench { + + public static final String name = "consensus"; + + protected static ConsensusParams parseArguments(String[] args) { + ConsensusParams params = new ConsensusParams(); + JCommander cmd = new JCommander(params); + printUsage(args, cmd, params); + return params; + } + + @Override + public void main(String[] args) { + ConsensusParams params = parseArguments(args); + if (params.dataset != null) { + load(params.dataset); + } else { + loadDatasets(); + } + setupLogging(params); + + int i = 0; + for (Map.Entry, Integer>> e : availableDatasets.entrySet()) { + System.out.println((i++) + ":" + e.getKey()); + } + + benchmarkFolder = params.home + '/' + "benchmark" + '/' + name; + ensureFolder(benchmarkFolder); + System.out.println("writing results to: " + benchmarkFolder); + + System.out.println("=== starting " + name); + ConsensusRun exp = new ConsensusRun(params, benchmarkFolder, (Dataset) availableDatasets.get(params.dataset)); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusParams.java b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusParams.java new file mode 100644 index 0000000..7bb5669 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusParams.java @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2011-2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package org.clueminer.clustering.benchmark.consensus; + +import com.beust.jcommander.Parameter; +import org.clueminer.clustering.benchmark.AbsParams; + +/** + * + * @author deric + */ +public class ConsensusParams extends AbsParams { + + @Parameter(names = "--dataset", description = "use specific dataset") + public String dataset = null; + + @Parameter(names = "--method", description = "clustering algorithm name") + public String method = null; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusRun.java b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusRun.java new file mode 100644 index 0000000..d44c3a3 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/consensus/ConsensusRun.java @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2011-2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.clueminer.clustering.benchmark.consensus; + +import com.google.common.base.Supplier; +import com.google.common.collect.Maps; +import com.google.common.collect.Table; +import com.google.common.collect.Tables; +import java.io.File; +import java.util.LinkedList; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.clueminer.bagging.COMUSA; +import org.clueminer.bagging.CoAssociationReduce; +import org.clueminer.bagging.KMeansBagging; +import org.clueminer.clustering.ClusteringExecutorCached; +import org.clueminer.clustering.aggl.linkage.AverageLinkage; +import org.clueminer.clustering.api.AgglParams; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.Clustering; +import org.clueminer.clustering.api.ClusteringAlgorithm; +import org.clueminer.clustering.api.ClusteringFactory; +import org.clueminer.clustering.api.Executor; +import org.clueminer.clustering.api.factory.EvaluationFactory; +import static org.clueminer.clustering.benchmark.Bench.safeName; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.ResultsCollector; +import org.clueminer.utils.Props; +import org.openide.util.Exceptions; + +/** + * + * @author deric + */ +public class ConsensusRun implements Runnable { + + private static ResultsCollector rc; + private ConsensusParams params; + private String benchmarkFolder; + //table for keeping results from experiments + private Table table; + private static final Logger logger = Logger.getLogger(ConsensusRun.class.getName()); + private Dataset dataset; + + public ConsensusRun(ConsensusParams params, String benchmarkFolder, Dataset dataset) { + this.params = params; + this.benchmarkFolder = benchmarkFolder; + this.dataset = dataset; + + createTable(); + rc = new ResultsCollector(table); + } + + private void createTable() { + table = Tables.newCustomTable( + Maps.>newHashMap(), + new Supplier>() { + @Override + public Map get() { + return Maps.newHashMap(); + } + }); + } + + 
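ConsensusRun collects one score per (run, evaluator) pair in a Guava Table (filled in the run() method that follows) and finally hands it to ResultsCollector.writeToCsv(). A small sketch of that row/column/value layout, using HashBasedTable instead of the Tables.newCustomTable(...) construction above; the scores are made-up values:

    import com.google.common.collect.HashBasedTable;
    import com.google.common.collect.Table;

    public class ResultsTableSketch {

        public static void main(String[] args) {
            // rows = runs, columns = evaluation measures, cells = scores
            Table<String, String, Double> table = HashBasedTable.create();
            table.put("run 0", "NMIsum", 0.81);
            table.put("run 0", "Adjusted Rand", 0.74);
            table.put("run 1", "NMIsum", 0.79);
            table.put("run 1", "Adjusted Rand", 0.70);

            for (String run : table.rowKeySet()) {
                // table.row(run) is a Map from evaluator name to score
                System.out.println(run + " -> " + table.row(run));
            }
        }
    }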
@Override + public void run() { + try { + String name; + String folder; + EvaluationFactory ef = EvaluationFactory.getInstance(); + LinkedList evals = new LinkedList<>(); + evals.add(ef.getProvider("NMIsum")); + evals.add(ef.getProvider("Adjusted Rand")); + + ClusteringAlgorithm alg = ClusteringFactory.getInstance().getProvider(params.method); + Executor exec = new ClusteringExecutorCached(alg); + + createTable(); + name = safeName(dataset.getName()); + folder = benchmarkFolder + File.separatorChar + name; + + String csvRes = folder + File.separatorChar + "_" + name + ".csv"; + logger.log(Level.INFO, "dataset: {0} size: {1} num attr: {2}", new Object[]{name, dataset.size(), dataset.attributeCount()}); + //ensureFolder(benchmarkFolder + File.separatorChar + name); + Clustering c; + Props props = algorithmSetup(params.method); + props.putInt("k", dataset.getClasses().size()); + double score; + for (int i = 0; i < params.repeat; i++) { + c = exec.clusterRows(dataset, props); + for (ClusterEvaluation eval : evals) { + score = c.getEvaluationTable().getScore(eval); + table.put("run " + i, eval.getName(), score); + } + } + rc.writeToCsv(csvRes); + + } catch (Exception e) { + Exceptions.printStackTrace(e); + } + } + + private Props algorithmSetup(String alg) { + Props p = new Props(); + p.putInt(KMeansBagging.BAGGING, 10); + switch (alg) { + case "KmB-COMUSA-RAND": + p.put(KMeansBagging.CONSENSUS, COMUSA.name); + p.put(KMeansBagging.INIT_METHOD, "RANDOM"); + p.putDouble(COMUSA.RELAX, 1.0); + break; + case "KmB-COMUSA-MO": + p.put(KMeansBagging.CONSENSUS, COMUSA.name); + p.put(KMeansBagging.INIT_METHOD, "MO"); + p.put("mo_1", "AIC"); + p.put("mo_2", "SD index"); + break; + case "KmB-CoAssocHAC-MO-avg": + p.put(KMeansBagging.CONSENSUS, CoAssociationReduce.name); + p.put(KMeansBagging.INIT_METHOD, "MO"); + p.put("mo_1", "AIC"); + p.put("mo_2", "SD index"); + p.put(AgglParams.LINKAGE, AverageLinkage.name); + break; + case "KmB-CoAssocHAC-MO-AIC_SD": + p.put(KMeansBagging.CONSENSUS, CoAssociationReduce.name); + p.put(KMeansBagging.INIT_METHOD, "MO"); + p.put("mo_1", "AIC"); + p.put("mo_2", "SD index"); + break; + + } + return p; + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveExp.java b/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveExp.java new file mode 100644 index 0000000..5a30cc2 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveExp.java @@ -0,0 +1,101 @@ +package org.clueminer.clustering.benchmark.evolve; + +import com.google.common.base.Supplier; +import com.google.common.collect.Maps; +import com.google.common.collect.Table; +import com.google.common.collect.Tables; +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; +import org.clueminer.clustering.api.AgglParams; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.factory.ExternalEvaluatorFactory; +import static org.clueminer.clustering.benchmark.Bench.ensureFolder; +import static org.clueminer.clustering.benchmark.Bench.safeName; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.ConsoleDump; +import org.clueminer.dataset.benchmark.GnuplotWriter; +import org.clueminer.dataset.benchmark.ResultsCollector; +import org.clueminer.evolution.multim.MultiMuteEvolution; +import org.openide.util.Exceptions; + +/** + * Evolution of hierarchical clusterings with different (unsupervised) + * optimization 
criterion (single criterion) + * + * @author Tomas Barton + */ +public class EvolveExp implements Runnable { + + private static ResultsCollector rc; + private EvolveParams params; + private String benchmarkFolder; + private ClusterEvaluation[] scores; + private HashMap, Integer>> datasets; + //table for keeping results from experiments + private final Table table; + + public EvolveExp(EvolveParams params, String benchmarkFolder, ClusterEvaluation[] scores, HashMap, Integer>> availableDatasets) { + this.params = params; + this.benchmarkFolder = benchmarkFolder; + this.scores = scores; + this.datasets = availableDatasets; + + table = Tables.newCustomTable( + Maps.>newHashMap(), + new Supplier>() { + @Override + public Map get() { + return Maps.newHashMap(); + } + }); + rc = new ResultsCollector(table); + } + + @Override + public void run() { + try { + MultiMuteEvolution evolution; + String name; + + ClusterEvaluation ext = fetchExternal(params.external); + //evolution.setAlgorithm(new HACLW()); + System.out.println("datasets size: " + datasets.size()); + for (Map.Entry, Integer>> e : datasets.entrySet()) { + Dataset d = e.getValue().getKey(); + name = safeName(d.getName()); + String csvRes = benchmarkFolder + File.separatorChar + name + File.separatorChar + name + ".csv"; + System.out.println("=== dataset " + name); + System.out.println("size: " + d.size()); + ensureFolder(benchmarkFolder + File.separatorChar + name); + for (ClusterEvaluation eval : scores) { + evolution = new MultiMuteEvolution(); + evolution.setDataset(d); + evolution.setEvaluator(eval); + evolution.setExternal(ext); + evolution.setGenerations(params.generations); + evolution.setPopulationSize(params.population); + GnuplotWriter gw = new GnuplotWriter(evolution, benchmarkFolder, name + File.separatorChar + safeName(eval.getName())); + gw.setPlotDumpMod(50); + gw.setCustomTitle("cutoff=" + evolution.getDefaultParam(AgglParams.CUTOFF_STRATEGY) + "(" + evolution.getDefaultParam(AgglParams.CUTOFF_SCORE) + ")"); + //collect data from evolution + evolution.addEvolutionListener(new ConsoleDump()); + evolution.addEvolutionListener(gw); + evolution.addEvolutionListener(rc); + evolution.run(); + System.out.println("## updating results in: " + csvRes); + rc.writeToCsv(csvRes); + } + } + } catch (Exception e) { + Exceptions.printStackTrace(e); + } + } + + private ClusterEvaluation fetchExternal(String external) { + return ExternalEvaluatorFactory.getInstance().getProvider(external); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveParams.java b/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveParams.java new file mode 100644 index 0000000..aea1a41 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/evolve/EvolveParams.java @@ -0,0 +1,24 @@ +package org.clueminer.clustering.benchmark.evolve; + +import com.beust.jcommander.Parameter; +import org.clueminer.clustering.benchmark.AbsParams; + +/** + * + * @author Tomas Barton + */ +public class EvolveParams extends AbsParams { + + @Parameter(names = "--external", description = "reference criterion for comparing with internal criterion (Precision, Accuracy, NMI)") + public String external = "AUC"; + + @Parameter(names = "--test", description = "test only on one dataset") + public boolean test = false; + + @Parameter(names = "--generations", description = "number of generations in evolution") + public int generations = 10; + + @Parameter(names = "--population", description = "size of population in each generation") + public int 
population = 10; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/exp/Data.java b/src/main/java/org/clueminer/clustering/benchmark/exp/Data.java new file mode 100644 index 0000000..04f33fc --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/exp/Data.java @@ -0,0 +1,171 @@ +package org.clueminer.clustering.benchmark.exp; + +import com.google.common.base.Supplier; +import com.google.common.collect.Maps; +import com.google.common.collect.Table; +import com.google.common.collect.Tables; +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import org.clueminer.clustering.algorithm.KMeans; +import org.clueminer.clustering.api.InternalEvaluator; +import org.clueminer.clustering.api.factory.InternalEvaluatorFactory; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.ConsoleDump; +import org.clueminer.dataset.benchmark.GnuplotWriter; +import org.clueminer.dataset.benchmark.ResultsCollector; +import org.clueminer.clustering.api.ExternalEvaluator; +import org.clueminer.clustering.benchmark.Bench; +import org.clueminer.eval.external.JaccardIndex; +import org.clueminer.evolution.attr.AttrEvolution; +import org.clueminer.utils.FileUtils; +import org.openide.util.NbBundle; + +/** + * + * @author tombart + */ +public class Data extends Bench { + + private AttrEvolution test; + //table for keeping results from experiments + private Table table; + private static ResultsCollector rc; + private static String csvOutput; + private static Data instance; + + /** + * @param args the command line arguments + */ + @Override + public void main(String[] args) { + int i = 0, j; + String arg; + char flag; + boolean vflag = false; + String datasetName = ""; + + while (i < args.length && args[i].startsWith("-")) { + arg = args[i++]; + + // use this type of check for "wordy" arguments + switch (arg) { + // use this type of check for arguments that require arguments + case "-verbose": + System.out.println("verbose mode on"); + vflag = true; + break; + // use this type of check for a series of flag arguments + case "-dataset": + if (i < args.length) { + datasetName = args[i++]; + } else { + System.err.println("-dataset requires a name"); + } + if (vflag) { + System.out.println("dataset = " + datasetName); + } + break; + default: + for (j = 1; j < arg.length(); j++) { + flag = arg.charAt(j); + switch (flag) { + case 'x': + if (vflag) { + System.out.println("Option x"); + } + break; + case 'n': + if (vflag) { + System.out.println("Option n"); + } + break; + default: + System.err.println("Run: illegal option " + flag); + break; + } + } + break; + } + } + if (i == args.length) { + System.err.println("Usage: Benchmark [-verbose] [-xn] [-dataset name]"); + } + + init(); + execute(datasetName); + } + + private void init() { + table = Tables.newCustomTable( + Maps.>newHashMap(), + new Supplier>() { + @Override + public Map get() { + return Maps.newHashMap(); + } + }); + + String home = System.getProperty("user.home") + File.separatorChar + + NbBundle.getMessage( + FileUtils.class, + "FOLDER_Home"); + ensureFolder(home); + benchmarkFolder = home + File.separatorChar + "benchmark"; + ensureFolder(benchmarkFolder); + rc = new ResultsCollector(table); + csvOutput = benchmarkFolder + File.separatorChar + "results.csv"; + + //preload dataset names + loadDatasets(); + } + + public void execute(String datasetName) { + Map, Integer> datasets = new HashMap<>(); + if (availableDatasets.containsKey(datasetName)) { + 
Map.Entry, Integer> entry = availableDatasets.get(datasetName); + datasets.put(entry.getKey(), entry.getValue()); + } else { + System.out.println("dataset " + datasetName + " not found"); + System.out.println("known datasets: "); + for (String d : availableDatasets.keySet()) { + System.out.print(d + " "); + } + System.out.println("---"); + } + // DatasetFixture.allDatasets(); + + InternalEvaluatorFactory factory = InternalEvaluatorFactory.getInstance(); + ExternalEvaluator ext = new JaccardIndex(); + + String name; + System.out.println("working folder: " + benchmarkFolder); + for (Map.Entry, Integer> entry : datasets.entrySet()) { + Dataset dataset = entry.getKey(); + name = dataset.getName(); + String csvRes = benchmarkFolder + File.separatorChar + name + File.separatorChar + name + ".csv"; + System.out.println("=== dataset " + name); + System.out.println("size: " + dataset.size()); + System.out.println(dataset.toString()); + String dataDir = benchmarkFolder + File.separatorChar + name; + (new File(dataDir)).mkdir(); + for (InternalEvaluator eval : factory.getAll()) { + System.out.println("evaluator: " + eval.getName()); + test = new AttrEvolution(dataset, 20); + test.setAlgorithm(new KMeans()); + test.setK(entry.getValue()); + test.setEvaluator(eval); + test.setExternal(ext); + GnuplotWriter gw = new GnuplotWriter(test, benchmarkFolder, name + "/" + name + "-" + safeName(eval.getName())); + gw.setPlotDumpMod(50); + //collect data from evolution + test.addEvolutionListener(new ConsoleDump()); + test.addEvolutionListener(gw); + test.addEvolutionListener(rc); + test.run(); + rc.writeToCsv(csvRes); + } + } + } +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/exp/EvolveScores.java b/src/main/java/org/clueminer/clustering/benchmark/exp/EvolveScores.java new file mode 100644 index 0000000..7e4283d --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/exp/EvolveScores.java @@ -0,0 +1,61 @@ +package org.clueminer.clustering.benchmark.exp; + +import com.beust.jcommander.JCommander; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.InternalEvaluator; +import org.clueminer.clustering.api.factory.InternalEvaluatorFactory; +import org.clueminer.clustering.benchmark.Bench; +import org.clueminer.clustering.benchmark.evolve.EvolveExp; +import org.clueminer.clustering.benchmark.evolve.EvolveParams; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; + +/** + * + * @author Tomas Barton + */ +public class EvolveScores extends Bench { + + public static final String name = "evolve-sc"; + + protected static EvolveParams parseArguments(String[] args) { + EvolveParams params = new EvolveParams(); + JCommander cmd = new JCommander(params); + printUsage(args, cmd, params); + return params; + } + + @Override + public void main(String[] args) { + EvolveParams params = parseArguments(args); + if (params.test) { + load("iris"); + } else { + loadDatasets(); + } + setupLogging(params); + System.out.println("loaded dataset"); + int i = 0; + for (Map.Entry, Integer>> e : availableDatasets.entrySet()) { + System.out.println((i++) + ":" + e.getKey()); + } + + benchmarkFolder = params.home + '/' + "benchmark" + '/' + name; + ensureFolder(benchmarkFolder); + System.out.println("writing results to: " + benchmarkFolder); + + System.out.println("=== starting " + name); + List eval = 
InternalEvaluatorFactory.getInstance().getAll(); + ClusterEvaluation[] scores = eval.toArray(new ClusterEvaluation[eval.size()]); + System.out.println("scores size: " + scores.length); + EvolveExp exp = new EvolveExp(params, benchmarkFolder, scores, availableDatasets); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar.java b/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar.java new file mode 100644 index 0000000..8711c4a --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar.java @@ -0,0 +1,42 @@ +package org.clueminer.clustering.benchmark.exp; + +import java.io.File; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.aggl.HACLW; +import org.clueminer.clustering.aggl.HACLWMS; +import org.clueminer.clustering.aggl.HacLwMsPar; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.clustering.benchmark.BenchParams; +import org.clueminer.clustering.benchmark.Experiment; +import static org.clueminer.clustering.benchmark.Bench.ensureFolder; + +/** + * + * @author deric + */ +public class HclusPar extends Hclust { + + /** + * @param args the command line arguments + */ + @Override + public void main(String[] args) { + BenchParams params = parseArguments(args); + setupLogging(params); + + benchmarkFolder = params.home + File.separatorChar + "benchmark" + File.separatorChar + "hclust-par"; + ensureFolder(benchmarkFolder); + + System.out.println("# n = " + params.n); + System.out.println("=== starting experiment:"); + AgglomerativeClustering[] algorithms = new AgglomerativeClustering[]{ + new HACLW(), new HACLWMS(), new HacLwMsPar(4), new HacLwMsPar(8), new HacLwMsPar(16), new HacLwMsPar(32) + }; + Experiment exp = new Experiment(params, benchmarkFolder, algorithms); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar2.java b/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar2.java new file mode 100644 index 0000000..6206631 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/exp/HclusPar2.java @@ -0,0 +1,43 @@ +package org.clueminer.clustering.benchmark.exp; + +import java.io.File; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.aggl.HACLW; +import org.clueminer.clustering.aggl.HACLWMS; +import org.clueminer.clustering.aggl.HacLwMsPar; +import org.clueminer.clustering.aggl.HacLwMsPar2; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.clustering.benchmark.BenchParams; +import org.clueminer.clustering.benchmark.Experiment; +import static org.clueminer.clustering.benchmark.exp.Hclust.parseArguments; + +/** + * + * @author Tomas Barton + */ +public class HclusPar2 extends Hclust { + + /** + * @param args the command line arguments + */ + @Override + public void main(String[] args) { + BenchParams params = parseArguments(args); + setupLogging(params); + + benchmarkFolder = params.home + File.separatorChar + "hclust-par2"; + ensureFolder(benchmarkFolder); + + System.out.println("# n = " + params.n); + System.out.println("=== starting experiment:"); + AgglomerativeClustering[] algorithms = new AgglomerativeClustering[]{ + new HACLW(), new 
HACLWMS(), new HacLwMsPar(2), new HacLwMsPar(4), new HacLwMsPar2(2), new HacLwMsPar2(4) + }; + Experiment exp = new Experiment(params, benchmarkFolder, algorithms); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/exp/Hclust.java b/src/main/java/org/clueminer/clustering/benchmark/exp/Hclust.java new file mode 100644 index 0000000..a1f2ca0 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/exp/Hclust.java @@ -0,0 +1,84 @@ +package org.clueminer.clustering.benchmark.exp; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.ParameterException; +import java.io.File; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.aggl.HAC; +import org.clueminer.clustering.aggl.HACLW; +import org.clueminer.clustering.aggl.HACLWMS; +import org.clueminer.clustering.algorithm.HCL; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.clustering.benchmark.Bench; +import org.clueminer.clustering.benchmark.BenchParams; +import org.clueminer.clustering.benchmark.Experiment; + +/** + * + * @author deric + */ +public class Hclust extends Bench { + + protected static Hclust instance; + + /** + * @param args the command line arguments + */ + @Override + public void main(String[] args) { + BenchParams params = parseArguments(args); + setupLogging(params); + + benchmarkFolder = params.home + File.separatorChar + "benchmark" + File.separatorChar + "hclust"; + ensureFolder(benchmarkFolder); + + System.out.println("# n = " + params.n); + System.out.println("=== starting experiment:"); + AgglomerativeClustering[] algorithms = new AgglomerativeClustering[]{new HCL(), new HAC(), new HACLW(), new HACLWMS()}; + Experiment exp = new Experiment(params, benchmarkFolder, algorithms); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } + + protected static BenchParams parseArguments(String[] args) { + BenchParams params = new BenchParams(); + JCommander cmd = new JCommander(params); + printUsage(args, cmd, params); + return params; + } + + public static void printUsage(String[] args, JCommander cmd, BenchParams params) { + /* if (args.length == 0) { StringBuilder sb = new StringBuilder(); + cmd.usage(sb); + sb.append("\n").append("attributes marked with * are mandatory"); + System.out.println(sb); + System.err.println("missing mandatory arguments"); + System.exit(0); + }*/ + try { + cmd.parse(args); + /** + * TODO validate values of parameters + */ + if (params.n <= 0 || params.dimension <= 0) { + throw new ParameterException("invalid data dimensions " + params.n + " x " + params.dimension); + } + + if (params.steps <= 0) { + throw new ParameterException("invalid steps size " + params.steps); + } + + if (params.nSmall == params.n) { + throw new ParameterException("n can't be same as n-small! 
" + params.nSmall); + } + + } catch (ParameterException ex) { + System.out.println(ex.getMessage()); + cmd.usage(); + System.exit(0); + } + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGen.java b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGen.java new file mode 100644 index 0000000..d854ff8 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGen.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.clueminer.clustering.benchmark.gen; + +import com.beust.jcommander.JCommander; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.factory.InternalEvaluatorFactory; +import org.clueminer.clustering.benchmark.Bench; +import static org.clueminer.clustering.benchmark.Bench.ensureFolder; +import static org.clueminer.clustering.benchmark.Bench.printUsage; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; + +/** + * + * @author deric + */ +public class NsgaGen extends Bench { + + public static final String name = "nsga-gen"; + + protected static NsgaGenParams parseArguments(String[] args) { + NsgaGenParams params = new NsgaGenParams(); + JCommander cmd = new JCommander(params); + printUsage(args, cmd, params); + return params; + } + + @Override + public void main(String[] args) { + NsgaGenParams params = parseArguments(args); + if (params.dataset != null) { + load(params.dataset); + } else { + loadDatasets(); + } + setupLogging(params); + + int i = 0; + for (Map.Entry, Integer>> e : availableDatasets.entrySet()) { + System.out.println((i++) + ":" + e.getKey()); + } + + benchmarkFolder = params.home + '/' + "benchmark" + '/' + name; + ensureFolder(benchmarkFolder); + System.out.println("writing results to: " + benchmarkFolder); + + System.out.println("=== starting " + name); + InternalEvaluatorFactory factory = InternalEvaluatorFactory.getInstance(); + ClusterEvaluation c1 = factory.getProvider(params.c1); + ClusterEvaluation c2 = factory.getProvider(params.c2); + NsgaGenExp exp = new NsgaGenExp(params, benchmarkFolder, c1, c2, availableDatasets); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenExp.java b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenExp.java new file mode 100644 index 0000000..3ef26a6 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenExp.java @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 
of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.clueminer.clustering.benchmark.gen; + +import com.google.common.base.Supplier; +import com.google.common.collect.Maps; +import com.google.common.collect.Table; +import com.google.common.collect.Tables; +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.factory.EvaluationFactory; +import static org.clueminer.clustering.benchmark.Bench.safeName; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.ConsoleDump; +import org.clueminer.dataset.benchmark.GnuplotMO; +import org.clueminer.dataset.benchmark.ResultsCollector; +import org.clueminer.evolution.mo.MoEvolution; +import org.openide.util.Exceptions; + +/** + * + * @author deric + */ +public class NsgaGenExp implements Runnable { + + private static ResultsCollector rc; + private NsgaGenParams params; + private String benchmarkFolder; + private HashMap<String, Map.Entry<Dataset<? extends Instance>, Integer>> datasets; + //table for keeping results from experiments + private Table<String, String, Double> table; + private ClusterEvaluation c1; + private ClusterEvaluation c2; + private static final Logger logger = Logger.getLogger(NsgaGenExp.class.getName()); + + public NsgaGenExp(NsgaGenParams params, String benchmarkFolder, ClusterEvaluation c1, ClusterEvaluation c2, HashMap<String, Map.Entry<Dataset<? extends Instance>, Integer>> availableDatasets) { + this.params = params; + this.benchmarkFolder = benchmarkFolder; + this.c1 = c1; + this.c2 = c2; + this.datasets = availableDatasets; + + createTable(); + rc = new ResultsCollector(table); + } + + private void createTable() { + table = Tables.newCustomTable( + Maps.<String, Map<String, Double>>newHashMap(), + new Supplier<Map<String, Double>>() { + @Override + public Map<String, Double> get() { + return Maps.newHashMap(); + } + }); + } + + @Override + public void run() { + try { + MoEvolution evolution = new MoEvolution(); + + evolution.setPopulationSize(params.population); + evolution.setNumSolutions(params.solutions); + evolution.setExternal(EvaluationFactory.getInstance().getProvider("Jaccard")); + evolution.setMutationProbability(params.mutation); + evolution.setCrossoverProbability(params.crossover); + + GnuplotMO gw = new GnuplotMO(); + //gw.setCustomTitle("cutoff=" + evolution.getDefaultParam(AgglParams.CUTOFF_STRATEGY) + "(" + evolution.getDefaultParam(AgglParams.CUTOFF_SCORE) + ")"); + //collect data from evolution + evolution.addEvolutionListener(new ConsoleDump()); + evolution.addMOEvolutionListener(gw); + evolution.addMOEvolutionListener(rc); + evolution.addObjective(c1); + evolution.addObjective(c2); + + int[] generations = new int[]{1, 10, 50, 100, 1000}; + + String name; + String folder; + logger.log(Level.INFO, "datasets size: {0}", datasets.size()); + for (Map.Entry<String, Map.Entry<Dataset<? extends Instance>, Integer>> e : datasets.entrySet()) { + Dataset<? extends Instance> d = e.getValue().getKey(); + name = safeName(d.getName()); + folder = benchmarkFolder + File.separatorChar + name; + gw.mkdir(folder); + String csvRes = folder + File.separatorChar + "_" + name + ".csv"; + logger.log(Level.INFO, "dataset: {0} size: {1} num
attr: {2}", new Object[]{name, d.size(), d.attributeCount()}); + //ensureFolder(benchmarkFolder + File.separatorChar + name); + + evolution.setDataset(d); + + for (int i = 0; i < generations.length; i++) { + int g = generations[i]; + evolution.setGenerations(g); + gw.setCurrentDir(benchmarkFolder, name + "-" + g); + //for (int k = 0; k < params.repeat; k++) { + // logger.log(Level.INFO, "run {0}: {1} & {2}", new Object[]{k, c1.getName(), c2.getName()}); + evolution.run(); + rc.writeToCsv(csvRes); + //} + evolution.fireFinishedBatch(); + } + createTable(); + } + } catch (Exception e) { + Exceptions.printStackTrace(e); + } + } + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenParams.java b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenParams.java new file mode 100644 index 0000000..29aac13 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/gen/NsgaGenParams.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.clueminer.clustering.benchmark.gen; + +import com.beust.jcommander.Parameter; +import org.clueminer.clustering.benchmark.AbsParams; + +/** + * + * @author deric + */ +public class NsgaGenParams extends AbsParams { + + @Parameter(names = "--population", description = "size of population in each generation") + public int population = 20; + + @Parameter(names = "--solutions", description = "number of final solutions which will be returned as result") + public int solutions = 10; + + @Parameter(names = "--mutation", description = "probability of mutation") + public double mutation = 0.5; + + @Parameter(names = "--crossover", description = "probability of crossover") + public double crossover = 0.5; + + @Parameter(names = "--dataset", description = "use specific dataset") + public String dataset = null; + + @Parameter(names = "--c1", description = "criterion 1") + public String c1 = "Davies-Bouldin"; + + @Parameter(names = "--c2", description = "criterion 2") + public String c2 = "AIC"; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaExp.java b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaExp.java new file mode 100644 index 0000000..63fa38e --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaExp.java @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.clueminer.clustering.benchmark.nsga; + +import com.google.common.base.Supplier; +import com.google.common.collect.Maps; +import com.google.common.collect.Table; +import com.google.common.collect.Tables; +import java.io.File; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.factory.EvaluationFactory; +import static org.clueminer.clustering.benchmark.Bench.safeName; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.dataset.benchmark.ConsoleDump; +import org.clueminer.dataset.benchmark.GnuplotMO; +import org.clueminer.dataset.benchmark.ResultsCollector; +import org.clueminer.evolution.mo.MoEvolution; +import org.openide.util.Exceptions; + +/** + * + * @author Tomas Barton + */ +public class NsgaExp implements Runnable { + + private static ResultsCollector rc; + private NsgaParams params; + private String benchmarkFolder; + private ClusterEvaluation[] scores; + private HashMap<String, Map.Entry<Dataset<? extends Instance>, Integer>> datasets; + //table for keeping results from experiments + private Table<String, String, Double> table; + private static final Logger logger = Logger.getLogger(NsgaExp.class.getName()); + + public NsgaExp(NsgaParams params, String benchmarkFolder, ClusterEvaluation[] scores, HashMap<String, Map.Entry<Dataset<? extends Instance>, Integer>> availableDatasets) { + this.params = params; + this.benchmarkFolder = benchmarkFolder; + this.scores = scores; + this.datasets = availableDatasets; + + createTable(); + rc = new ResultsCollector(table); + } + + @Override + public void run() { + try { + MoEvolution evolution = new MoEvolution(); + evolution.setGenerations(params.generations); + evolution.setPopulationSize(params.population); + evolution.setNumSolutions(params.solutions); + evolution.setExternal(EvaluationFactory.getInstance().getProvider(params.supervised)); + evolution.setMutationProbability(params.mutation); + evolution.setCrossoverProbability(params.crossover); + evolution.setkLimit(params.limitK); + ClusterEvaluation c1, c2; + + GnuplotMO gw = new GnuplotMO(); + //gw.setCustomTitle("cutoff=" + evolution.getDefaultParam(AgglParams.CUTOFF_STRATEGY) + "(" + evolution.getDefaultParam(AgglParams.CUTOFF_SCORE) + ")"); + //collect data from evolution + evolution.addEvolutionListener(new ConsoleDump()); + evolution.addMOEvolutionListener(gw); + evolution.addMOEvolutionListener(rc); + + String name; + logger.log(Level.INFO, "datasets size: {0}", datasets.size()); + for (Map.Entry<String, Map.Entry<Dataset<? extends Instance>, Integer>> e : datasets.entrySet()) { + Dataset<? extends Instance> d = e.getValue().getKey(); + name = safeName(d.getName()); + String csvRes = benchmarkFolder + File.separatorChar + name + File.separatorChar + "_" + name + ".csv"; + logger.log(Level.INFO, "dataset: {0} size: {1} num attr: {2}", new Object[]{name, d.size(), d.attributeCount()}); + //ensureFolder(benchmarkFolder + File.separatorChar + name); + + gw.setCurrentDir(benchmarkFolder, name); + + evolution.setDataset(d); + + for (int i = 0; i < scores.length; i++) { + c1 = scores[i]; + //lower triangular matrix without diagonal + //(doesn't matter which criterion is first, we want to try + //all combinations) + for (int j = 0; j < i; j++) { + c2 = scores[j]; + evolution.clearObjectives(); + evolution.addObjective(c1); + evolution.addObjective(c2); + //run!
+ for (int k = 0; k < params.repeat; k++) { + logger.log(Level.INFO, "run {0}: {1} & {2}", new Object[]{k, c1.getName(), c2.getName()}); + evolution.run(); + rc.writeToCsv(csvRes); + } + evolution.fireFinishedBatch(); + logger.log(Level.INFO, "finished {0} & {1}", new Object[]{c1.getName(), c2.getName()}); + } + } + createTable(); + } + } catch (Exception e) { + Exceptions.printStackTrace(e); + } + } + + private void createTable() { + table = Tables.newCustomTable( + Maps.<String, Map<String, Double>>newHashMap(), + new Supplier<Map<String, Double>>() { + @Override + public Map<String, Double> get() { + return Maps.newHashMap(); + } + }); + } +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaParams.java b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaParams.java new file mode 100644 index 0000000..eb2e678 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaParams.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.clueminer.clustering.benchmark.nsga; + +import com.beust.jcommander.Parameter; +import org.clueminer.clustering.benchmark.AbsParams; + +/** + * + * @author Tomas Barton + */ +public class NsgaParams extends AbsParams { + + @Parameter(names = "--test", description = "test only on one dataset") + public boolean test = false; + + @Parameter(names = "--generations", description = "number of generations in evolution") + public int generations = 10; + + @Parameter(names = "--population", description = "size of population in each generation") + public int population = 20; + + @Parameter(names = "--solutions", description = "number of final solutions which will be returned as result") + public int solutions = 10; + + @Parameter(names = "--supervised", description = "supervised criterion for external evaluation") + public String supervised = "Adjusted Rand"; + + @Parameter(names = "--mutation", description = "probability of mutation") + public double mutation = 0.5; + + @Parameter(names = "--crossover", description = "probability of crossover") + public double crossover = 0.5; + + @Parameter(names = "--dataset", description = "use specific dataset") + public String dataset = null; + + @Parameter(names = "--limit-k", description = "limit max. clusterings size to sqrt(n)") + public boolean limitK = false; + +} diff --git a/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaScore.java b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaScore.java new file mode 100644 index 0000000..35b8167 --- /dev/null +++ b/src/main/java/org/clueminer/clustering/benchmark/nsga/NsgaScore.java @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2015 clueminer.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.clueminer.clustering.benchmark.nsga; + +import com.beust.jcommander.JCommander; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import org.clueminer.clustering.api.ClusterEvaluation; +import org.clueminer.clustering.api.InternalEvaluator; +import org.clueminer.clustering.api.factory.InternalEvaluatorFactory; +import org.clueminer.clustering.benchmark.Bench; +import static org.clueminer.clustering.benchmark.Bench.ensureFolder; +import static org.clueminer.clustering.benchmark.Bench.printUsage; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; + +/** + * + * @author Tomas Barton + */ +public class NsgaScore extends Bench { + + public static final String name = "nsga-scores"; + + protected static NsgaParams parseArguments(String[] args) { + NsgaParams params = new NsgaParams(); + JCommander cmd = new JCommander(params); + printUsage(args, cmd, params); + return params; + } + + @Override + public void main(String[] args) { + NsgaParams params = parseArguments(args); + if (params.test || params.dataset != null) { + if (params.test) { + load("iris"); + } else { + load(params.dataset); + } + } else { + loadDatasets(); + } + System.out.println("loaded dataset"); + setupLogging(params); + + int i = 0; + for (Map.Entry<String, Map.Entry<Dataset<? extends Instance>, Integer>> e : availableDatasets.entrySet()) { + System.out.println((i++) + ":" + e.getKey()); + } + + benchmarkFolder = params.home + '/' + "benchmark" + '/' + name; + ensureFolder(benchmarkFolder); + System.out.println("writing results to: " + benchmarkFolder); + + System.out.println("=== starting " + name); + List<InternalEvaluator> eval = InternalEvaluatorFactory.getInstance().getAll(); + ClusterEvaluation[] scores = eval.toArray(new ClusterEvaluation[eval.size()]); + System.out.println("scores size: " + scores.length); + NsgaExp exp = new NsgaExp(params, benchmarkFolder, scores, availableDatasets); + ExecutorService execService = Executors.newFixedThreadPool(1); + execService.submit(exp); + execService.shutdown(); + } +} diff --git a/src/main/nbm/manifest.mf b/src/main/nbm/manifest.mf new file mode 100644 index 0000000..9b5d283 --- /dev/null +++ b/src/main/nbm/manifest.mf @@ -0,0 +1,2 @@ +Manifest-Version: 1.0 +OpenIDE-Module-Localizing-Bundle: org/clueminer/clustering/benchmark/Bundle.properties diff --git a/src/main/resources/org/clueminer/clustering/benchmark/Bundle.properties b/src/main/resources/org/clueminer/clustering/benchmark/Bundle.properties new file mode 100644 index 0000000..e999176 --- /dev/null +++ b/src/main/resources/org/clueminer/clustering/benchmark/Bundle.properties @@ -0,0 +1,5 @@ +# Localized module labels. Defaults taken from POM (<name>, <description>, <groupId>) if unset.
+#OpenIDE-Module-Name= +#OpenIDE-Module-Short-Description= +#OpenIDE-Module-Long-Description= +#OpenIDE-Module-Display-Category= diff --git a/src/test/java/org/clueminer/clustering/benchmark/ExperimentTest.java b/src/test/java/org/clueminer/clustering/benchmark/ExperimentTest.java new file mode 100644 index 0000000..8838e09 --- /dev/null +++ b/src/test/java/org/clueminer/clustering/benchmark/ExperimentTest.java @@ -0,0 +1,50 @@ +package org.clueminer.clustering.benchmark; + +import org.clueminer.clustering.aggl.HAC; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.math.Matrix; +import org.junit.After; +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.Test; + +/** + * + * @author deric + */ +public class ExperimentTest { + + private Experiment subject; + + public ExperimentTest() { + } + + @Before + public void setUp() { + } + + @After + public void tearDown() { + } + + @Test + public void testRun() { + } + + @Test + public void testGenerateData() { + BenchParams params = new BenchParams(); + params.n = 15; + subject = new Experiment(params, null, new AgglomerativeClustering[]{new HAC()}); + Dataset<? extends Instance> data = subject.generateData(params.n, params.dimension); + assertEquals(params.n, data.size()); + assertEquals(params.dimension, data.attributeCount()); + Matrix m = data.asMatrix(); + assertEquals(params.n, m.rowsCount()); + assertEquals(params.dimension, m.columnsCount()); + m.print(5, 2); + } + +} diff --git a/src/test/java/org/clueminer/clustering/benchmark/HclustBenchmarkTest.java b/src/test/java/org/clueminer/clustering/benchmark/HclustBenchmarkTest.java new file mode 100644 index 0000000..916b9dd --- /dev/null +++ b/src/test/java/org/clueminer/clustering/benchmark/HclustBenchmarkTest.java @@ -0,0 +1,128 @@ +package org.clueminer.clustering.benchmark; + +import java.util.logging.ConsoleHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.clueminer.clustering.aggl.HAC; +import org.clueminer.clustering.aggl.HACLW; +import org.clueminer.clustering.aggl.HACLWMS; +import org.clueminer.clustering.api.AgglomerativeClustering; +import org.clueminer.dataset.api.Dataset; +import org.clueminer.dataset.api.Instance; +import org.clueminer.fixtures.clustering.FakeDatasets; +import org.clueminer.clustering.aggl.linkage.AverageLinkage; +import org.clueminer.clustering.aggl.linkage.MedianLinkage; +import org.clueminer.clustering.aggl.linkage.SingleLinkage; +import org.clueminer.report.NanoBench; +import static org.junit.Assert.assertEquals; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * @author deric + */ +public class HclustBenchmarkTest { + + private final AgglomerativeClustering[] algorithms; + + public HclustBenchmarkTest() { + //algorithms = new AgglomerativeClustering[]{new HAC(), new HACLW(), new HCL(), new HACLWMS()}; + algorithms = new AgglomerativeClustering[]{new HAC(), new HACLW()}; + } + + @BeforeClass + public static void setUp() { + Logger logger = NanoBench.getLogger(); + logger.setUseParentHandlers(false); + logger.setLevel(Level.INFO); + logger.addHandler(new ConsoleHandler()); + } + + @Test + public void testSingleLinkage() { + Dataset<? extends Instance> dataset = FakeDatasets.irisDataset(); + for (AgglomerativeClustering alg : algorithms) { + NanoBench.create().measurements(2).cpuAndMemory().measure( + alg.getName() + " single link - " + dataset.getName(), + new HclustBenchmark().singleLinkage(alg,
dataset) + ); + } + } + + @Test + public void testCompleteLinkage() { + Dataset<? extends Instance> dataset = FakeDatasets.irisDataset(); + for (AgglomerativeClustering alg : algorithms) { + NanoBench.create().cpuAndMemory().measurements(2).measure( + alg.getName() + " complete link - " + dataset.getName(), + new HclustBenchmark().completeLinkage(alg, dataset) + ); + } + } + + @Test + public void testSingleLinkageSameResultTwoAlg() { + //Dataset dataset = FakeDatasets.schoolData(); + Dataset<? extends Instance> dataset = FakeDatasets.kumarData(); + //use one algorithm as reference one + AgglomerativeClustering alg1 = new HAC(); + Container ref = new HclustBenchmark().completeLinkage(alg1, dataset); + ref.run(); + Container other; + + AgglomerativeClustering alg2 = new HACLW(); + other = new HclustBenchmark().completeLinkage(alg2, dataset); + other.run(); + System.out.println("comparing " + algorithms[0].getName() + " vs " + alg2.getName()); + assertEquals(true, ref.equals(other)); + + } + + /** + * TODO: single linkage gives different results + */ + //@Test + public void testSingleLinkageSameResult() { + //Dataset dataset = FakeDatasets.schoolData(); + Dataset<? extends Instance> dataset = FakeDatasets.kumarData(); + String linkage = SingleLinkage.name; + compareTreeResults(dataset, linkage, algorithms); + } + + @Test + public void testAverageLinkageResult() { + String linkage = AverageLinkage.name; + Dataset<? extends Instance> dataset = FakeDatasets.schoolData(); + + compareTreeResults(dataset, linkage, algorithms); + } + + /** + * TODO: median (centroid) linkage is broken + */ + //@Test + public void testMedianLinkageResult() { + String linkage = MedianLinkage.name; + Dataset<? extends Instance> dataset = FakeDatasets.schoolData(); + + compareTreeResults(dataset, linkage, new AgglomerativeClustering[]{new HAC(), new HACLW()}); + } + + private void compareTreeResults(Dataset<? extends Instance> dataset, String linkage, AgglomerativeClustering[] algs) { + //use one algorithm as reference one + Container ref = new HclustBenchmark().hclust(algs[0], dataset, linkage); + ref.run(); + Container other; + + //compare result to others + for (int i = 1; i < algs.length; i++) { + AgglomerativeClustering algorithm = algs[i]; + other = new HclustBenchmark().hclust(algorithm, dataset, linkage); + other.run(); + System.out.println("comparing " + algs[0].getName() + " vs " + algorithm.getName() + " linkage: " + linkage); + assertEquals(true, ref.equals(other)); + } + } + +}
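
Note on the objective pairing in NsgaExp.run(): the nested loop over the scores array walks the lower triangle of the criterion matrix without the diagonal, so every unordered pair of internal evaluators is optimized exactly once, i.e. n * (n - 1) / 2 runs for n evaluators. A minimal standalone sketch of that enumeration, assuming nothing beyond plain Java (the class name and the sample score labels are illustrative only, not part of this patch):

    import java.util.Arrays;
    import java.util.List;

    /** Illustrative only: mirrors the lower-triangle pair loop used in NsgaExp.run(). */
    public class ObjectivePairSketch {

        public static void main(String[] args) {
            // stand-ins for ClusterEvaluation providers; not the full factory list
            List<String> scores = Arrays.asList("Davies-Bouldin", "AIC", "Silhouette", "Dunn");

            int pairs = 0;
            for (int i = 0; i < scores.size(); i++) {
                // j < i: lower triangular matrix without diagonal, so the order of c1/c2 never matters
                for (int j = 0; j < i; j++) {
                    System.out.println(scores.get(i) + " & " + scores.get(j));
                    pairs++;
                }
            }
            // 4 criteria yield 6 unordered pairs, i.e. n * (n - 1) / 2
            System.out.println("total pairs: " + pairs);
        }
    }

Since NsgaScore feeds the complete list returned by InternalEvaluatorFactory.getAll() into this loop, the number of NSGA runs grows quadratically with the number of registered evaluators, which is worth keeping in mind when new criteria are added.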