Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#2 added CvsImageDataProcessor, refactored ParallelSolrIndexer #4

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -23,3 +23,5 @@ classes/
/.project
/.settings
/dist/
/*.csv
/*_out.xml
6 changes: 3 additions & 3 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#Thu Apr 20 17:07:03 CEST 2017
buildNumber=161
#Wed Apr 26 18:36:11 CEST 2017
versionString=6.4.0_b01
buildDate=2017-04-20-1707
buildNumber=162
buildDate=2017-04-26-1836
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package net.semanticmetadata.lire.indexers.parallel;

import java.util.List;

import net.semanticmetadata.lire.imageanalysis.features.LocalFeature;
import net.semanticmetadata.lire.solr.indexing.ImageDataProcessor;

/**
 * A {@link WorkItem} that additionally carries the {@link ImageDataProcessor}
 * describing the image it was created for. The work item's file name is taken
 * from the processor's {@link ImageDataProcessor#getFilePath()}.
 */
public class SolrWorkItem extends WorkItem {

    ImageDataProcessor dataProcessor;

    /**
     * Creates a work item around a byte buffer.
     *
     * @param dataProcessor processor holding the data of the image; its file path becomes the file name
     * @param buffer        payload handed on to the {@link WorkItem} constructor
     */
    public SolrWorkItem(ImageDataProcessor dataProcessor, byte[] buffer) {
        super(null, buffer);
        attachProcessor(dataProcessor);
    }

    /**
     * Creates a work item around a list of local features.
     *
     * @param dataProcessor  processor holding the data of the image; its file path becomes the file name
     * @param listOfFeatures payload handed on to the {@link WorkItem} constructor
     */
    public SolrWorkItem(ImageDataProcessor dataProcessor, List<? extends LocalFeature> listOfFeatures) {
        super(null, listOfFeatures);
        attachProcessor(dataProcessor);
    }

    /**
     * @return the processor this work item was created with
     */
    public ImageDataProcessor getImageDataProcessor() {
        return this.dataProcessor;
    }

    /** Stores the processor and copies its file path into the work item's file name. */
    private void attachProcessor(ImageDataProcessor processor) {
        this.dataProcessor = processor;
        setFileName(processor.getFilePath());
    }

}
Original file line number Diff line number Diff line change
@@ -25,6 +25,10 @@ public static void init() {
MetricSpaces.loadReferencePoints(new GZIPInputStream(classloader.getResourceAsStream("metricspaces/logos-ca-ee_OpponentHistogram.msd.gz")));
MetricSpaces.loadReferencePoints(new GZIPInputStream(classloader.getResourceAsStream("metricspaces/logos-ca-ee_PHOG.msd.gz")));
MetricSpaces.loadReferencePoints(new GZIPInputStream(classloader.getResourceAsStream("metricspaces/jpg_us_filter_JCD.msd.gz")));

// TODO: Scalable color? replaced by JCD, but could still be useful


} catch (Exception e) {
e.printStackTrace();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package net.semanticmetadata.lire.solr.indexing;

/**
 * Skeleton {@link ImageDataProcessor} that stores the image data string and
 * renders the common Solr fields ({@code id}, {@code title}) followed by any
 * additional fields supplied by the subclass.
 *
 * <p>By default the image data string itself is used as the file path and no
 * additional fields are emitted.
 */
public abstract class AbstractImageDataProcessor implements ImageDataProcessor {

    String imageData;

    /**
     * @return the XML fragment with additional Solr fields; the empty string by default
     */
    @Override
    public String getAdditionalFields() {
        return "";
    }

    /**
     * @return the file path of the image; by default the image data string itself
     */
    @Override
    public String getFilePath() {
        return getImageData();
    }

    /**
     * @param imageData the data string describing the image
     */
    @Override
    public void setImageData(String imageData) {
        this.imageData = imageData;
    }

    /**
     * @return the data string last passed to {@link #setImageData(String)}
     */
    @Override
    public String getImageData() {
        return imageData;
    }

    /**
     * Appends the {@code id} and {@code title} fields, then the additional
     * fields, to the given builder.
     *
     * <p>The identifier and title are appended exactly as returned by
     * {@link #getIdentifier()} and {@link #getTitle()}; no escaping is applied
     * here.
     *
     * @param sb the builder receiving the Solr XML fragment
     */
    @Override
    public void appendSolrFields(StringBuilder sb) {
        sb.append("<field name=\"id\">");
        sb.append(getIdentifier());
        sb.append("</field>");

        sb.append("<field name=\"title\">");
        sb.append(getTitle());
        sb.append("</field>");

        // StringBuilder.append(String) writes the four characters "null" for a
        // null argument, which would corrupt the generated XML when a subclass
        // has no additional fields and signals that with null.
        String additionalFields = getAdditionalFields();
        if (additionalFields != null) {
            sb.append(additionalFields);
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package net.semanticmetadata.lire.solr.indexing;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;

/**
 * {@link ImageDataProcessor} whose image data is one row of a CSV file using
 * {@link #CSV_SEPARATOR} as the separator. The row is split into at most four
 * columns: file path, id, title and an optional additional value that is
 * emitted as the {@code imgurl} Solr field.
 */
public class CsvImageDataProcessor extends AbstractImageDataProcessor {

    public static int POS_FILE_PATH = 0;
    public static int POS_ID = 1;
    public static int POS_IMAGE_TITLE = 2;
    //additional fields: e.g. web url for image
    public static int POS_ADDITIONAL_FIELDS = 3;
    public static final String CSV_SEPARATOR = ";";
    public static String POS_ADDITIONAL_FIELD_IMAGE_URL = "imgurl";

    /** Upper bound on the number of columns produced when splitting a row. */
    private static final int MAX_COLUMNS = 4;

    String[] imageDataValues;

    /**
     * Splits the current image data into its CSV columns.
     *
     * <p>When no image data is set, previously split values are discarded so
     * that a reused processor never reports the columns of an earlier row.
     *
     * @return {@code true} if image data was present and has been split,
     *         {@code false} if the image data is {@code null}
     */
    protected boolean preprocessData() {
        String row = getImageData();
        if (row == null) {
            this.imageDataValues = null;
            return false;
        }

        //allow empty strings as values in the CSV file
        this.imageDataValues = StringUtils.splitByWholeSeparatorPreserveAllTokens(
                row, CSV_SEPARATOR, MAX_COLUMNS);

        return true;
    }

    /**
     * Stores the CSV row and immediately splits it into columns.
     *
     * @param imageData one CSV row, or {@code null} to clear the processor
     */
    @Override
    public void setImageData(String imageData) {
        super.setImageData(imageData);
        preprocessData();
    }

    /**
     * @return the columns of the current row, or {@code null} if none is set
     */
    public String[] getImageDataValues() {
        return imageDataValues;
    }

    void setImageDataValues(String[] imageDataValues) {
        this.imageDataValues = imageDataValues;
    }

    /**
     * @return the file path column, or {@code null} if the row does not have it
     */
    @Override
    public String getFilePath() {
        return valueAt(POS_FILE_PATH);
    }

    /**
     * @return the id column as read from the row (not escaped), or {@code null}
     *         if the row does not have it
     */
    @Override
    public String getIdentifier() {
        return valueAt(POS_ID);
    }

    /**
     * @return the XML-escaped title column, or {@code null} if the row does not have it
     */
    @Override
    public String getTitle() {
        String title = valueAt(POS_IMAGE_TITLE);
        if (title == null) {
            return null;
        }
        return StringEscapeUtils.escapeXml(title);
    }

    /**
     * Renders the additional column as a Solr field named
     * {@link #POS_ADDITIONAL_FIELD_IMAGE_URL}.
     *
     * @return the XML field, or the empty string if the row has no additional
     *         column (the same "nothing to add" value the base class uses, so
     *         callers can append the result without a null check)
     */
    @Override
    public String getAdditionalFields() {
        String additional = valueAt(POS_ADDITIONAL_FIELDS);
        if (additional == null) {
            return "";
        }

        return "<field name=\"" + POS_ADDITIONAL_FIELD_IMAGE_URL + "\">"
                + StringEscapeUtils.escapeXml(additional) + "</field>";
    }

    /** Returns the column at the given position, or {@code null} if the row has no such column. */
    private String valueAt(int position) {
        String[] values = getImageDataValues();
        if (values == null || values.length <= position) {
            return null;
        }
        return values[position];
    }

}
Original file line number Diff line number Diff line change
@@ -6,7 +6,11 @@
* @author Mathias Lux, [email protected], 08.12.2014
*/
public interface ImageDataProcessor {
// NOTE(review): this span comes from a diff hunk; the three methods taking a
// `String filename` look like the pre-change signatures that the no-argument
// variants below replace -- confirm against the actual file.
public CharSequence getTitle(String filename);
public CharSequence getIdentifier(String filename);
public CharSequence getAdditionalFields(String filename);
// Title of the image; written into the Solr "title" field by AbstractImageDataProcessor.
public String getTitle();
// Identifier of the image; written into the Solr "id" field by AbstractImageDataProcessor.
public String getIdentifier();
// Extra Solr field markup appended after id and title; the abstract base returns "".
public String getAdditionalFields();
// Path of the image file; SolrWorkItem uses it as the work item's file name.
public String getFilePath();
// Sets the data string describing the image (e.g. a CSV row for CsvImageDataProcessor).
public void setImageData(String imageData);
// Returns the data string previously set via setImageData.
public String getImageData();
// Appends this image's Solr field markup to the given builder.
public void appendSolrFields(StringBuilder sb);
}
Original file line number Diff line number Diff line change
@@ -5,19 +5,15 @@
*
* @author Mathias Lux, [email protected], 08.12.2014
*/
public class LogoCaImageDataProcessor implements ImageDataProcessor {
@Override
public CharSequence getTitle(String filename) {
return filename.substring(filename.lastIndexOf("converted-")+"converted-".length()).replaceAll("\\\\", "/");
}

@Override
public CharSequence getIdentifier(String filename) {
return filename.substring(filename.lastIndexOf("converted-")+"converted-".length()).replaceAll("\\\\", "/");
public class LogoCaImageDataProcessor extends AbstractImageDataProcessor {

@Override
public String getTitle() {
return getFilePath().substring(getFilePath().lastIndexOf("converted-")+"converted-".length()).replaceAll("\\\\", "/");
}

@Override
public CharSequence getAdditionalFields(String filename) {
return "";
public String getIdentifier() {
return getFilePath().substring(getFilePath().lastIndexOf("converted-")+"converted-".length()).replaceAll("\\\\", "/");
}
}
Original file line number Diff line number Diff line change
@@ -10,23 +10,23 @@
*
* @author Mathias Lux, [email protected], 08.12.2014
*/
public class MirFlickrImageDataProcessor implements ImageDataProcessor {
public class MirFlickrImageDataProcessor extends AbstractImageDataProcessor {
@Override
public CharSequence getTitle(String filename) {
public String getTitle() {
// return filename.replace("G:\\", "").replaceAll("\\\\", "/");
return filename.replace("D:\\DataSets\\MirFlickr\\", "").replaceAll("\\\\", "/");
return getFilePath().replace("D:\\DataSets\\MirFlickr\\", "").replaceAll("\\\\", "/");
}

@Override
public CharSequence getIdentifier(String filename) {
public String getIdentifier() {
// return filename.replace("G:\\", "").replaceAll("\\\\", "/");
return filename.replace("D:\\DataSets\\MirFlickr\\", "").replaceAll("\\\\", "/");
return getFilePath().replace("D:\\DataSets\\MirFlickr\\", "").replaceAll("\\\\", "/");
}

@Override
public CharSequence getAdditionalFields(String filename) {
public String getAdditionalFields() {
StringBuilder sb = new StringBuilder(1024);
int fileNumber = Integer.parseInt(filename.substring(filename.lastIndexOf("\\")).replaceAll("[^0-9]", "")) - 1;
int fileNumber = Integer.parseInt(getFilePath().substring(getFilePath().lastIndexOf("\\")).replaceAll("[^0-9]", "")) - 1;
String tagFileName = "D:\\DataSets\\MirFlickr\\tags_raw\\" + (fileNumber/10000) + "\\" + fileNumber + ".txt";
// String tagFileName = filename.substring(0, filename.lastIndexOf("\\")) + "\\" + fileNumber + ".txt";
try {
@@ -39,6 +39,7 @@ public CharSequence getAdditionalFields(String filename) {
e.printStackTrace();
return "";
}
return sb;
return sb.toString();
}

}
Loading