Skip to content

Commit 5eed1aa

Browse files
committed
Ignoring .group files
1 parent 83716e1 commit 5eed1aa

7 files changed: +41 -28 lines changed

.gitignore

+2-1
Original file line number | Diff line number | Diff line change
@@ -9,4 +9,5 @@ data/*
99

1010
project/
1111
.DS_Store
12-
target/
12+
target/
13+
*.group

src/main/scala/CandidateGenerator.scala

+9-3
Original file line number | Diff line number | Diff line change
@@ -47,14 +47,20 @@ class CandidateGenerator(dataset: Dataset, backgroundData: BackgroundData) {
4747
: mutable.LinkedHashMap[String, List[String]] =
4848
mutable.LinkedHashMap.empty
4949

50-
def generateCandidates(synthetic: Int = 0) = {
50+
def generateCandidates(synthetic: Int = 0): Unit = {
5151
candidateHash.clear()
5252
synthethicCandidateHash.clear()
5353

54-
println("Now generating candidates: ")
54+
println(s"Now generating candidates for ${dataset.filename}, synthetic: $synthetic: ")
5555
val candidateFile = new File(
5656
candidate_file.replace(".txt", s"_${synthetic}.txt"))
57-
if (!candidateFile.exists()) candidateFile.createNewFile()
57+
if (!candidateFile.exists()) {
58+
candidateFile.createNewFile()
59+
}
60+
else {
61+
println(s"Candidates already exists in ${candidateFile.getName}. Exiting.")
62+
return
63+
}
5864

5965
val bufferedWriter: BufferedWriter = new BufferedWriter(
6066
new FileWriter(candidateFile))

src/main/scala/Dataset.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ object Dataset {
33
val TEST = "test.txt"
44
val BACKGROUND = "background.txt"
55
val VALIDATION = "validation.txt"
6-
val TRAINING = "training.xt"
6+
val TRAINING = "train.txt"
77
}
88
class Dataset(val filename: String) {
99
def queries = Source.fromFile(filename).getLines.drop(0)

src/main/scala/Feature.scala

+2
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,8 @@ object Feature {
6262
currentGroupId += 1
6363
currentGroupSize = 0
6464
}
65+
66+
currentGroupSize += 1
6567
val builder = new StringBuilder()
6668
builder.append(feature.relevant + " ")
6769
builder.append(s"qid:${currentGroupId} ")

src/main/scala/FeatureGenerator.scala

+8-2
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,19 @@ import scala.io.Source
55
class FeatureGenerator(filename: String) {
66

77
private val writeFilename = filename.replace(".txt", "_features.txt")
8-
private val writeGroupname = filename.replace(".txt", "_groups.txt")
8+
private val writeGroupname = filename.replace(".txt", "_features.txt.group")
99

1010
def generateAndWriteFeatures(): Unit = {
1111
val featureFile = new File(writeFilename)
1212
val groupFile = new File(writeGroupname)
1313

14-
if (!featureFile.exists()) featureFile.createNewFile()
14+
println("Now generating features.")
15+
if (!featureFile.exists()) {
16+
featureFile.createNewFile()
17+
} else {
18+
println(s"Features already exist for ${featureFile.getName}.")
19+
return
20+
}
1521
if (!groupFile.exists()) groupFile.createNewFile()
1622

1723
var i = 0

src/main/scala/LambdaMart.scala

+3-5
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ import java.io.File
33
import ml.dmlc.xgboost4j.scala.{DMatrix, XGBoost}
44

55
import scala.collection.mutable
6+
import scala.io.Source
67

78
object LambdaMart {
89

@@ -13,7 +14,6 @@ object LambdaMart {
1314
val validateM = new DMatrix(validationFile)
1415

1516
val watches = new mutable.HashMap[String, DMatrix]
16-
watches += "train" -> trainM
1717
watches += "validation" -> validateM
1818

1919
val params = new mutable.HashMap[String, Any]()
@@ -26,6 +26,7 @@ object LambdaMart {
2626

2727
val round = 4
2828
// train a model
29+
println("Now training the model!")
2930
val booster = XGBoost.train(trainM, params.toMap, round, watches.toMap)
3031

3132
// predict
@@ -34,11 +35,8 @@ object LambdaMart {
3435
if (!file.exists()) {
3536
file.mkdirs()
3637
}
37-
booster.saveModel(file.getAbsolutePath + "/xgb.model")
38-
// dump model with feature map
39-
val modelInfos =
40-
booster.getModelDump(file.getAbsolutePath + "/featmap.txt", false)
4138

39+
booster.saveModel(file.getAbsolutePath + s"/$modelName")
4240
}
4341

4442
def evaluateModel(pathModel: String, pathTest: String) = {

src/main/scala/Main.scala

+16-16
Original file line number | Diff line number | Diff line change
@@ -4,26 +4,26 @@ object Main {
44

55
def main(args: Array[String]): Unit = {
66
// Generate all candidates. Note: this takes quite a while.
7-
genAllCandidates(Dataset.TEST)
8-
// genAllCandidates(Dataset.VALIDATION)
9-
// genAllCandidates(Dataset.TRAINING)
7+
// genAllCandidates(Dataset.TEST)
8+
// genAllCandidates(Dataset.VALIDATION)
9+
// genAllCandidates(Dataset.TRAINING)
1010

1111

1212
// Compute MPC for the test candidates WITHOUT synthetic candidates. We use this as baseline.
13-
val mpc = new MPC("test_candidates_0.txt")
14-
mpc.computeMPC()
13+
// val mpc = new MPC("test_candidates_0.txt")
14+
// mpc.computeMPC()
1515

1616
// This probably takes a bit of time.
17-
genAllFeatures(Dataset.TEST)
17+
// genAllFeatures(Dataset.TEST)
1818
// genAllFeatures(Dataset.TRAINING)
1919
// genAllFeatures(Dataset.VALIDATION)
2020

2121

22-
//
23-
// val lambda = LambdaMart.trainModel("train_candidates_0_features.txt",
24-
// "validation_candidates_0_features.txt",
25-
// "model.txt")
26-
//
22+
23+
val lambda = LambdaMart.trainModel("train_candidates_0_features.txt",
24+
"validation_candidates_0_features.txt",
25+
"no_ngrams_0_synthetic")
26+
2727
// LambdaMart.evaluateModel("./model/xgb.model",
2828
// "test_candidates_0_features.txt")
2929

@@ -42,11 +42,11 @@ object Main {
4242
val featureGenerator = new FeatureGenerator(s"${file.replace(".txt", "")}_candidates_0.txt")
4343
featureGenerator.generateAndWriteFeatures()
4444

45-
val featureGenerator2 = new FeatureGenerator(s"${file.replace(".txt", "")}_candidates_1.txt")
46-
featureGenerator2.generateAndWriteFeatures()
47-
48-
val featureGenerator3 = new FeatureGenerator(s"${file.replace(".txt", "")}_candidates_2.txt")
49-
featureGenerator3.generateAndWriteFeatures()
45+
// val featureGenerator2 = new FeatureGenerator(s"${file.replace(".txt", "")}_candidates_1.txt")
46+
// featureGenerator2.generateAndWriteFeatures()
47+
//
48+
// val featureGenerator3 = new FeatureGenerator(s"${file.replace(".txt", "")}_candidates_2.txt")
49+
// featureGenerator3.generateAndWriteFeatures()
5050
}
5151

5252
}

0 commit comments

Comments
 (0)