Skip to content

Commit

Permalink
GH-15572: Check if headers were set before setting them to null in si…
Browse files Browse the repository at this point in the history
…ngle entry ARFF parsing [nocheck] (#15589)

* Do not let xgboost loading exception to go to void (#15560)

* Check if headers were set before setting them to null in single entry ARFF parsing

---------

Co-authored-by: Adam Valenta <[email protected]>
  • Loading branch information
tomasfryda and valenad1 authored Jun 20, 2023
1 parent 7bef068 commit e6ca341
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 1 deletion.
2 changes: 1 addition & 1 deletion h2o-core/src/main/java/water/parser/ARFFParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static ParseSetup guessSetup(ByteVec bv, byte[] bits, byte sep, boolean singleQu
}
data[0] = determineTokens(datalines[0], sep, singleQuotes, escapechar);
ncols = (ncols > 0) ? ncols : data[0].length;
labels = null;
labels = labels[0] == null ? null : labels;
} else { // 2 or more lines
if (sep == GUESS_SEP) { // first guess the separator
//FIXME if last line is incomplete, this logic fails
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ private boolean initXgboost() {
return true;
} catch (IOException e) {
// Oops: no lib loaded or load failed
LOG.debug("Cause of the xgboost unsuccessful load", e);
LOG.warn("Cannot initialize XGBoost backend! " + XGBOOST_MIN_REQUIREMENTS);
return false;
}
Expand Down
33 changes: 33 additions & 0 deletions h2o-py/tests/testdir_parser/pyunit_parse_single_entry_arff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os
import sys
import tempfile

sys.path.insert(1, "../../")
import h2o
from tests import pyunit_utils


def test_single_entry_arff_file():
    """Check that an ARFF file with a single data row keeps its column names.

    The iris ARFF file is cut down to everything up to and including the
    ``@DATA`` line plus the one line that follows it, written to a temporary
    ``.arff`` file, and imported. The column names of the imported one-row
    file must equal those of the full iris file.

    Raises:
        ValueError: if the source file has no ``"@DATA\\n"`` line.
        AssertionError: if the column names of the two frames differ.
    """
    source_path = pyunit_utils.locate("smalldata/junit/arff/iris.arff")
    with open(source_path, "r") as input_:
        arff = input_.readlines()

    # Slice end is data_start + 2: the "@DATA" line itself plus one more line.
    data_start = arff.index("@DATA\n")
    subsample = arff[:data_start + 2]
    print(subsample[-3:])

    fd, tmp = tempfile.mkstemp(".arff")
    try:
        # mkstemp returns an already-open OS-level descriptor. Wrap it rather
        # than re-opening the path, so the descriptor is closed when the
        # ``with`` block exits instead of leaking (an open handle can also
        # prevent the unlink below on Windows).
        with os.fdopen(fd, "w") as output:
            output.write("".join(subsample))

        train = h2o.import_file(source_path)
        test = h2o.import_file(tmp)

        print(f"{train.columns} == {test.columns}: {train.columns == test.columns}")
        assert train.columns == test.columns
    finally:
        # Always remove the temporary file, even if the import or assert fails.
        os.unlink(tmp)


# When this module is imported rather than executed as a script, the test is
# called directly; when executed as a script it is handed to
# pyunit_utils.standalone_test instead.
if __name__ != "__main__":
    test_single_entry_arff_file()
else:
    pyunit_utils.standalone_test(test_single_entry_arff_file)

0 comments on commit e6ca341

Please sign in to comment.