# Makefile
# Forked from jeongyoonlee/kaggler-template.
# XXX: competition name (the value passed to `kaggle competitions download -c`)
COMPETITION := cat-in-the-dat-ii

# sed command name: `gsed` on macOS, plain `sed` everywhere else (e.g. Linux).
# The platform is read once from `uname -s`, so this file no longer has to be
# edited by hand per machine. `make SED=...` on the command line still wins.
ifeq ($(shell uname -s),Darwin)
SED := gsed
else
SED := sed
endif
# Top-level directories: downloaded data and the build tree.
DIR_DATA  := input
DIR_BUILD := build

# Subdirectories of the build tree.
DIR_FEATURE := $(DIR_BUILD)/feature
DIR_METRIC  := $(DIR_BUILD)/metric
DIR_MODEL   := $(DIR_BUILD)/model

# Subdirectories for the cross validation and ensembling.
DIR_VAL := $(DIR_BUILD)/val
DIR_TST := $(DIR_BUILD)/tst
DIR_SUB := $(DIR_BUILD)/sub

# Every directory above in one list: data dir first, then the build tree.
DIRS := $(DIR_DATA) $(DIR_BUILD)
DIRS += $(DIR_FEATURE) $(DIR_METRIC) $(DIR_MODEL)
DIRS += $(DIR_VAL) $(DIR_TST) $(DIR_SUB)
# Raw competition files used for training and prediction.
DATA_TRN          := $(DIR_DATA)/train.csv
DATA_TST          := $(DIR_DATA)/test.csv
SAMPLE_SUBMISSION := $(DIR_DATA)/sample_submission.csv

# XXX: 1-based index of the target column in $(DATA_TRN).
LABEL_IDX = 25

# Files cut out of the raw CSVs by the extraction rules in this Makefile.
HEADER := $(DIR_DATA)/header.csv
ID_TST := $(DIR_DATA)/id.tst.csv
Y_TRN  := $(DIR_FEATURE)/y.trn.txt
Y_TST  := $(DIR_FEATURE)/y.tst.txt
# Convenience target: make sure the train, test and sample-submission files exist.
# It names no file of its own, so it is declared phony; otherwise a file or
# directory called `data` in the working tree would make it look up to date.
.PHONY: data
data: $(DATA_TRN) $(DATA_TST) $(SAMPLE_SUBMISSION)
# Create whichever directory from $(DIRS) is requested, along with any missing parents.
$(DIRS): ; mkdir -p $@
# Download the competition archive into $(DIR_DATA) and unpack every zip found
# there next to itself.
#  - $(DIR_DATA) is order-only: it must exist, but its timestamp never forces
#    a re-download.
#  - The search is limited to $(DIR_DATA) so unrelated archives elsewhere in
#    the tree are left alone.
#  - The archive path reaches the inner shell as $$1 instead of being pasted
#    into the script text, so names with spaces or quotes are handled.
#  - `unzip -o` overwrites without prompting, so a re-run never blocks on an
#    interactive "replace?" question.
# NOTE: three targets on one header are three independent rules sharing this
# recipe; under `make -j` the download may be started more than once.
$(DATA_TRN) $(DATA_TST) $(SAMPLE_SUBMISSION): | $(DIR_DATA)
	kaggle competitions download -c $(COMPETITION) -p $(DIR_DATA)
	find $(DIR_DATA) -name '*.zip' -exec sh -c 'unzip -o -d "$$(dirname "$$1")" "$$1"' _ {} ';'
# Keep only the first line of the sample submission (its header row).
$(HEADER): $(SAMPLE_SUBMISSION)
	head -n 1 $< > $@
# First comma-separated field of every sample-submission line after the first
# (the id column, going by the variable name).
$(ID_TST): $(SAMPLE_SUBMISSION)
	tail -n +2 $< | cut -d, -f1 > $@
# Second comma-separated field of every sample-submission line after the first.
# $(DIR_FEATURE) is order-only: it just has to exist before the file is written.
$(Y_TST): $(SAMPLE_SUBMISSION) | $(DIR_FEATURE)
	tail -n +2 $< | cut -d, -f2 > $@
# Field number $(LABEL_IDX) of every training-file line after the first.
# $(DIR_FEATURE) is order-only: it just has to exist before the file is written.
$(Y_TRN): $(DATA_TRN) | $(DIR_FEATURE)
	tail -n +2 $< | cut -d, -f$(LABEL_IDX) > $@
# cleanup
# `clean` is a double-colon rule, so further `clean::` rules with their own
# recipes can be added for the same target.
clean::
	find . -name '*.pyc' -delete

# Run `clean`, then remove the data and build directories entirely.
# The leading `-` keeps make going even if a removal reports an error.
clobber: clean
	-rm -rf $(DIR_DATA)
	-rm -rf $(DIR_BUILD)

# Targets that are commands rather than files. The *.setup targets are not
# defined in the visible part of this file; presumably they live elsewhere.
.PHONY: clean clobber
.PHONY: mac.setup ubuntu.setup apt.setup pip.setup