diff --git a/config_files/run_ml-100k_enriched.yml b/config_files/run_ml-100k_enriched.yml new file mode 100644 index 0000000..99e2187 --- /dev/null +++ b/config_files/run_ml-100k_enriched.yml @@ -0,0 +1,362 @@ +experiment: + dataset: + name: ml-100k + item: # infos related to item dataset (mandatory, at least item_id) + path: datasets/ml-100k/processed/item.csv + extra_features: [movie_year, movie_title] # features(columns) beside item_id to be used + user: # mandatory (at least user_id) + path: datasets/ml-100k/processed/user.csv + extra_features: [gender, occupation] # features beside user_id + ratings: # mandatory (at least [user_id, item_id, rating]) + path: datasets/ml-100k/processed/rating.csv + timestamp: True + enrich: + map_path: datasets/ml-100k/processed/map.csv + enrich_path: datasets/ml-100k/processed/enriched.csv + remove_unmatched: False + properties: [subject, director] + + preprocess: + # - method: filter_by_rating + # parameters: + # threshold: 20 + # - method: binarize + # parameters: + # threshold: 4 + - method: filter_kcore + parameters: + k: 20 + iterations: 1 + target: user # user or rating + + split: + seed: 42 + # test: + # method: random_by_ratio + # level: global + # p: 0.2 + # validation: + # method: random_by_ratio + # level: global + # p: 0.2 + + # test: + # method: timestamp_by_ratio + # level: user + # p: 0.1 + # validation: + # level: user + # method: timestamp_by_ratio + # p: 0.2 + + # test: + # method: fixed_timestamp + # # type: global_level + # timestamp: 890000000 + # validation: + # method: fixed_timestamp + # timestamp: 880000000 + + test: + method: k_fold + k: 5 + level: "user" + + models: + - name: deepwalk_based + config: + save_weights: True + parameters: + walk_len: 10 + p: 1.0 + q: 1.0 + n_walks: 50 + embedding_size: 64 + epochs: 1 + - name: deepwalk_based + config: + parameters: + walk_len: 10 + p: 0.8 + q: 0.6 + n_walks: 50 + embedding_size: 64 + epochs: 1 + - name: complEx + config: + save_weights: True + parameters: + embedding_dim: 100 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: distMult + config: + save_weights: True + parameters: + embedding_dim: 50 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + - name: rESCAL + config: + save_weights: True + parameters: + embedding_dim: 50 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: rotatE + config: + save_weights: True + parameters: + embedding_dim: 200 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + - name: transD + config: + save_weights: True + parameters: + embedding_dim: 150 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: transE + config: + save_weights: True + parameters: + embedding_dim: 150 + scoring_fct_norm: 1 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: transH + config: + save_weights: True + parameters: + embedding_dim: 150 + scoring_fct_norm: 2 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: transR + config: + save_weights: True + parameters: + embedding_dim: 150 + relation_dim: 90 + scoring_fct_norm: 2 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + - name: tuckER + config: + save_weights: True + parameters: + embedding_dim: 200 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: "sentence-transformers/distiluse-base-multilingual-cased-v2" + embed_with: abstract + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: deepwalk_based + embedding_model_kwargs: + config: + save_weights: True + parameters: + walk_len: 10 + p: 1.0 + q: 1.0 + n_walks: 50 + embedding_size: 64 + epochs: 1 + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: deepwalk_based + embedding_model_kwargs: + config: + save_weights: True + parameters: + walk_len: 10 + p: 0.8 + q: 0.6 + n_walks: 50 + embedding_size: 64 + epochs: 1 + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: complEx + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 100 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: distMult + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 50 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: rESCAL + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 50 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: rotatE + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 200 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: transD + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 150 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: transE + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 150 + scoring_fct_norm: 1 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: transH + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 150 + scoring_fct_norm: 2 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: transR + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 150 + relation_dim: 90 + scoring_fct_norm: 2 + epochs: 5 + seed: 42 + triples: all # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + - name: ePHEN + config: + save_weights: True + parameters: + embedding_model: tuckER + embedding_model_kwargs: + config: + save_weights: True + parameters: + embedding_dim: 200 + epochs: 5 + seed: 42 + triples: ratings # only (ratings) or (all) triples for training + embed_with: graph + iterations: 30 + mi: 0.5 + + evaluation: + k: 5 + relevance_threshold: 0 + metrics: [MAP, nDCG] + + report: + file: "experiment_results/ml-100k/run1.csv" + execution_times: + file: "experiment_results/ml-100k_execution-times/run1.csv" \ No newline at end of file