Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add option to run multimodels template with array of balancing strategies #56

Merged
merged 5 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,18 @@ optional arguments:
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
--n_runs N_RUNS Number of runs. Default: 1.
--no_wordclouds Disables the generation of wordclouds.
--balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double.
--instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1.
--stop_if STOP_IF The number of label actions to simulate. Default 'min' will stop simulating when all relevant records are found.
--classifiers CLASSIFIERS Classifiers to use Default: ['logistic', 'nb', 'rf', 'svm']
--feature_extractors FEATURE_EXTRACTOR Feature extractors to use Default: ['doc2vec', 'sbert', 'tfidf']
--query_strategies QUERY_STRATEGY Query strategies to use Default: ['max']
--balancing_strategies BALANCE_STRATEGY Balance strategies to use Default: ['double']
BjarneJesse marked this conversation as resolved.
Show resolved Hide resolved
--impossible_models IMPOSSIBLE_MODELS Model combinations to exclude Default: ['nb,doc2vec', 'nb,sbert']
```

If you want to specify certain combinations of classifiers and feature
extractors that should and should not be used, you can use the `--classifiers`,
`--feature_extractors`, `--query_strategies` and `--impossible_models` option. For instance, if you
`--feature_extractors`, `--query_strategies`, `--balancing_strategies` and `--impossible_models` options. For instance, if you
want to exclude the combinations of `nb` with `doc2vec` and `logistic` with
`tfidf`, use the following command:

Expand Down
9 changes: 8 additions & 1 deletion asreviewcontrib/makita/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ def execute(self, argv): # noqa: C901
help="Query strategies to use. Only for template 'multimodel'. "
"Default: ['max']",
)
parser_template.add_argument(
"--balancing_strategies",
nargs="+",
default=["double"],
help="Balancing strategies to use. Only for template 'multimodel'. "
"Default: ['double']",
)
parser_template.add_argument(
"--impossible_models",
nargs="+",
Expand Down Expand Up @@ -275,8 +282,8 @@ def _template(self, args):
all_classifiers=args.classifiers,
all_feature_extractors=args.feature_extractors,
all_query_strategies=args.query_strategies,
all_balancing_strategies=args.balancing_strategies,
impossible_models=args.impossible_models,
balance_strategy=args.balance_strategy,
instances_per_query=args.instances_per_query,
stop_if=args.stop_if,
fp_template=fp_template,
Expand Down
7 changes: 5 additions & 2 deletions asreviewcontrib/makita/template_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def render_jobs_multimodel(
all_classifiers=None,
all_feature_extractors=None,
all_query_strategies=None,
all_balancing_strategies=None,
impossible_models=None,
balance_strategy="double",
instances_per_query=1,
stop_if='min',
fp_template=None,
Expand All @@ -39,6 +39,9 @@ def render_jobs_multimodel(
if all_query_strategies is None:
all_query_strategies = ["max"]

if all_balancing_strategies is None:
all_balancing_strategies = ["double"]

if impossible_models is None:
impossible_models = ["nb,doc2vec", "nb,sbert"]

Expand Down Expand Up @@ -108,7 +111,6 @@ def render_jobs_multimodel(
{
"datasets": params,
"create_wordclouds": create_wordclouds,
"balance_strategy": balance_strategy,
"instances_per_query": instances_per_query,
"stop_if": stop_if,
"output_folder": output_folder,
Expand All @@ -119,6 +121,7 @@ def render_jobs_multimodel(
"all_query_strategies": all_query_strategies,
"all_classifiers": all_classifiers,
"all_feature_extractors": all_feature_extractors,
"all_balancing_strategies": all_balancing_strategies,
"impossible_models": [i.split(",") for i in impossible_models],
}
)
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,20 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files
{% for classifier in all_classifiers %}
{% for feature_extraction in all_feature_extractors %}
{% for query_strategy in all_query_strategies %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }}
{% for balance_strategy in all_balancing_strategies %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }}{{ temp.append(balance_strategy)|default("", True) }}
jteijema marked this conversation as resolved.
Show resolved Hide resolved
{% if temp in impossible_models %}

# Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}, Balance strategy = {{balance_strategy}}
{% for run in range(n_runs) %}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.json
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --balance_strategy {{balance_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
BjarneJesse marked this conversation as resolved.
Show resolved Hide resolved
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.json
{% endfor %}{% endif %}
{% endfor %}
{% endfor %}
{% endfor %}

BjarneJesse marked this conversation as resolved.
Show resolved Hide resolved
{% endfor %}

# Generate plot and tables for dataset
python {{ scripts_folder }}/get_plot.py -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/ -o {{ output_folder }}/figures/plot_recall_sim_{{ dataset.input_file_stem }}.png --show_legend model
Expand Down
Loading