From 53ce6a8dab76e31ff4ece5d845ee7045e4c414d8 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 14:32:23 +0100 Subject: [PATCH 01/95] Create base class --- asreviewcontrib/makita/template_base.py | 72 +++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 asreviewcontrib/makita/template_base.py diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py new file mode 100644 index 00000000..7a4e5116 --- /dev/null +++ b/asreviewcontrib/makita/template_base.py @@ -0,0 +1,72 @@ +"""Rendering base class for templates.""" + +import os +import platform +from pathlib import Path + +from cfgtemplater.config_template import ConfigTemplate + +from asreviewcontrib.makita import __version__ +from asreviewcontrib.makita.utils import FileHandler +from asreviewcontrib.makita.utils import check_filename_dataset + + +class RenderTemplateBase: + def __init__(self, datasets, output_folder="output", scripts_folder="scripts", + create_wordclouds=True, allow_overwrite=False, init_seed=535, + model_seed=165, query_strategy="max", balance_strategy="double", + instances_per_query=1, stop_if='min', fp_template=None, job_file=None, + platform_sys=None): + self.datasets = datasets + self.output_folder = output_folder + self.scripts_folder = scripts_folder + self.create_wordclouds = create_wordclouds + self.init_seed = init_seed + self.model_seed = model_seed + self.query_strategy = query_strategy + self.balance_strategy = balance_strategy + self.instances_per_query = instances_per_query + self.stop_if = stop_if + self.fp_template = fp_template + self.job_file = job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" + self.platform_sys = platform_sys if platform_sys else platform.system() + self.file_handler = FileHandler() + self.file_handler.overwrite_all = allow_overwrite + self.template = ConfigTemplate(fp_template) + self.__version__ = __version__ + + def render(self): + raise NotImplementedError("Subclasses should implement this method to render specific templates.") + + def prepare_common_params(self): + params = [] + for i, fp_dataset in enumerate(sorted(self.datasets)): + check_filename_dataset(fp_dataset) + fp_dataset = Path(fp_dataset) + params.append(self.prepare_dataset_params(i, fp_dataset)) + return params + + def prepare_dataset_params(self, index, fp_dataset): + raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") + + def render_scripts(self, scripts: list): + for s in scripts: + t_script = self.file_handler.render_file_from_template( + s, "script", output_folder=self.output_folder + ) + export_fp = Path(self.scripts_folder, s) + self.file_handler.add_file(t_script, export_fp) + + def render_docs(self, docs: list, name: str): + for s in docs: + t_docs = self.file_handler.render_file_from_template( + s, + "doc", + datasets=self.datasets, + template_name=self.template.name if self.template.name == name else "custom", + template_name_long=self.template.name_long, + template_scripts=self.template.scripts, + output_folder=self.output_folder, + job_file=self.job_file, + ) + self.file_handler.add_file(t_docs, s) From 92812e9ddaaab27e249d8fdb3fcb823401ae23bd Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 14:32:40 +0100 Subject: [PATCH 02/95] Change basic --- asreviewcontrib/makita/entrypoint.py | 9 +- asreviewcontrib/makita/template_basic.py | 141 +++++++---------------- 2 files changed, 44 insertions(+), 106 deletions(-) diff --git 
a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 0838b4c0..d04550ad 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -6,9 +6,7 @@ from asreviewcontrib.makita import __version__ from asreviewcontrib.makita.config import TEMPLATES_FP -from asreviewcontrib.makita.template_arfi import render_jobs_arfi from asreviewcontrib.makita.template_basic import render_jobs_basic -from asreviewcontrib.makita.template_multimodel import render_jobs_multimodel from asreviewcontrib.makita.utils import FileHandler @@ -217,8 +215,9 @@ def _template(self, args): Path(args.o).parent.mkdir(parents=True, exist_ok=True) if args.name in ["basic"]: - # render jobs - job = render_jobs_basic( + from asreviewcontrib.makita.template_basic import RenderJobsBasic + + job = RenderJobsBasic( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, @@ -234,7 +233,7 @@ def _template(self, args): fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, - ) + ).render() elif args.name in ["arfi"]: # render jobs diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 38643176..9c40c2a1 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -1,108 +1,47 @@ """Render basic template.""" -import os -import platform -from pathlib import Path - -from cfgtemplater.config_template import ConfigTemplate - -from asreviewcontrib.makita import __version__ -from asreviewcontrib.makita.utils import FileHandler -from asreviewcontrib.makita.utils import check_filename_dataset - - -def render_jobs_basic( - datasets, - output_folder="output", - scripts_folder="scripts", - create_wordclouds=True, - n_runs=1, - init_seed=535, - model_seed=165, - classifier="nb", - feature_extractor="tfidf", - query_strategy="max", - balance_strategy="double", - instances_per_query=1, - stop_if='min', - fp_template=None, - job_file=None, - platform_sys=None, -): - """Render jobs.""" - - if not platform_sys: - platform_sys = platform.system() - if not job_file: - job_file = "jobs.bat" if os.name == "nt" else "jobs.sh" - - params = [] - - # initialize file handler - file_handler = FileHandler() - - # generate params for all simulations - for i, fp_dataset in enumerate(sorted(datasets)): - check_filename_dataset(fp_dataset) - - fp_dataset = Path(fp_dataset) - - # params for single dataset - params.append( - { - "input_file": fp_dataset.as_posix(), - "input_file_stem": fp_dataset.stem, - "model_seed": model_seed + i, - "init_seed": init_seed, - "n_runs": n_runs, - } - ) - - # Instantiate a ConfigTemplate object, initializing a Jinja2 environment and - # setting up template variables and extensions. 
- template = ConfigTemplate(fp_template) +from asreviewcontrib.makita.template_base import RenderTemplateBase + + +class RenderJobsBasic(RenderTemplateBase): + def __init__(self, *args, **kwargs): + self.n_runs = kwargs.pop('n_runs', 1) + self.classifier = kwargs.pop('classifier', "nb") + self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + super().__init__(*args, **kwargs) + + def prepare_dataset_params(self, index, fp_dataset): + return { + "input_file": fp_dataset.as_posix(), + "input_file_stem": fp_dataset.stem, + "model_seed": self.model_seed + index, + "init_seed": self.init_seed, + "n_runs": self.n_runs, + } - # render scripts - if template.scripts is not None: - for s in template.scripts: - t_script = file_handler.render_file_from_template( - s, "script", output_folder=output_folder - ) - export_fp = Path(scripts_folder, s) - file_handler.add_file(t_script, export_fp) + def render(self): + self.file_handler.print_summary() + params = self.prepare_common_params() - # render docs - if template.docs is not None: - for s in template.docs: - t_docs = file_handler.render_file_from_template( - s, - "doc", - datasets=datasets, - template_name=template.name if template.name == "basic" else "custom", - template_name_long=template.name_long, - template_scripts=template.scripts, - output_folder=output_folder, - job_file=job_file, - ) - file_handler.add_file(t_docs, s) + if self.template.scripts: + self.render_scripts(self.template.scripts) - # print summary to console - file_handler.print_summary() + if self.template.docs: + self.render_docs(self.template.docs, "basic") - # render file and return - return template.render( - { + rendered_output = self.template.render({ "datasets": params, - "create_wordclouds": create_wordclouds, - "classifier": classifier, - "feature_extractor": feature_extractor, - "query_strategy": query_strategy, - "balance_strategy": balance_strategy, - "instances_per_query": instances_per_query, - "stop_if": stop_if, - "output_folder": output_folder, - "scripts_folder": scripts_folder, - "platform_sys": platform_sys, - "version": __version__, - } - ) + "create_wordclouds": self.create_wordclouds, + "classifier": self.classifier, + "feature_extractor": self.feature_extractor, + "query_strategy": self.query_strategy, + "balance_strategy": self.balance_strategy, + "instances_per_query": self.instances_per_query, + "stop_if": self.stop_if, + "output_folder": self.output_folder, + "scripts_folder": self.scripts_folder, + "platform": self.platform_sys, + "version": self.__version__, + }) + + return rendered_output From 7dd722f79b32788a376a02d4b3f551169082c962 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 14:32:53 +0100 Subject: [PATCH 03/95] Change arfi --- asreviewcontrib/makita/entrypoint.py | 6 +- asreviewcontrib/makita/template_arfi.py | 157 ++++++++---------------- 2 files changed, 53 insertions(+), 110 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index d04550ad..16abc633 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -6,7 +6,6 @@ from asreviewcontrib.makita import __version__ from asreviewcontrib.makita.config import TEMPLATES_FP -from asreviewcontrib.makita.template_basic import render_jobs_basic from asreviewcontrib.makita.utils import FileHandler @@ -236,8 +235,9 @@ def _template(self, args): ).render() elif args.name in ["arfi"]: + from asreviewcontrib.makita.template_arfi import RenderJobsARFI # render jobs - job = 
render_jobs_arfi( + job = RenderJobsARFI( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, @@ -253,7 +253,7 @@ def _template(self, args): fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, - ) + ).render() elif args.name in ["multimodel"]: # render jobs diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 9506fec0..45517edc 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -1,114 +1,57 @@ """Render ARFI template.""" -import os -import platform -from pathlib import Path - import numpy as np -from asreview import ASReviewData -from cfgtemplater.config_template import ConfigTemplate - -from asreviewcontrib.makita import __version__ -from asreviewcontrib.makita.utils import FileHandler -from asreviewcontrib.makita.utils import check_filename_dataset - - -def render_jobs_arfi( - datasets, - output_folder="output", - scripts_folder="scripts", - create_wordclouds=True, - n_priors=10, - init_seed=535, - model_seed=165, - classifier="nb", - feature_extractor="tfidf", - query_strategy="max", - balance_strategy="double", - instances_per_query=1, - stop_if='min', - fp_template=None, - job_file=None, - platform_sys=None, -): - """Render jobs.""" - - if not platform_sys: - platform_sys = platform.system() - if not job_file: - job_file = "jobs.bat" if os.name == "nt" else "jobs.sh" - - params = [] - - # initialize file handler - file_handler = FileHandler() - - # generate params for all simulations - for i, fp_dataset in enumerate(sorted(datasets)): - check_filename_dataset(fp_dataset) - - # render priors - priors = _get_priors(fp_dataset, init_seed=init_seed + i, n_priors=n_priors) - - # params for single dataset - params.append( - { - "input_file": fp_dataset.as_posix(), - "input_file_stem": fp_dataset.stem, - "priors": priors, - "model_seed": model_seed + i, - } - ) - - # Instantiate a ConfigTemplate object, initializing a Jinja2 environment and - # setting up template variables and extensions. 
- template = ConfigTemplate(fp_template) - - # render scripts - if template.scripts is not None: - for s in template.scripts: - t_script = file_handler.render_file_from_template( - s, "script", output_folder=output_folder - ) - export_fp = Path(scripts_folder, s) - file_handler.add_file(t_script, export_fp) - - # render docs - if template.docs is not None: - for s in template.docs: - t_docs = file_handler.render_file_from_template( - s, - "doc", - datasets=datasets, - template_name=template.name if template.name == "ARFI" else "custom", - template_name_long=template.name_long, - template_scripts=template.scripts, - output_folder=output_folder, - job_file=job_file, - ) - file_handler.add_file(t_docs, s) - - # print summary to console - file_handler.print_summary() - - # render file and return - return template.render( - { - "datasets": params, - "create_wordclouds": create_wordclouds, - "classifier": classifier, - "feature_extractor": feature_extractor, - "query_strategy": query_strategy, - "balance_strategy": balance_strategy, - "instances_per_query": instances_per_query, - "stop_if": stop_if, - "init_seed": init_seed, - "output_folder": output_folder, - "scripts_folder": scripts_folder, - "platform": platform_sys, - "version": __version__, +from asreview.data import ASReviewData + +from asreviewcontrib.makita.template_base import RenderTemplateBase + + +class RenderJobsARFI(RenderTemplateBase): + def __init__(self, *args, **kwargs): + self.n_runs = kwargs.pop('n_runs', 1) + self.classifier = kwargs.pop('classifier', "nb") + self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + self.n_priors = kwargs.pop('n_priors', 10) + super().__init__(*args, **kwargs) + + def prepare_dataset_params(self, index, fp_dataset): + priors = _get_priors(fp_dataset, + init_seed=self.init_seed + index, + n_priors=self.n_priors) + return { + "input_file": fp_dataset.as_posix(), + "input_file_stem": fp_dataset.stem, + "priors": priors, + "model_seed": self.model_seed + index, } - ) + + def render(self): + self.file_handler.print_summary() + params = self.prepare_common_params() + + if self.template.scripts: + self.render_scripts(self.template.scripts) + + if self.template.docs: + self.render_docs(self.template.docs, "ARFI") + + rendered_output = self.template.render({ + "datasets": params, + "create_wordclouds": self.create_wordclouds, + "classifier": self.classifier, + "feature_extractor": self.feature_extractor, + "query_strategy": self.query_strategy, + "balance_strategy": self.balance_strategy, + "instances_per_query": self.instances_per_query, + "stop_if": self.stop_if, + "init_seed": self.init_seed, + "output_folder": self.output_folder, + "scripts_folder": self.scripts_folder, + "platform": self.platform_sys, + "version": self.__version__, + }) + + return rendered_output def _get_priors(dataset, init_seed, n_priors): From feb9498fc80b9334a103c948e197dd87f610da54 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 14:52:56 +0100 Subject: [PATCH 04/95] Move render to base class --- asreviewcontrib/makita/template_arfi.py | 18 ++++-------------- asreviewcontrib/makita/template_base.py | 19 ++++++++++++++++++- asreviewcontrib/makita/template_basic.py | 18 ++++-------------- 3 files changed, 26 insertions(+), 29 deletions(-) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 45517edc..228af590 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -12,6 +12,7 @@ def __init__(self, 
*args, **kwargs): self.classifier = kwargs.pop('classifier', "nb") self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") self.n_priors = kwargs.pop('n_priors', 10) + self.template_name = "ARFI" super().__init__(*args, **kwargs) def prepare_dataset_params(self, index, fp_dataset): @@ -25,17 +26,8 @@ def prepare_dataset_params(self, index, fp_dataset): "model_seed": self.model_seed + index, } - def render(self): - self.file_handler.print_summary() - params = self.prepare_common_params() - - if self.template.scripts: - self.render_scripts(self.template.scripts) - - if self.template.docs: - self.render_docs(self.template.docs, "ARFI") - - rendered_output = self.template.render({ + def prepare_template_params(self, params): + return { "datasets": params, "create_wordclouds": self.create_wordclouds, "classifier": self.classifier, @@ -49,9 +41,7 @@ def render(self): "scripts_folder": self.scripts_folder, "platform": self.platform_sys, "version": self.__version__, - }) - - return rendered_output + } def _get_priors(dataset, init_seed, n_priors): diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 7a4e5116..f55b9fc0 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -33,10 +33,27 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", self.file_handler = FileHandler() self.file_handler.overwrite_all = allow_overwrite self.template = ConfigTemplate(fp_template) + self.template_name = None self.__version__ = __version__ + assert self.template is not None, "Template is None." + assert self.fp_template is not None, "Template file is None." + def render(self): - raise NotImplementedError("Subclasses should implement this method to render specific templates.") + params = self.prepare_common_params() + + if self.template.scripts: + self.render_scripts(self.template.scripts) + + if self.template.docs: + self.render_docs(self.template.docs, self.template_name) + + rendered_output = self.template.render( + self.prepare_template_params(params) + ) + + self.file_handler.print_summary() + return rendered_output def prepare_common_params(self): params = [] diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 9c40c2a1..b441a9fd 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -8,6 +8,7 @@ def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) self.classifier = kwargs.pop('classifier', "nb") self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + self.template_name = "basic" super().__init__(*args, **kwargs) def prepare_dataset_params(self, index, fp_dataset): @@ -19,17 +20,8 @@ def prepare_dataset_params(self, index, fp_dataset): "n_runs": self.n_runs, } - def render(self): - self.file_handler.print_summary() - params = self.prepare_common_params() - - if self.template.scripts: - self.render_scripts(self.template.scripts) - - if self.template.docs: - self.render_docs(self.template.docs, "basic") - - rendered_output = self.template.render({ + def prepare_template_params(self, params): + return { "datasets": params, "create_wordclouds": self.create_wordclouds, "classifier": self.classifier, @@ -42,6 +34,4 @@ def render(self): "scripts_folder": self.scripts_folder, "platform": self.platform_sys, "version": self.__version__, - }) - - return rendered_output + } From 31ba8475c37bcd50734fc4caa87b83c078eb33a2 Mon Sep 17 00:00:00 2001 From: Jelle 
Teijema Date: Fri, 29 Mar 2024 15:33:30 +0100 Subject: [PATCH 05/95] Remove messy naming of templates --- asreviewcontrib/makita/entrypoint.py | 19 ++++----- asreviewcontrib/makita/template_arfi.py | 3 +- asreviewcontrib/makita/template_base.py | 54 +++++++++++++++--------- asreviewcontrib/makita/template_basic.py | 3 +- 4 files changed, 45 insertions(+), 34 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 16abc633..b5023a60 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -6,6 +6,8 @@ from asreviewcontrib.makita import __version__ from asreviewcontrib.makita.config import TEMPLATES_FP +from asreviewcontrib.makita.template_arfi import RenderJobsARFI +from asreviewcontrib.makita.template_basic import RenderJobsBasic from asreviewcontrib.makita.utils import FileHandler @@ -208,14 +210,12 @@ def _template(self, args): # throw exception if no datasets are found if len(datasets) == 0: - raise ValueError("No datasets found in the specified folder.") + raise ValueError("No datasets found in the selected data folder.") # create output folder Path(args.o).parent.mkdir(parents=True, exist_ok=True) - if args.name in ["basic"]: - from asreviewcontrib.makita.template_basic import RenderJobsBasic - + if args.name in [RenderJobsBasic.template_name]: job = RenderJobsBasic( datasets, output_folder=Path(args.o), @@ -234,9 +234,7 @@ def _template(self, args): platform_sys=args.platform, ).render() - elif args.name in ["arfi"]: - from asreviewcontrib.makita.template_arfi import RenderJobsARFI - # render jobs + elif args.name in [RenderJobsARFI.template_name]: job = RenderJobsARFI( datasets, output_folder=Path(args.o), @@ -256,7 +254,7 @@ def _template(self, args): ).render() elif args.name in ["multimodel"]: - # render jobs + job = render_jobs_multimodel( datasets, output_folder=Path(args.o), @@ -277,8 +275,7 @@ def _template(self, args): ) else: - # render jobs - job = render_jobs_basic( + job = RenderJobsBasic( datasets, output_folder=Path(args.o), init_seed=args.init_seed, @@ -286,7 +283,7 @@ def _template(self, args): fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, - ) + ).render() if args.platform == "Windows" or (args.platform is None and os.name == "nt"): job = _shell_to_batch(job) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 228af590..33dcbbc7 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -7,12 +7,13 @@ class RenderJobsARFI(RenderTemplateBase): + template_name = "ARFI" + def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) self.classifier = kwargs.pop('classifier', "nb") self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") self.n_priors = kwargs.pop('n_priors', 10) - self.template_name = "ARFI" super().__init__(*args, **kwargs) def prepare_dataset_params(self, index, fp_dataset): diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index f55b9fc0..21c39345 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -33,29 +33,18 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", self.file_handler = FileHandler() self.file_handler.overwrite_all = allow_overwrite self.template = ConfigTemplate(fp_template) - self.template_name = None self.__version__ = __version__ assert self.template is not None, "Template is 
None." assert self.fp_template is not None, "Template file is None." - def render(self): - params = self.prepare_common_params() - - if self.template.scripts: - self.render_scripts(self.template.scripts) - - if self.template.docs: - self.render_docs(self.template.docs, self.template_name) - - rendered_output = self.template.render( - self.prepare_template_params(params) - ) + def prepare_dataset_params(self, index, fp_dataset): + raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") # noqa - self.file_handler.print_summary() - return rendered_output + def prepare_template_params(self, params): + raise NotImplementedError("Subclasses should implement this method to prepare template-specific parameters.") # noqa - def prepare_common_params(self): + def gather_dataset_params(self): params = [] for i, fp_dataset in enumerate(sorted(self.datasets)): check_filename_dataset(fp_dataset) @@ -63,9 +52,6 @@ def prepare_common_params(self): params.append(self.prepare_dataset_params(i, fp_dataset)) return params - def prepare_dataset_params(self, index, fp_dataset): - raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") - def render_scripts(self, scripts: list): for s in scripts: t_script = self.file_handler.render_file_from_template( @@ -74,16 +60,42 @@ def render_scripts(self, scripts: list): export_fp = Path(self.scripts_folder, s) self.file_handler.add_file(t_script, export_fp) - def render_docs(self, docs: list, name: str): + def render_docs(self, docs: list): for s in docs: t_docs = self.file_handler.render_file_from_template( s, "doc", datasets=self.datasets, - template_name=self.template.name if self.template.name == name else "custom", + template_name=self.template.name, template_name_long=self.template.name_long, template_scripts=self.template.scripts, output_folder=self.output_folder, job_file=self.job_file, ) self.file_handler.add_file(t_docs, s) + + def render(self): + dataset_params = self.gather_dataset_params() + + if self.template.scripts: + self.render_scripts(self.template.scripts) + + if self.template.docs: + self.render_docs(self.template.docs) + + try: + rendered_output = self.template.render( + self.prepare_template_params(dataset_params) + ) + except Exception as e: + if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": + if self.template_name is None: + print("\033[31mERROR: A rendering exception occurred -", e) + print("The rendering process failed due to undefined parameters in the template.") # noqa + print("\033[33mPlease verify that the chosen base template is compatible with the selected template.\033[0m") # noqa + exit(1) + else: + raise + + self.file_handler.print_summary() + return rendered_output diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index b441a9fd..15ec9dbc 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -4,11 +4,12 @@ class RenderJobsBasic(RenderTemplateBase): + template_name = "basic" + def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) self.classifier = kwargs.pop('classifier', "nb") self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") - self.template_name = "basic" super().__init__(*args, **kwargs) def prepare_dataset_params(self, index, fp_dataset): From 5a46597b6e07302fe8bf1e962cc91e59bc47ed02 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 15:48:16 +0100 
Subject: [PATCH 06/95] cleanup filehandler --- asreviewcontrib/makita/entrypoint.py | 16 ++++++++++++---- asreviewcontrib/makita/template_base.py | 3 +-- asreviewcontrib/makita/utils.py | 4 ++-- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index b5023a60..6fa6714a 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -8,6 +8,7 @@ from asreviewcontrib.makita.config import TEMPLATES_FP from asreviewcontrib.makita.template_arfi import RenderJobsARFI from asreviewcontrib.makita.template_basic import RenderJobsBasic +from asreviewcontrib.makita.template_multimodel import RenderJobsMultiModel from asreviewcontrib.makita.utils import FileHandler @@ -102,6 +103,11 @@ def execute(self, argv): # noqa: C901 action="store_false", help="Disables the generation of wordclouds. " ) + parser_template.add_argument( + "--overwrite", + action="store_true", + help="Overwrite existing files in the output folder. ", + ) parser_template.add_argument( "--classifier", type=str, @@ -220,6 +226,7 @@ def _template(self, args): datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, n_runs=args.n_runs, init_seed=args.init_seed, model_seed=args.model_seed, @@ -239,6 +246,7 @@ def _template(self, args): datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, n_priors=args.n_priors, init_seed=args.init_seed, model_seed=args.model_seed, @@ -253,12 +261,12 @@ def _template(self, args): platform_sys=args.platform, ).render() - elif args.name in ["multimodel"]: - - job = render_jobs_multimodel( + elif args.name in [RenderJobsMultiModel.template_name]: + job = RenderJobsMultiModel( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, n_runs=args.n_runs, init_seed=args.init_seed, model_seed=args.model_seed, @@ -272,7 +280,7 @@ def _template(self, args): fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, - ) + ).render() else: job = RenderJobsBasic( diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 21c39345..5b3540a7 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -30,8 +30,7 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", self.fp_template = fp_template self.job_file = job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" self.platform_sys = platform_sys if platform_sys else platform.system() - self.file_handler = FileHandler() - self.file_handler.overwrite_all = allow_overwrite + self.file_handler = FileHandler(allow_overwrite) self.template = ConfigTemplate(fp_template) self.__version__ = __version__ diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index 4169248e..f3478d0d 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -12,8 +12,8 @@ class FileHandler: scripts. 
""" - def __init__(self): - self.overwrite_all = False + def __init__(self, allow_overwrite=False): + self.overwrite_all = allow_overwrite self.total_files = 0 def add_file(self, content, export_fp): From f0537941d4c5528ee10eab8206338c007e1154ac Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 15:48:31 +0100 Subject: [PATCH 07/95] pass wordcloud to doc renderer --- asreviewcontrib/makita/template_base.py | 1 + asreviewcontrib/makita/templates/doc_README.md.template | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 5b3540a7..29823a92 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -68,6 +68,7 @@ def render_docs(self, docs: list): template_name=self.template.name, template_name_long=self.template.name_long, template_scripts=self.template.scripts, + create_wordclouds=self.create_wordclouds, output_folder=self.output_folder, job_file=self.job_file, ) diff --git a/asreviewcontrib/makita/templates/doc_README.md.template b/asreviewcontrib/makita/templates/doc_README.md.template index df9667a8..9186dac5 100644 --- a/asreviewcontrib/makita/templates/doc_README.md.template +++ b/asreviewcontrib/makita/templates/doc_README.md.template @@ -13,13 +13,13 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview ```sh pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` - -If wordcloud images are required, install the following dependencies. +{% if create_wordclouds %} +For generating wordclouds, install the following dependencies. ```sh pip install asreview-wordcloud ``` - +{% endif %} ## Data The performance on the following datasets is evaluated: From 04172cd3b136ad9db87354a6e0384c651e56580f Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Fri, 29 Mar 2024 15:48:48 +0100 Subject: [PATCH 08/95] Start on multimodel class --- asreviewcontrib/makita/template_multimodel.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index c702cd67..87f748fa 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -119,3 +119,42 @@ def render_jobs_multimodel( "impossible_models": [i.split(",") for i in impossible_models], } ) + +"""Render multimodel template.""" + +from asreviewcontrib.makita.template_base import RenderTemplateBase + + +class RenderJobsMultiModel(RenderTemplateBase): + template_name = "multimodel" + + def __init__(self, *args, **kwargs): + self.n_runs = kwargs.pop('n_runs', 1) + self.classifier = kwargs.pop('classifier', "nb") + self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + super().__init__(*args, **kwargs) + + def prepare_dataset_params(self, index, fp_dataset): + return { + "input_file": fp_dataset.as_posix(), + "input_file_stem": fp_dataset.stem, + "model_seed": self.model_seed + index, + "init_seed": self.init_seed, + "n_runs": self.n_runs, + } + + def prepare_template_params(self, params): + return { + "datasets": params, + "create_wordclouds": self.create_wordclouds, + "classifier": self.classifier, + "feature_extractor": self.feature_extractor, + "query_strategy": self.query_strategy, + "balance_strategy": self.balance_strategy, + "instances_per_query": self.instances_per_query, + "stop_if": self.stop_if, + "output_folder": self.output_folder, + "scripts_folder": 
self.scripts_folder, + "platform": self.platform_sys, + "version": self.__version__, + } From c60bcf9c5c0026f1ddddb2dbe96cba8be4af117e Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:10:26 +0100 Subject: [PATCH 09/95] Update multimodel --- asreviewcontrib/makita/template_arfi.py | 4 +- asreviewcontrib/makita/template_base.py | 12 +- asreviewcontrib/makita/template_basic.py | 4 +- asreviewcontrib/makita/template_multimodel.py | 141 ++---------------- 4 files changed, 21 insertions(+), 140 deletions(-) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 33dcbbc7..08c2dbc5 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -16,7 +16,7 @@ def __init__(self, *args, **kwargs): self.n_priors = kwargs.pop('n_priors', 10) super().__init__(*args, **kwargs) - def prepare_dataset_params(self, index, fp_dataset): + def get_dynamic_params(self, index, fp_dataset): priors = _get_priors(fp_dataset, init_seed=self.init_seed + index, n_priors=self.n_priors) @@ -27,7 +27,7 @@ def prepare_dataset_params(self, index, fp_dataset): "model_seed": self.model_seed + index, } - def prepare_template_params(self, params): + def get_static_params(self, params): return { "datasets": params, "create_wordclouds": self.create_wordclouds, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 29823a92..60192f40 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -37,18 +37,18 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", assert self.template is not None, "Template is None." assert self.fp_template is not None, "Template file is None." 
- def prepare_dataset_params(self, index, fp_dataset): + def get_dynamic_params(self, index, fp_dataset): raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") # noqa - def prepare_template_params(self, params): + def get_static_params(self, params): raise NotImplementedError("Subclasses should implement this method to prepare template-specific parameters.") # noqa - def gather_dataset_params(self): + def collect_dynamic_params(self): params = [] for i, fp_dataset in enumerate(sorted(self.datasets)): check_filename_dataset(fp_dataset) fp_dataset = Path(fp_dataset) - params.append(self.prepare_dataset_params(i, fp_dataset)) + params.append(self.get_dynamic_params(i, fp_dataset)) return params def render_scripts(self, scripts: list): @@ -75,8 +75,6 @@ def render_docs(self, docs: list): self.file_handler.add_file(t_docs, s) def render(self): - dataset_params = self.gather_dataset_params() - if self.template.scripts: self.render_scripts(self.template.scripts) @@ -85,7 +83,7 @@ def render(self): try: rendered_output = self.template.render( - self.prepare_template_params(dataset_params) + self.get_static_params(self.collect_dynamic_params()) ) except Exception as e: if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 15ec9dbc..97389fc5 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -12,7 +12,7 @@ def __init__(self, *args, **kwargs): self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") super().__init__(*args, **kwargs) - def prepare_dataset_params(self, index, fp_dataset): + def get_dynamic_params(self, index, fp_dataset): return { "input_file": fp_dataset.as_posix(), "input_file_stem": fp_dataset.stem, @@ -21,7 +21,7 @@ def prepare_dataset_params(self, index, fp_dataset): "n_runs": self.n_runs, } - def prepare_template_params(self, params): + def get_static_params(self, params): return { "datasets": params, "create_wordclouds": self.create_wordclouds, diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 87f748fa..fd519f2d 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -1,127 +1,5 @@ """Render multimodel template.""" -import os -import platform -from pathlib import Path - -from cfgtemplater.config_template import ConfigTemplate - -from asreviewcontrib.makita import __version__ -from asreviewcontrib.makita.utils import FileHandler -from asreviewcontrib.makita.utils import check_filename_dataset - - -def render_jobs_multimodel( - datasets, - output_folder="output", - n_runs=1, - scripts_folder="scripts", - create_wordclouds=True, - init_seed=535, - model_seed=165, - all_classifiers=None, - all_feature_extractors=None, - impossible_models=None, - query_strategy="max", - balance_strategy="double", - instances_per_query=1, - stop_if='min', - fp_template=None, - job_file=None, - platform_sys=None, -): - if all_classifiers is None: - all_classifiers = ["logistic", "nb", "rf", "svm"] - - if all_feature_extractors is None: - all_feature_extractors = ["doc2vec", "sbert", "tfidf"] - - if impossible_models is None: - impossible_models = ["nb,doc2vec", "nb,sbert"] - - """Render jobs.""" - - if not platform_sys: - platform_sys = platform.system() - if not job_file: - job_file = "jobs.bat" if os.name == "nt" else "jobs.sh" - - params = [] 
- - # initialize file handler - file_handler = FileHandler() - - # generate params for all simulations - for i, fp_dataset in enumerate(sorted(datasets)): - check_filename_dataset(fp_dataset) - - fp_dataset = Path(fp_dataset) - - # params for single dataset - params.append( - { - "input_file": fp_dataset.as_posix(), - "input_file_stem": fp_dataset.stem, - "model_seed": model_seed + i, - "init_seed": init_seed, - } - ) - - # Instantiate a ConfigTemplate object, initializing a Jinja2 environment and - # setting up template variables and extensions. - template = ConfigTemplate(fp_template) - - # render scripts - if template.scripts is not None: - for s in template.scripts: - t_script = file_handler.render_file_from_template( - s, "script", output_folder=output_folder - ) - export_fp = Path(scripts_folder, s) - file_handler.add_file(t_script, export_fp) - - # render docs - if template.docs is not None: - for s in template.docs: - t_docs = file_handler.render_file_from_template( - s, - "doc", - datasets=datasets, - template_name=template.name - if template.name == "multimodel" - else "custom", - template_name_long=template.name_long, - template_scripts=template.scripts, - output_folder=output_folder, - job_file=job_file, - ) - file_handler.add_file(t_docs, s) - - # print summary to console - file_handler.print_summary() - - # render file and return - return template.render( - { - "datasets": params, - "create_wordclouds": create_wordclouds, - "query_strategy": query_strategy, - "balance_strategy": balance_strategy, - "instances_per_query": instances_per_query, - "stop_if": stop_if, - "output_folder": output_folder, - "n_runs": n_runs, - "scripts_folder": scripts_folder, - "platform": platform_sys, - "version": __version__, - "all_classifiers": all_classifiers, - "all_feature_extractors": all_feature_extractors, - "impossible_models": [i.split(",") for i in impossible_models], - } - ) - -"""Render multimodel template.""" - from asreviewcontrib.makita.template_base import RenderTemplateBase @@ -130,31 +8,36 @@ class RenderJobsMultiModel(RenderTemplateBase): def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) - self.classifier = kwargs.pop('classifier', "nb") - self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + self.all_classifiers = kwargs.pop('all_classifiers', + ["logistic", "nb", "rf", "svm"]) + self.all_feature_extractors = kwargs.pop('all_feature_extractors', + ["doc2vec", "sbert", "tfidf"]) + self.impossible_models = kwargs.pop('impossible_models', + ["nb,doc2vec", "nb,sbert"]) super().__init__(*args, **kwargs) - def prepare_dataset_params(self, index, fp_dataset): + def get_dynamic_params(self, index, fp_dataset): return { "input_file": fp_dataset.as_posix(), "input_file_stem": fp_dataset.stem, "model_seed": self.model_seed + index, "init_seed": self.init_seed, - "n_runs": self.n_runs, } - def prepare_template_params(self, params): + def get_static_params(self, params): return { "datasets": params, "create_wordclouds": self.create_wordclouds, - "classifier": self.classifier, - "feature_extractor": self.feature_extractor, "query_strategy": self.query_strategy, "balance_strategy": self.balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, + "n_runs": self.n_runs, "scripts_folder": self.scripts_folder, "platform": self.platform_sys, "version": self.__version__, + "all_classifiers": self.all_classifiers, + "all_feature_extractors": self.all_feature_extractors, + 
"impossible_models": [i.split(",") for i in self.impossible_models], } From f84c2376f217b289d1c378620c878e94fab5acae Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:12:12 +0100 Subject: [PATCH 10/95] Update template_basic.py --- asreviewcontrib/makita/template_basic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 97389fc5..88cc21bb 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -13,6 +13,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): + """Prepare dataset-specific parameters. These parameters are provided to the + template once for each dataset.""" + return { "input_file": fp_dataset.as_posix(), "input_file_stem": fp_dataset.stem, @@ -22,6 +25,9 @@ def get_dynamic_params(self, index, fp_dataset): } def get_static_params(self, params): + """Prepare template-specific parameters. These parameters are provided to the + template only once.""" + return { "datasets": params, "create_wordclouds": self.create_wordclouds, From a71bc39889d96f9d81e456ad7d21eed2ad9a6134 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:21:16 +0100 Subject: [PATCH 11/95] lowcase fix --- asreviewcontrib/makita/entrypoint.py | 28 +++++++++++++++++-------- asreviewcontrib/makita/template_arfi.py | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 6fa6714a..53f6c7bd 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -203,6 +203,9 @@ def _template(self, args): if args.name == "multiple_models": args.name = "multimodel" + # lowcase name + args.name = args.name.lower() + # check if a custom template is used, otherwise use the default template fp_template = args.template or (args.name and _get_template_fp(args.name)) _is_valid_template(fp_template) @@ -283,15 +286,22 @@ def _template(self, args): ).render() else: - job = RenderJobsBasic( - datasets, - output_folder=Path(args.o), - init_seed=args.init_seed, - model_seed=args.model_seed, - fp_template=fp_template, - job_file=args.job_file, - platform_sys=args.platform, - ).render() + try: + job = RenderJobsBasic( + datasets, + output_folder=Path(args.o), + create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, + init_seed=args.init_seed, + model_seed=args.model_seed, + stop_if=args.stop_if, + fp_template=fp_template, + job_file=args.job_file, + platform_sys=args.platform, + ).render() + except Exception: + print(f"\u001b[31mERROR: Template {args.name} not found.\u001b[0m") + return if args.platform == "Windows" or (args.platform is None and os.name == "nt"): job = _shell_to_batch(job) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 08c2dbc5..a68eac3a 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -7,7 +7,7 @@ class RenderJobsARFI(RenderTemplateBase): - template_name = "ARFI" + template_name = "arfi" def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) From 152f71978487a873323c187714f659f5d4b6e766 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:24:57 +0100 Subject: [PATCH 12/95] Update ruff workflow --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 5c257afa..be3e7cd4 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -51,4 +51,4 @@ jobs: asreview makita add-script --all - name: Lint python with ruff run: | - ruff . + ruff check . From ecea6495e3ed98a3740c6ee0f517e38178850905 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:25:06 +0100 Subject: [PATCH 13/95] Noqa base tempalte --- asreviewcontrib/makita/template_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 60192f40..d6740560 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -28,7 +28,7 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", self.instances_per_query = instances_per_query self.stop_if = stop_if self.fp_template = fp_template - self.job_file = job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" + self.job_file = job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" # noqa self.platform_sys = platform_sys if platform_sys else platform.system() self.file_handler = FileHandler(allow_overwrite) self.template = ConfigTemplate(fp_template) From c1b036afe27944a40861672c32675c29978c6c74 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:29:22 +0100 Subject: [PATCH 14/95] Update workflows --- .github/workflows/ci-workflow.yml | 6 +++--- .github/workflows/pythonpackage.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index be3e7cd4..8aa54b79 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -7,10 +7,10 @@ jobs: os: [macos-latest, windows-latest, ubuntu-latest] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@master - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.x' architecture: 'x64' - name: Install makita run: | diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index effda0e0..ab84a01d 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -13,9 +13,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies From 973ab2f6af537a1073846eda8c98607af5d68e88 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:29:41 +0100 Subject: [PATCH 15/95] move dynamic parameter collection to render func --- asreviewcontrib/makita/template_base.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index d6740560..2799a288 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -43,14 +43,6 @@ def get_dynamic_params(self, index, fp_dataset): def get_static_params(self, params): raise NotImplementedError("Subclasses should implement this method to prepare template-specific parameters.") # noqa - def collect_dynamic_params(self): - params = [] - for i, fp_dataset in enumerate(sorted(self.datasets)): - 
check_filename_dataset(fp_dataset) - fp_dataset = Path(fp_dataset) - params.append(self.get_dynamic_params(i, fp_dataset)) - return params - def render_scripts(self, scripts: list): for s in scripts: t_script = self.file_handler.render_file_from_template( @@ -75,22 +67,34 @@ def render_docs(self, docs: list): self.file_handler.add_file(t_docs, s) def render(self): + """Render the template.""" + + # render scripts if self.template.scripts: self.render_scripts(self.template.scripts) + # render docs if self.template.docs: self.render_docs(self.template.docs) + # collect dynamic parameters + params = [] + for i, fp_dataset in enumerate(sorted(self.datasets)): + check_filename_dataset(fp_dataset) + fp_dataset = Path(fp_dataset) + params.append(self.get_dynamic_params(i, fp_dataset)) + + # render template try: rendered_output = self.template.render( - self.get_static_params(self.collect_dynamic_params()) + self.get_static_params(params) ) except Exception as e: if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": if self.template_name is None: print("\033[31mERROR: A rendering exception occurred -", e) print("The rendering process failed due to undefined parameters in the template.") # noqa - print("\033[33mPlease verify that the chosen base template is compatible with the selected template.\033[0m") # noqa + print("\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m") # noqa exit(1) else: raise From e36e46cce26e9845df7601f148c66c92d04f03d4 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:45:45 +0100 Subject: [PATCH 16/95] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 7fd234d8..f8277982 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ optional arguments: --platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates. --n_runs N_RUNS Number of runs. Default: 1. --no_wordclouds Disables the generation of wordclouds. + --overwrite Automatically accepts all overwrite requests. --classifier CLASSIFIER Classifier to use. Default: nb. --feature_extractor FEATURE_EXTRACTOR Feature_extractor to use. Default: tfidf. --query_strategy QUERY_STRATEGY Query strategy to use. Default: max. @@ -148,6 +149,7 @@ optional arguments: --platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates. --n_priors N_PRIORS Number of priors. Default: 10. --no_wordclouds Disables the generation of wordclouds. + --overwrite Automatically accepts all overwrite requests. --classifier CLASSIFIER Classifier to use. Default: nb. --feature_extractor FEATURE_EXTRACTOR Feature_extractor to use. Default: tfidf. --query_strategy QUERY_STRATEGY Query strategy to use. Default: max. @@ -175,6 +177,7 @@ optional arguments: --platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates. --n_runs N_RUNS Number of runs. Default: 1. --no_wordclouds Disables the generation of wordclouds. + --overwrite Automatically accepts all overwrite requests. --query_strategy QUERY_STRATEGY Query strategy to use. Default: max. --balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double. --instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1. 
From 10e68359badace52ebe9b3a8de7e3854719e941d Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 00:46:01 +0100 Subject: [PATCH 17/95] Add some text --- asreviewcontrib/makita/entrypoint.py | 5 ++-- asreviewcontrib/makita/template_arfi.py | 6 +++++ asreviewcontrib/makita/template_base.py | 25 +++++++++++++------ asreviewcontrib/makita/template_multimodel.py | 8 +++++- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 53f6c7bd..1b1df6c1 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -299,9 +299,8 @@ def _template(self, args): job_file=args.job_file, platform_sys=args.platform, ).render() - except Exception: - print(f"\u001b[31mERROR: Template {args.name} not found.\u001b[0m") - return + except Exception as e: + raise e if args.platform == "Windows" or (args.platform is None and os.name == "nt"): job = _shell_to_batch(job) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index a68eac3a..8c67a4ef 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -17,6 +17,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): + """Prepare dataset-specific parameters. These parameters are provided to the + template once for each dataset.""" + priors = _get_priors(fp_dataset, init_seed=self.init_seed + index, n_priors=self.n_priors) @@ -28,6 +31,9 @@ def get_dynamic_params(self, index, fp_dataset): } def get_static_params(self, params): + """Prepare template-specific parameters. These parameters are provided to the + template only once.""" + return { "datasets": params, "create_wordclouds": self.create_wordclouds, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 2799a288..9396f0fb 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -38,12 +38,20 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", assert self.fp_template is not None, "Template file is None." def get_dynamic_params(self, index, fp_dataset): + """Prepare dataset-specific parameters. These parameters are provided to the + template once for each dataset.""" + raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") # noqa def get_static_params(self, params): + """Prepare template-specific parameters. 
These parameters are provided to the + template only once.""" + raise NotImplementedError("Subclasses should implement this method to prepare template-specific parameters.") # noqa def render_scripts(self, scripts: list): + """Render scripts.""" + for s in scripts: t_script = self.file_handler.render_file_from_template( s, "script", output_folder=self.output_folder @@ -52,6 +60,8 @@ def render_scripts(self, scripts: list): self.file_handler.add_file(t_script, export_fp) def render_docs(self, docs: list): + """Render docs.""" + for s in docs: t_docs = self.file_handler.render_file_from_template( s, @@ -67,7 +77,7 @@ def render_docs(self, docs: list): self.file_handler.add_file(t_docs, s) def render(self): - """Render the template.""" + """Render template.""" # render scripts if self.template.scripts: @@ -91,13 +101,12 @@ def render(self): ) except Exception as e: if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": - if self.template_name is None: - print("\033[31mERROR: A rendering exception occurred -", e) - print("The rendering process failed due to undefined parameters in the template.") # noqa - print("\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m") # noqa - exit(1) - else: - raise + print("\033[31mERROR: A rendering exception occurred -", e) + print("The rendering process failed due to undefined parameters in the template.") # noqa + print("\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m") # noqa + exit(1) + else: + raise e self.file_handler.print_summary() return rendered_output diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index fd519f2d..0caa91d0 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -9,7 +9,7 @@ class RenderJobsMultiModel(RenderTemplateBase): def __init__(self, *args, **kwargs): self.n_runs = kwargs.pop('n_runs', 1) self.all_classifiers = kwargs.pop('all_classifiers', - ["logistic", "nb", "rf", "svm"]) + ["logistic", "nb", "rf"]) self.all_feature_extractors = kwargs.pop('all_feature_extractors', ["doc2vec", "sbert", "tfidf"]) self.impossible_models = kwargs.pop('impossible_models', @@ -17,6 +17,9 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): + """Prepare dataset-specific parameters. These parameters are provided to the + template once for each dataset.""" + return { "input_file": fp_dataset.as_posix(), "input_file_stem": fp_dataset.stem, @@ -25,6 +28,9 @@ def get_dynamic_params(self, index, fp_dataset): } def get_static_params(self, params): + """Prepare template-specific parameters. 
These parameters are provided to the + template only once.""" + return { "datasets": params, "create_wordclouds": self.create_wordclouds, From 295762278cd73c99d173d9b652a83ca24d7139f2 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 01:33:41 +0100 Subject: [PATCH 18/95] refactor template finder --- asreviewcontrib/makita/entrypoint.py | 57 ++++++++++++------------- asreviewcontrib/makita/template_base.py | 4 +- asreviewcontrib/makita/utils.py | 15 ------- 3 files changed, 29 insertions(+), 47 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 1b1df6c1..49aa929e 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -12,17 +12,6 @@ from asreviewcontrib.makita.utils import FileHandler -def _get_template_fp(name): - return Path(TEMPLATES_FP, f"template_{name}.txt.template") - - -def _is_valid_template(fp): - if fp and Path(fp).is_file(): - return True - else: - raise ValueError(f"Template {fp} not found") - - def _shell_to_batch(job): job = f"@ echo off\nCOLOR E0{job}" job = job.replace("#", "::") @@ -203,12 +192,22 @@ def _template(self, args): if args.name == "multiple_models": args.name = "multimodel" - # lowcase name + # lowercase name args.name = args.name.lower() - # check if a custom template is used, otherwise use the default template - fp_template = args.template or (args.name and _get_template_fp(args.name)) - _is_valid_template(fp_template) + # check if the template exists + fp_template = Path(TEMPLATES_FP, f"template_{args.name}.txt.template") + if not fp_template.is_file(): + raise ValueError(f"Template {args.name} not found") + + # if a custom template is provided, check if it exists + if args.template: + fp_template = Path(args.template) + if not fp_template.is_file(): + raise ValueError(f"Custom template {args.template} not found") + print(f"\033[33mRendering custom template {args.template}.\u001b[0m\n") + else: + print(f"\033[33mRendering template {args.name}.\u001b[0m\n") # load datasets datasets = ( @@ -286,21 +285,19 @@ def _template(self, args): ).render() else: - try: - job = RenderJobsBasic( - datasets, - output_folder=Path(args.o), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - init_seed=args.init_seed, - model_seed=args.model_seed, - stop_if=args.stop_if, - fp_template=fp_template, - job_file=args.job_file, - platform_sys=args.platform, - ).render() - except Exception as e: - raise e + print("\033[33mUsing with basic template.\u001b[0m\n") + job = RenderJobsBasic( + datasets, + output_folder=Path(args.o), + create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, + init_seed=args.init_seed, + model_seed=args.model_seed, + stop_if=args.stop_if, + fp_template=fp_template, + job_file=args.job_file, + platform_sys=args.platform, + ).render() if args.platform == "Windows" or (args.platform is None and os.name == "nt"): job = _shell_to_batch(job) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 9396f0fb..d6959cf5 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -8,7 +8,6 @@ from asreviewcontrib.makita import __version__ from asreviewcontrib.makita.utils import FileHandler -from asreviewcontrib.makita.utils import check_filename_dataset class RenderTemplateBase: @@ -90,7 +89,8 @@ def render(self): # collect dynamic parameters params = [] for i, fp_dataset in enumerate(sorted(self.datasets)): - 
check_filename_dataset(fp_dataset) + if " " in Path(fp_dataset).stem: + raise ValueError(f"Dataset filename '{fp_dataset}' cannot contain whitespace.") fp_dataset = Path(fp_dataset) params.append(self.get_dynamic_params(i, fp_dataset)) diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index f3478d0d..d32b1902 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -85,18 +85,3 @@ def render_file_from_template(self, name, file_type, **kwargs): template = Template(f.read()) return template.render({**params, **kwargs}) - - -def check_filename_dataset(fp): - """ - Check if the filename of the dataset contains any whitespace. - - Args: - fp (str): The file path of the dataset. - - Raises: - ValueError: If the filename of the dataset contains whitespace. - """ - - if " " in Path(fp).stem: - raise ValueError(f"Dataset filename '{fp}' cannot contain whitespace.") From c005ccea2efcf8d3635eb955d12f18a59f938998 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 01:38:42 +0100 Subject: [PATCH 19/95] Rewrite default makita console output --- asreviewcontrib/makita/entrypoint.py | 6 +++++- asreviewcontrib/makita/utils.py | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 49aa929e..0d0fd10d 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -205,7 +205,11 @@ def _template(self, args): fp_template = Path(args.template) if not fp_template.is_file(): raise ValueError(f"Custom template {args.template} not found") - print(f"\033[33mRendering custom template {args.template}.\u001b[0m\n") + + # print rendering message + if args.template: + print(f"\ +\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n") else: print(f"\033[33mRendering template {args.name}.\u001b[0m\n") diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index d32b1902..e500dac6 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -51,7 +51,8 @@ def allow_overwrite(): with open(export_fp, "w") as f: f.write(content) - print(f"Added {export_fp}") + print(f"Created {export_fp}") + self.total_files += 1 def print_summary(self): @@ -59,7 +60,7 @@ def print_summary(self): Print the total number of files created by the FileHandler object. """ - print(f"{self.total_files} file(s) created.") + print(f"\n{self.total_files} file(s) created.") def render_file_from_template(self, name, file_type, **kwargs): """ @@ -78,7 +79,6 @@ def render_file_from_template(self, name, file_type, **kwargs): "version": __version__, } - print(f"Loading {file_type} {name}") # open template with open(Path(TEMPLATES_FP, f"{file_type}_{name}.template")) as f: From f7830e6fa91a21c875bf5687b3b402eff1374152 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 01:39:12 +0100 Subject: [PATCH 20/95] Ruff! 
--- asreviewcontrib/makita/template_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index d6959cf5..8ad53b86 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -90,7 +90,7 @@ def render(self): params = [] for i, fp_dataset in enumerate(sorted(self.datasets)): if " " in Path(fp_dataset).stem: - raise ValueError(f"Dataset filename '{fp_dataset}' cannot contain whitespace.") + raise ValueError(f"Dataset filename '{fp_dataset}' cannot contain whitespace.") # noqa fp_dataset = Path(fp_dataset) params.append(self.get_dynamic_params(i, fp_dataset)) From d5bc734879911cd21029f3e3b2dd1bec162a8580 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Sat, 30 Mar 2024 01:39:59 +0100 Subject: [PATCH 21/95] Ruff! 2 --- asreviewcontrib/makita/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index e500dac6..4e566b1c 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -79,7 +79,6 @@ def render_file_from_template(self, name, file_type, **kwargs): "version": __version__, } - # open template with open(Path(TEMPLATES_FP, f"{file_type}_{name}.template")) as f: template = Template(f.read()) From aa0868bb133d8c07de969a86b369f1bb99597ebd Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 12:52:47 +0200 Subject: [PATCH 22/95] Rename Template Classes --- asreviewcontrib/makita/entrypoint.py | 20 +++++++++---------- asreviewcontrib/makita/template_arfi.py | 4 ++-- asreviewcontrib/makita/template_base.py | 2 +- asreviewcontrib/makita/template_basic.py | 4 ++-- asreviewcontrib/makita/template_multimodel.py | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 0d0fd10d..15b2b8cf 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -6,9 +6,9 @@ from asreviewcontrib.makita import __version__ from asreviewcontrib.makita.config import TEMPLATES_FP -from asreviewcontrib.makita.template_arfi import RenderJobsARFI -from asreviewcontrib.makita.template_basic import RenderJobsBasic -from asreviewcontrib.makita.template_multimodel import RenderJobsMultiModel +from asreviewcontrib.makita.template_arfi import TemplateARFI +from asreviewcontrib.makita.template_basic import TemplateBasic +from asreviewcontrib.makita.template_multimodel import TemplateMultiModel from asreviewcontrib.makita.utils import FileHandler @@ -227,8 +227,8 @@ def _template(self, args): # create output folder Path(args.o).parent.mkdir(parents=True, exist_ok=True) - if args.name in [RenderJobsBasic.template_name]: - job = RenderJobsBasic( + if args.name in [TemplateBasic.template_name]: + job = TemplateBasic( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, @@ -247,8 +247,8 @@ def _template(self, args): platform_sys=args.platform, ).render() - elif args.name in [RenderJobsARFI.template_name]: - job = RenderJobsARFI( + elif args.name in [TemplateARFI.template_name]: + job = TemplateARFI( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, @@ -267,8 +267,8 @@ def _template(self, args): platform_sys=args.platform, ).render() - elif args.name in [RenderJobsMultiModel.template_name]: - job = RenderJobsMultiModel( + elif args.name in [TemplateMultiModel.template_name]: + job = TemplateMultiModel( 
datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, @@ -290,7 +290,7 @@ def _template(self, args): else: print("\033[33mUsing with basic template.\u001b[0m\n") - job = RenderJobsBasic( + job = TemplateBasic( datasets, output_folder=Path(args.o), create_wordclouds=args.no_wordclouds, diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 8c67a4ef..15afe859 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -3,10 +3,10 @@ import numpy as np from asreview.data import ASReviewData -from asreviewcontrib.makita.template_base import RenderTemplateBase +from asreviewcontrib.makita.template_base import TemplateBase -class RenderJobsARFI(RenderTemplateBase): +class TemplateARFI(TemplateBase): template_name = "arfi" def __init__(self, *args, **kwargs): diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 8ad53b86..4a1d8ad6 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -10,7 +10,7 @@ from asreviewcontrib.makita.utils import FileHandler -class RenderTemplateBase: +class TemplateBase: def __init__(self, datasets, output_folder="output", scripts_folder="scripts", create_wordclouds=True, allow_overwrite=False, init_seed=535, model_seed=165, query_strategy="max", balance_strategy="double", diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 88cc21bb..fca4362c 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -1,9 +1,9 @@ """Render basic template.""" -from asreviewcontrib.makita.template_base import RenderTemplateBase +from asreviewcontrib.makita.template_base import TemplateBase -class RenderJobsBasic(RenderTemplateBase): +class TemplateBasic(TemplateBase): template_name = "basic" def __init__(self, *args, **kwargs): diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 0caa91d0..7b9b2573 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -1,9 +1,9 @@ """Render multimodel template.""" -from asreviewcontrib.makita.template_base import RenderTemplateBase +from asreviewcontrib.makita.template_base import TemplateBase -class RenderJobsMultiModel(RenderTemplateBase): +class TemplateMultiModel(TemplateBase): template_name = "multimodel" def __init__(self, *args, **kwargs): From 7002fa9b8cc2cdbf5902c43ffffb4b976880479a Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 12:57:53 +0200 Subject: [PATCH 23/95] Format with Ruff --- asreviewcontrib/makita/entrypoint.py | 25 ++++------ asreviewcontrib/makita/template_arfi.py | 14 +++--- asreviewcontrib/makita/template_base.py | 50 +++++++++++++------ asreviewcontrib/makita/template_basic.py | 6 +-- asreviewcontrib/makita/template_multimodel.py | 15 +++--- 5 files changed, 65 insertions(+), 45 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 15b2b8cf..d1e805cd 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -84,13 +84,12 @@ def execute(self, argv): # noqa: C901 "--n_priors", type=int, default=10, - help="Number of priors. Only for template 'arfi'. " - "Default: 10.", + help="Number of priors. Only for template 'arfi'. 
" "Default: 10.", ) parser_template.add_argument( "--no_wordclouds", action="store_false", - help="Disables the generation of wordclouds. " + help="Disables the generation of wordclouds. ", ) parser_template.add_argument( "--overwrite", @@ -115,22 +114,19 @@ def execute(self, argv): # noqa: C901 "--query_strategy", type=str, default="max", - help="Query strategy to use. " - "Default: max.", + help="Query strategy to use. " "Default: max.", ) parser_template.add_argument( "--balance_strategy", type=str, default="double", - help="Balance strategy to use. " - "Default: double.", + help="Balance strategy to use. " "Default: double.", ) parser_template.add_argument( "--instances_per_query", type=int, default=1, - help="Number of instances per query. " - "Default: 1.", + help="Number of instances per query. " "Default: 1.", ) parser_template.add_argument( "--stop_if", @@ -188,13 +184,13 @@ def _template_cli(self, args): def _template(self, args): """Generate a template.""" + # lowercase name + args.name = args.name.lower() + # backwards compatibility for 'multiple_models' if args.name == "multiple_models": args.name = "multimodel" - # lowercase name - args.name = args.name.lower() - # check if the template exists fp_template = Path(TEMPLATES_FP, f"template_{args.name}.txt.template") if not fp_template.is_file(): @@ -208,8 +204,9 @@ def _template(self, args): # print rendering message if args.template: - print(f"\ -\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n") + print( + f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" + ) # noqa else: print(f"\033[33mRendering template {args.name}.\u001b[0m\n") diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 15afe859..e00d317b 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -10,19 +10,19 @@ class TemplateARFI(TemplateBase): template_name = "arfi" def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop('n_runs', 1) - self.classifier = kwargs.pop('classifier', "nb") - self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") - self.n_priors = kwargs.pop('n_priors', 10) + self.n_runs = kwargs.pop("n_runs", 1) + self.classifier = kwargs.pop("classifier", "nb") + self.feature_extractor = kwargs.pop("feature_extractor", "tfidf") + self.n_priors = kwargs.pop("n_priors", 10) super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. 
These parameters are provided to the template once for each dataset.""" - priors = _get_priors(fp_dataset, - init_seed=self.init_seed + index, - n_priors=self.n_priors) + priors = _get_priors( + fp_dataset, init_seed=self.init_seed + index, n_priors=self.n_priors + ) return { "input_file": fp_dataset.as_posix(), "input_file_stem": fp_dataset.stem, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 4a1d8ad6..1eacb6d0 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -11,11 +11,23 @@ class TemplateBase: - def __init__(self, datasets, output_folder="output", scripts_folder="scripts", - create_wordclouds=True, allow_overwrite=False, init_seed=535, - model_seed=165, query_strategy="max", balance_strategy="double", - instances_per_query=1, stop_if='min', fp_template=None, job_file=None, - platform_sys=None): + def __init__( + self, + datasets, + output_folder="output", + scripts_folder="scripts", + create_wordclouds=True, + allow_overwrite=False, + init_seed=535, + model_seed=165, + query_strategy="max", + balance_strategy="double", + instances_per_query=1, + stop_if="min", + fp_template=None, + job_file=None, + platform_sys=None, + ): self.datasets = datasets self.output_folder = output_folder self.scripts_folder = scripts_folder @@ -27,7 +39,9 @@ def __init__(self, datasets, output_folder="output", scripts_folder="scripts", self.instances_per_query = instances_per_query self.stop_if = stop_if self.fp_template = fp_template - self.job_file = job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" # noqa + self.job_file = ( + job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" + ) # noqa self.platform_sys = platform_sys if platform_sys else platform.system() self.file_handler = FileHandler(allow_overwrite) self.template = ConfigTemplate(fp_template) @@ -40,13 +54,17 @@ def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" - raise NotImplementedError("Subclasses should implement this method to prepare dataset-specific parameters.") # noqa + raise NotImplementedError( + "Subclasses should implement this method to prepare dataset-specific parameters." + ) # noqa def get_static_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" - raise NotImplementedError("Subclasses should implement this method to prepare template-specific parameters.") # noqa + raise NotImplementedError( + "Subclasses should implement this method to prepare template-specific parameters." + ) # noqa def render_scripts(self, scripts: list): """Render scripts.""" @@ -90,20 +108,24 @@ def render(self): params = [] for i, fp_dataset in enumerate(sorted(self.datasets)): if " " in Path(fp_dataset).stem: - raise ValueError(f"Dataset filename '{fp_dataset}' cannot contain whitespace.") # noqa + raise ValueError( + f"Dataset filename '{fp_dataset}' cannot contain whitespace." 
+ ) # noqa fp_dataset = Path(fp_dataset) params.append(self.get_dynamic_params(i, fp_dataset)) # render template try: - rendered_output = self.template.render( - self.get_static_params(params) - ) + rendered_output = self.template.render(self.get_static_params(params)) except Exception as e: if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": print("\033[31mERROR: A rendering exception occurred -", e) - print("The rendering process failed due to undefined parameters in the template.") # noqa - print("\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m") # noqa + print( + "The rendering process failed due to undefined parameters in the template." + ) # noqa + print( + "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" + ) # noqa exit(1) else: raise e diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index fca4362c..a3941cf5 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -7,9 +7,9 @@ class TemplateBasic(TemplateBase): template_name = "basic" def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop('n_runs', 1) - self.classifier = kwargs.pop('classifier', "nb") - self.feature_extractor = kwargs.pop('feature_extractor', "tfidf") + self.n_runs = kwargs.pop("n_runs", 1) + self.classifier = kwargs.pop("classifier", "nb") + self.feature_extractor = kwargs.pop("feature_extractor", "tfidf") super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 7b9b2573..f13a2993 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -7,13 +7,14 @@ class TemplateMultiModel(TemplateBase): template_name = "multimodel" def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop('n_runs', 1) - self.all_classifiers = kwargs.pop('all_classifiers', - ["logistic", "nb", "rf"]) - self.all_feature_extractors = kwargs.pop('all_feature_extractors', - ["doc2vec", "sbert", "tfidf"]) - self.impossible_models = kwargs.pop('impossible_models', - ["nb,doc2vec", "nb,sbert"]) + self.n_runs = kwargs.pop("n_runs", 1) + self.all_classifiers = kwargs.pop("all_classifiers", ["logistic", "nb", "rf"]) + self.all_feature_extractors = kwargs.pop( + "all_feature_extractors", ["doc2vec", "sbert", "tfidf"] + ) + self.impossible_models = kwargs.pop( + "impossible_models", ["nb,doc2vec", "nb,sbert"] + ) super().__init__(*args, **kwargs) def get_dynamic_params(self, index, fp_dataset): From 97cf357ecae0d5a3c2821cdc4af0625e4c60c443 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 13:01:35 +0200 Subject: [PATCH 24/95] Update noqa --- asreviewcontrib/makita/entrypoint.py | 4 ++-- asreviewcontrib/makita/template_base.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index d1e805cd..f572ce2b 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -205,8 +205,8 @@ def _template(self, args): # print rendering message if args.template: print( - f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" - ) # noqa + f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" # noqa + ) else: 
print(f"\033[33mRendering template {args.name}.\u001b[0m\n") diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 1eacb6d0..7d529f1b 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -55,16 +55,16 @@ def get_dynamic_params(self, index, fp_dataset): template once for each dataset.""" raise NotImplementedError( - "Subclasses should implement this method to prepare dataset-specific parameters." - ) # noqa + "Subclasses should implement this method to prepare dataset-specific parameters." # noqa + ) def get_static_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" raise NotImplementedError( - "Subclasses should implement this method to prepare template-specific parameters." - ) # noqa + "Subclasses should implement this method to prepare template-specific parameters." # noqa + ) def render_scripts(self, scripts: list): """Render scripts.""" @@ -121,11 +121,11 @@ def render(self): if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": print("\033[31mERROR: A rendering exception occurred -", e) print( - "The rendering process failed due to undefined parameters in the template." - ) # noqa + "The rendering process failed due to undefined parameters in the template." # noqa + ) print( - "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" - ) # noqa + "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" # noqa + ) exit(1) else: raise e From dd14db44cc92b450163ef2905018aac806e96c71 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 13:04:02 +0200 Subject: [PATCH 25/95] noqa E501 --- asreviewcontrib/makita/entrypoint.py | 2 +- asreviewcontrib/makita/template_base.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index f572ce2b..bc762490 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -205,7 +205,7 @@ def _template(self, args): # print rendering message if args.template: print( - f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" # noqa + f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" # noqa: E501 ) else: print(f"\033[33mRendering template {args.name}.\u001b[0m\n") diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 7d529f1b..dbdea603 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -55,7 +55,7 @@ def get_dynamic_params(self, index, fp_dataset): template once for each dataset.""" raise NotImplementedError( - "Subclasses should implement this method to prepare dataset-specific parameters." # noqa + "Subclasses should implement this method to prepare dataset-specific parameters." # noqa: E501 ) def get_static_params(self, params): @@ -63,7 +63,7 @@ def get_static_params(self, params): template only once.""" raise NotImplementedError( - "Subclasses should implement this method to prepare template-specific parameters." # noqa + "Subclasses should implement this method to prepare template-specific parameters." 
# noqa: E501 ) def render_scripts(self, scripts: list): @@ -121,10 +121,10 @@ def render(self): if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": print("\033[31mERROR: A rendering exception occurred -", e) print( - "The rendering process failed due to undefined parameters in the template." # noqa + "The rendering process failed due to undefined parameters in the template." # noqa: E501 ) print( - "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" # noqa + "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" # noqa: E501 ) exit(1) else: raise e From bcbe68c74f8a0160b1b53cd8d5858ac4f4015e5e Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:08:17 +0200 Subject: [PATCH 26/95] Add Fallback term to fallback print --- asreviewcontrib/makita/entrypoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index bc762490..cc7d11ec 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -286,7 +286,9 @@ def _template(self, args): ).render() else: - print("\033[33mUsing with basic template.\u001b[0m\n") + # Fallback to basic template + # This case can occur if a user adds a new template to the templates folder + print("\u001b[31mFallback: \033[33mUsing the basic template.\u001b[0m\n") From 05a04d2a6bc9baaee63b648b217d32e00444351d Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:15:40 +0200 Subject: [PATCH 27/95] format the scripts --- .../templates/script_get_plot.py.template | 30 +++++++------ ...script_get_settings_from_state.py.template | 19 ++------- .../script_merge_descriptives.py.template | 14 +++---- .../script_merge_metrics.py.template | 24 +++++------ .../templates/script_merge_tds.py.template | 42 ++++++++----------- 5 files changed, 55 insertions(+), 74 deletions(-) diff --git a/asreviewcontrib/makita/templates/script_get_plot.py.template b/asreviewcontrib/makita/templates/script_get_plot.py.template index 1108a226..4faf4a8a 100644 --- a/asreviewcontrib/makita/templates/script_get_plot.py.template +++ b/asreviewcontrib/makita/templates/script_get_plot.py.template @@ -34,10 +34,13 @@ def _set_legend(ax, state, legend_option, label_to_line, state_file): label = state_file.stem elif legend_option == "model": label = " - ".join( - [metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"]]) + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) elif legend_option == "classifier": label = metadata["settings"]["model"] else: @@ -82,27 +85,22 @@ def get_plot_from_states(states, filename, legend=None): _set_legend(ax, state, legend, label_to_line, state_file) if legend: - ax.legend(loc=4, prop={'size': 8}) + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) if __name__ == "__main__": - parser = argparse.ArgumentParser( description="Generate an ASReview plot from the found state files." 
) + parser.add_argument("-s", type=str, help="States location") + parser.add_argument("-o", type=str, help="Output location") parser.add_argument( - "-s", - type=str, - help="States location") - parser.add_argument( - "-o", + "--show_legend", + "-l", type=str, - help="Output location") - parser.add_argument( - "--show_legend", "-l", - type=str, - help="Add a legend to the plot, based on the given parameter.") + help="Add a legend to the plot, based on the given parameter.", + ) args = parser.parse_args() # load states diff --git a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template index d3f2f8c0..88bb47cc 100644 --- a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template +++ b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template @@ -36,23 +36,13 @@ def get_settings_from_state(state): return state.settings.to_dict() -if __name__ == '__main__': - - parser = argparse.ArgumentParser( - description='Convert ASReview state file to CSV' - ) - parser.add_argument( - 's', - type=str, - help='State file location') - parser.add_argument( - 'o', - type=str, - help='Export file location (json)') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Convert ASReview state file to CSV") + parser.add_argument("s", type=str, help="State file location") + parser.add_argument("o", type=str, help="Export file location (json)") args = parser.parse_args() with open_state(args.s) as state: - result = get_settings_from_state(state) # store result in output folder @@ -60,4 +50,3 @@ if __name__ == '__main__': with open(Path(args.o), "w") as f: json.dump(result, f) - diff --git a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template index eb5930f8..2b2992c5 100644 --- a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template +++ b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template @@ -35,10 +35,10 @@ def create_table_descriptives(datasets): for ds in datasets: with open(ds) as f: - data = json.load(f)['data']['items'] + data = json.load(f)["data"]["items"] values = {} for item in data: - values[item['id']] = item['value'] + values[item["id"]] = item["value"] stats.append(values) df = pd.DataFrame(stats, index=[Path(ds).name for ds in datasets]) @@ -46,7 +46,6 @@ def create_table_descriptives(datasets): if __name__ == "__main__": - parser = argparse.ArgumentParser( description="Merge descriptives of multiple files into single table." 
) @@ -54,12 +53,14 @@ if __name__ == "__main__": "-s", type=str, default="{{ output_folder }}/simulation/*/descriptives/", - help="Datasets location") + help="Datasets location", + ) parser.add_argument( "-o", type=str, default="{{ output_folder }}/tables/data_descriptives_all.csv", - help="Output table location") + help="Output table location", + ) args = parser.parse_args() # load datasets @@ -75,5 +76,4 @@ if __name__ == "__main__": # store result in output folder Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) - result.to_excel(Path(args.o).with_suffix('.xlsx')) - + result.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/asreviewcontrib/makita/templates/script_merge_metrics.py.template b/asreviewcontrib/makita/templates/script_merge_metrics.py.template index 83bd7efb..512857bb 100644 --- a/asreviewcontrib/makita/templates/script_merge_metrics.py.template +++ b/asreviewcontrib/makita/templates/script_merge_metrics.py.template @@ -33,25 +33,24 @@ def create_table_state_metrics(metric_files): for metric in metric_files: with open(metric) as f: - data = json.load(f)['data']['items'] + data = json.load(f)["data"]["items"] values = {} - values['file_name'] = Path(metric).name + values["file_name"] = Path(metric).name for item in data: - if item['id'] == 'td': + if item["id"] == "td": continue # check if value is a list - if item['value'] is not None and isinstance(item['value'], list): - for value in item['value']: - values[item['id'] + "_" + str(value[0])] = value[1] + if item["value"] is not None and isinstance(item["value"], list): + for value in item["value"]: + values[item["id"] + "_" + str(value[0])] = value[1] else: - values[item['id']] = item['value'] + values[item["id"]] = item["value"] metrics.append(values) return pd.DataFrame(metrics) if __name__ == "__main__": - parser = argparse.ArgumentParser( description="Merge metrics of multiple states into single table." 
) @@ -59,12 +58,14 @@ if __name__ == "__main__": "-s", type=str, default="{{ output_folder }}/simulation/*/metrics/", - help="states location") + help="states location", + ) parser.add_argument( "-o", type=str, default="{{ output_folder }}/tables/metrics_sim_all.csv", - help="Output table location") + help="Output table location", + ) args = parser.parse_args() # load metric files @@ -80,5 +81,4 @@ if __name__ == "__main__": # store result in output folder Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) - result.to_excel(Path(args.o).with_suffix('.xlsx')) - + result.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/asreviewcontrib/makita/templates/script_merge_tds.py.template b/asreviewcontrib/makita/templates/script_merge_tds.py.template index 28af7cc1..17971a1b 100644 --- a/asreviewcontrib/makita/templates/script_merge_tds.py.template +++ b/asreviewcontrib/makita/templates/script_merge_tds.py.template @@ -36,25 +36,28 @@ def create_table_state_tds(metrics): for metric in metrics: with open(metric) as f: - i = next(filter(lambda x: x['id'] == 'td', json.load(f)['data']['items']))['value'] # noqa + i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ + "value" + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 - df = pd.DataFrame(values, columns=['record_id', 'td', 'metric_file']) - pivoted = df.pivot_table(index='record_id', - columns='metric_file', - values='td', - aggfunc='first', - fill_value=nan) - pivoted.columns = [f'td_sim_{col}' for col in pivoted.columns] + df = pd.DataFrame(values, columns=["record_id", "td", "metric_file"]) + pivoted = df.pivot_table( + index="record_id", + columns="metric_file", + values="td", + aggfunc="first", + fill_value=nan, + ) + pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) - df['record_atd'] = df.mean(axis=1) - - df.loc['average_simulation_TD'] = df.iloc[:, :-1].mean(axis=0) + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) return df @@ -63,16 +66,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", - type=str, - required=True, - help="metrics location") - parser.add_argument( - "-o", - type=str, - required=True, - help="Output table location") + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -83,7 +78,7 @@ if __name__ == "__main__": raise FileNotFoundError("No metrics found in " + args.s) # check if output file has .csv extension - if Path(args.o).suffix != '.csv': + if Path(args.o).suffix != ".csv": raise ValueError("Output file should have .csv extension") td_table = create_table_state_tds(metric_files) @@ -92,5 +87,4 @@ if __name__ == "__main__": # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) atd_table.to_csv(Path(args.o)) - atd_table.to_excel(Path(args.o).with_suffix('.xlsx')) - + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) From fd25f7ab0ad816c5091b315dff315a0323f86759 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:20:46 +0200 Subject: [PATCH 28/95] Extra space at the end of templates --- asreviewcontrib/makita/templates/script_get_plot.py.template | 1 + .../makita/templates/script_get_settings_from_state.py.template | 1 + .../makita/templates/script_merge_descriptives.py.template | 1 + .../makita/templates/script_merge_metrics.py.template | 1 + asreviewcontrib/makita/templates/script_merge_tds.py.template | 1 + 5 files changed, 5 insertions(+) diff --git a/asreviewcontrib/makita/templates/script_get_plot.py.template b/asreviewcontrib/makita/templates/script_get_plot.py.template index 4faf4a8a..48b5cc33 100644 --- a/asreviewcontrib/makita/templates/script_get_plot.py.template +++ b/asreviewcontrib/makita/templates/script_get_plot.py.template @@ -112,3 +112,4 @@ if __name__ == "__main__": # generate plot and save results get_plot_from_states(states, args.o, args.show_legend) + diff --git a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template index 88bb47cc..dde02081 100644 --- a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template +++ b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template @@ -50,3 +50,4 @@ if __name__ == "__main__": with open(Path(args.o), "w") as f: json.dump(result, f) + diff --git a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template index 2b2992c5..05998197 100644 --- a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template +++ b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template @@ -77,3 +77,4 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) result.to_excel(Path(args.o).with_suffix(".xlsx")) + diff --git a/asreviewcontrib/makita/templates/script_merge_metrics.py.template b/asreviewcontrib/makita/templates/script_merge_metrics.py.template index 512857bb..ee16e5f7 100644 --- a/asreviewcontrib/makita/templates/script_merge_metrics.py.template +++ b/asreviewcontrib/makita/templates/script_merge_metrics.py.template @@ -82,3 +82,4 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) result.to_excel(Path(args.o).with_suffix(".xlsx")) + diff --git a/asreviewcontrib/makita/templates/script_merge_tds.py.template 
b/asreviewcontrib/makita/templates/script_merge_tds.py.template index 17971a1b..b71db632 100644 --- a/asreviewcontrib/makita/templates/script_merge_tds.py.template +++ b/asreviewcontrib/makita/templates/script_merge_tds.py.template @@ -88,3 +88,4 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) atd_table.to_csv(Path(args.o)) atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) + From 0ead8413c51b7e54b3f7d842f0146bf7252be166 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:23:08 +0200 Subject: [PATCH 29/95] Update basic example --- examples/basic_example/README.md | 41 +++--- examples/basic_example/jobs.bat | 71 +++++++++ examples/basic_example/jobs.sh | 135 +++++++++--------- examples/basic_example/scripts/get_plot.py | 96 +++++++------ .../basic_example/scripts/merge_metrics.py | 5 +- examples/basic_example/scripts/merge_tds.py | 35 +++-- 6 files changed, 238 insertions(+), 145 deletions(-) create mode 100644 examples/basic_example/jobs.bat diff --git a/examples/basic_example/README.md b/examples/basic_example/README.md index 1f2f52fc..1249ffc6 100644 --- a/examples/basic_example/README.md +++ b/examples/basic_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. ```sh pip install asreview-wordcloud @@ -30,10 +30,7 @@ The performance on the following datasets is evaluated: ## Run simulation To start the simulation, run the following command in the project directory. - -```sh -sh jobs.sh -``` +To start the simulation, run the `jobs.bat` file. ## Structure @@ -43,8 +40,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜van_de_Schoot_2018.csv │ ├── 📜Smid_2020.csv + │ ├── 📜van_de_Schoot_2018.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +50,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... 
├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_van_de_Schoot_2018.csv - | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_sim_Smid_2020.csv | ├── 📜tds_sim_Smid_2020.xlsx + | ├── 📜tds_sim_van_de_Schoot_2018.csv + | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_sim_Smid_2020_metrics.csv | ├── 📜metrics_sim_Smid_2020_metrics.xlsx + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈plot_recall_Smid_2020.png - ├── 📈wordcloud_van_de_Schoot_2018.png - ├── 📈wordcloud_relevant_van_de_Schoot_2018.png - └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈wordcloud_Smid_2020.png ├── 📈wordcloud_relevant_Smid_2020.png └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈wordcloud_van_de_Schoot_2018.png + ├── 📈wordcloud_relevant_van_de_Schoot_2018.png + └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/basic_example/jobs.bat b/examples/basic_example/jobs.bat new file mode 100644 index 00000000..e737c184 --- /dev/null +++ b/examples/basic_example/jobs.bat @@ -0,0 +1,71 @@ +@ echo off +COLOR E0 + +:: version 0.0.0 + +:: Create folder structure. By default, the folder 'output' is used to store output. +mkdir output +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures + + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics + +:: Collect descriptives about the dataset Smid_2020 +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\Smid_2020\state_files +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: 
DATASET: van_de_Schoot_2018 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics + +:: Collect descriptives about the dataset van_de_Schoot_2018 +mkdir output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\van_de_Schoot_2018\state_files +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv + +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/basic_example/jobs.sh b/examples/basic_example/jobs.sh index e5327cd9..e737c184 100644 --- a/examples/basic_example/jobs.sh +++ b/examples/basic_example/jobs.sh @@ -1,70 +1,71 @@ +@ echo off +COLOR E0 +:: version 0.0.0 -# version 0.0.0 - -# Create folder structure. By default, the folder 'output' is used to store output. +:: Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output -mkdir output/simulation -mkdir output/tables -mkdir output/tables/metrics -mkdir output/tables/time_to_discovery -mkdir output/figures - - -################################## -### DATASET: Smid_2020 -################################## - -# Create output folder -mkdir output/simulation/Smid_2020/ -mkdir output/simulation/Smid_2020/metrics - -# Collect descriptives about the dataset Smid_2020 -mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json - -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant - -# Simulate runs -mkdir output/simulation/Smid_2020/state_files -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_0.json - -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png -python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv -python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv - -################################## -### DATASET: van_de_Schoot_2018 -################################## - -# Create output folder -mkdir output/simulation/van_de_Schoot_2018/ -mkdir output/simulation/van_de_Schoot_2018/metrics - -# Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json - -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant - -# Simulate runs -mkdir output/simulation/van_de_Schoot_2018/state_files -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_0.json - -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png -python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv -python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o 
output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv - -# Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures + + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics + +:: Collect descriptives about the dataset Smid_2020 +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\Smid_2020\state_files +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: van_de_Schoot_2018 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics + +:: Collect descriptives about the dataset van_de_Schoot_2018 +mkdir output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\van_de_Schoot_2018\state_files +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview 
--init_seed 535 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv + +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/basic_example/scripts/get_plot.py b/examples/basic_example/scripts/get_plot.py index 7d29468d..64d2f8db 100644 --- a/examples/basic_example/scripts/get_plot.py +++ b/examples/basic_example/scripts/get_plot.py @@ -20,58 +20,72 @@ import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. + + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. + Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". 
+ """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." - ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/basic_example/scripts/merge_metrics.py b/examples/basic_example/scripts/merge_metrics.py index aa031461..d8ed971b 100644 --- a/examples/basic_example/scripts/merge_metrics.py +++ b/examples/basic_example/scripts/merge_metrics.py @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." ) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/basic_example/scripts/merge_tds.py b/examples/basic_example/scripts/merge_tds.py index 1beb52c6..b705ed01 100644 --- a/examples/basic_example/scripts/merge_tds.py +++ b/examples/basic_example/scripts/merge_tds.py @@ -24,6 +24,7 @@ import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) From 78fa2bebf90033f5e3593ad185681b9ea6715f56 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:23:21 +0200 Subject: [PATCH 30/95] Update basic readme --- examples/basic_example/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/basic_example/README.md b/examples/basic_example/README.md index 1249ffc6..ad37fd43 100644 --- a/examples/basic_example/README.md +++ b/examples/basic_example/README.md @@ -29,7 +29,6 @@ The performance on the following datasets is evaluated: ## Run simulation -To start the simulation, run the following command in the project directory. To start the simulation, run the `jobs.bat` file. ## Structure From 040bc81faaa3d5e1da9d566f6c9bac179d908caa Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:25:53 +0200 Subject: [PATCH 31/95] Update arfi example --- examples/arfi_example/README.md | 35 +- examples/arfi_example/jobs.bat | 194 ++++++++++ examples/arfi_example/jobs.sh | 359 +++++++++--------- examples/arfi_example/scripts/get_plot.py | 96 +++-- .../arfi_example/scripts/merge_metrics.py | 5 +- examples/arfi_example/scripts/merge_tds.py | 35 +- 6 files changed, 472 insertions(+), 252 deletions(-) create mode 100644 examples/arfi_example/jobs.bat diff --git a/examples/arfi_example/README.md b/examples/arfi_example/README.md index 73fbeaf8..bc3be312 100644 --- a/examples/arfi_example/README.md +++ b/examples/arfi_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. ```sh pip install asreview-wordcloud @@ -45,6 +45,7 @@ The following files are found in this project: ├── 📂data │ ├── 📜van_de_Schoot_2018.csv │ ├── 📜Smid_2020.csv + │ ├── 📜van_de_Schoot_2018.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +54,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... 
| └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... ├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_van_de_Schoot_2018.csv - | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_sim_Smid_2020.csv | ├── 📜tds_sim_Smid_2020.xlsx + | ├── 📜tds_sim_van_de_Schoot_2018.csv + | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_sim_Smid_2020_metrics.csv | ├── 📜metrics_sim_Smid_2020_metrics.xlsx + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈plot_recall_Smid_2020.png - ├── 📈wordcloud_van_de_Schoot_2018.png - ├── 📈wordcloud_relevant_van_de_Schoot_2018.png - └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈wordcloud_Smid_2020.png ├── 📈wordcloud_relevant_Smid_2020.png └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈wordcloud_van_de_Schoot_2018.png + ├── 📈wordcloud_relevant_van_de_Schoot_2018.png + └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/arfi_example/jobs.bat b/examples/arfi_example/jobs.bat new file mode 100644 index 00000000..00fb8a21 --- /dev/null +++ b/examples/arfi_example/jobs.bat @@ -0,0 +1,194 @@ +@ echo off +COLOR E0 + +:: version 0.0.0 + +:: Create folder structure. By default, the folder 'output' is used to store output. 
+mkdir output +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics + +:: Collect descriptives about the dataset +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant + +:: Simulate runs, collect metrics and create plots +mkdir output\simulation\Smid_2020\state_files +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_31.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_121.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_122.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_216.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_520.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double 
--n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_526.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_672.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_763.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_810.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1000.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1063.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1195.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1203.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview -o 
output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1257.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1429.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1534.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1809.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1820.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1876.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1877.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2067.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2070.json +python -m asreview simulate 
data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2241.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2276.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2279.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2307.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2452.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: van_de_Schoot_2018 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics + +:: Collect descriptives about the dataset +mkdir output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o 
output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant + +:: Simulate runs, collect metrics and create plots +mkdir output\simulation\van_de_Schoot_2018\state_files +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_51.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_116.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_462.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_730.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_767.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_831.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_902.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_953.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1033.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1180.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1248.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1356.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1429.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1514.json +python -m 
asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1554.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1565.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1746.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1881.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1994.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2279.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2496.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2545.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2624.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2740.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2803.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3076.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3314.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3442.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview 
--prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3608.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3680.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3769.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3842.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4154.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4201.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4269.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e 
tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4327.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4377.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4461.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv + +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/arfi_example/jobs.sh b/examples/arfi_example/jobs.sh index cbb07843..00fb8a21 100644 --- a/examples/arfi_example/jobs.sh +++ b/examples/arfi_example/jobs.sh @@ -1,193 +1,194 @@ +@ echo off +COLOR E0 +:: version 0.0.0 -# version 0.0.0 - -# Create folder structure. By default, the folder 'output' is used to store output. +:: Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output -mkdir output/simulation -mkdir output/tables -mkdir output/tables/metrics -mkdir output/tables/time_to_discovery -mkdir output/figures +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures -################################## -### DATASET: Smid_2020 -################################## -# Create output folder -mkdir output/simulation/Smid_2020/ -mkdir output/simulation/Smid_2020/metrics +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics -# Collect descriptives about the dataset -mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json +:: Collect descriptives about the dataset +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant -# Simulate runs, collect metrics and create plots -mkdir output/simulation/Smid_2020/state_files -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_31.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_121.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_122.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_216.json -asreview simulate data/Smid_2020.csv -s 
output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_520.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_526.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_672.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_763.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_810.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1000.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1063.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1195.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1203.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1257.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview --prior_record_id 1429 
1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1429.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1534.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1809.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1820.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1876.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1877.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2067.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2070.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2241.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2276.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview 
metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2279.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2307.json -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2452.json +:: Simulate runs, collect metrics and create plots +mkdir output\simulation\Smid_2020\state_files +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_31.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_121.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_122.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_216.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_520.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_526.json +python -m asreview simulate data\Smid_2020.csv -s 
output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_672.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_763.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_810.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1000.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1063.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1195.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1203.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1257.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 
1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1429.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1534.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1809.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1820.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1876.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1877.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2067.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2070.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2241.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2276.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2279.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2307.json +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2452.json -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model -python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv -python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv -################################## -### DATASET: van_de_Schoot_2018 -################################## -# Create output folder -mkdir output/simulation/van_de_Schoot_2018/ -mkdir output/simulation/van_de_Schoot_2018/metrics +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: van_de_Schoot_2018 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics -# Collect descriptives about the dataset -mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json +:: Collect descriptives about the dataset +mkdir 
output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant -# Simulate runs, collect metrics and create plots -mkdir output/simulation/van_de_Schoot_2018/state_files -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_51.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_116.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_462.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_730.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_767.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_831.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_902.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_953.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1033.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1180.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1248.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1356.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1429.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1514.json -asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1554.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1565.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1746.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1881.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1994.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2279.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2496.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2545.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2624.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2740.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2803.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3076.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3314.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3442.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3608.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3680.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3769.json -asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3842.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4154.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4201.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4269.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4327.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4377.json -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4461.json +:: Simulate runs, collect metrics and create plots +mkdir output\simulation\van_de_Schoot_2018\state_files +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_51.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_116.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_462.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_730.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_767.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_831.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_902.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_953.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 
4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1033.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1180.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1248.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1356.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1429.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1514.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1554.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 
--stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1565.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1746.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1881.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1994.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2279.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2496.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2545.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2624.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2740.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2803.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3076.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3314.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3442.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3608.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3680.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3769.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3842.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4154.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4201.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4269.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4327.json +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4377.json +python -m 
asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4461.json -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv -python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv -# Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/arfi_example/scripts/get_plot.py b/examples/arfi_example/scripts/get_plot.py index 7d29468d..64d2f8db 100644 --- a/examples/arfi_example/scripts/get_plot.py +++ b/examples/arfi_example/scripts/get_plot.py @@ -20,58 +20,72 @@ import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. 
+ + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. + Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". + """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." - ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/arfi_example/scripts/merge_metrics.py b/examples/arfi_example/scripts/merge_metrics.py index aa031461..d8ed971b 100644 --- a/examples/arfi_example/scripts/merge_metrics.py +++ b/examples/arfi_example/scripts/merge_metrics.py @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." 
) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/arfi_example/scripts/merge_tds.py b/examples/arfi_example/scripts/merge_tds.py index 1beb52c6..b705ed01 100644 --- a/examples/arfi_example/scripts/merge_tds.py +++ b/examples/arfi_example/scripts/merge_tds.py @@ -24,6 +24,7 @@ import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." ) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) From d682adb6aca24b4a593b1d4238320de94c4abbe8 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:26:05 +0200 Subject: [PATCH 32/95] Update arfi readme --- examples/arfi_example/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/arfi_example/README.md b/examples/arfi_example/README.md index bc3be312..084f057c 100644 --- a/examples/arfi_example/README.md +++ b/examples/arfi_example/README.md @@ -43,7 +43,6 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜van_de_Schoot_2018.csv │ ├── 📜Smid_2020.csv │ ├── 📜van_de_Schoot_2018.csv ├── 📂scripts From 6a0e8e96fd65c036229968b28a34852900fbdfae Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:27:32 +0200 Subject: [PATCH 33/95] Update MM example --- examples/multimodel_example/README.md | 36 +-- examples/multimodel_example/jobs.bat | 157 ++++++++++++ examples/multimodel_example/jobs.sh | 227 +++++++++--------- .../multimodel_example/scripts/get_plot.py | 96 ++++---- .../scripts/merge_metrics.py | 5 +- .../multimodel_example/scripts/merge_tds.py | 
35 +-- 6 files changed, 369 insertions(+), 187 deletions(-) create mode 100644 examples/multimodel_example/jobs.bat diff --git a/examples/multimodel_example/README.md b/examples/multimodel_example/README.md index e0bf8f57..a7eab5d3 100644 --- a/examples/multimodel_example/README.md +++ b/examples/multimodel_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -If wordcloud images are required, install the following dependencies. +For generating wordclouds, install the following dependencies. ```sh pip install asreview-wordcloud @@ -43,8 +43,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜van_de_Schoot_2018.csv │ ├── 📜Smid_2020.csv + │ ├── 📜van_de_Schoot_2018.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... ├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_van_de_Schoot_2018.csv - | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_sim_Smid_2020.csv | ├── 📜tds_sim_Smid_2020.xlsx + | ├── 📜tds_sim_van_de_Schoot_2018.csv + | ├── 📜tds_sim_van_de_Schoot_2018.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv - | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_sim_Smid_2020_metrics.csv | ├── 📜metrics_sim_Smid_2020_metrics.xlsx + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv + | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈plot_recall_Smid_2020.png - ├── 📈wordcloud_van_de_Schoot_2018.png - ├── 📈wordcloud_relevant_van_de_Schoot_2018.png - └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈plot_recall_van_de_Schoot_2018.png ├── 📈wordcloud_Smid_2020.png ├── 📈wordcloud_relevant_Smid_2020.png └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈wordcloud_van_de_Schoot_2018.png + ├── 📈wordcloud_relevant_van_de_Schoot_2018.png + └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png diff --git a/examples/multimodel_example/jobs.bat b/examples/multimodel_example/jobs.bat new file mode 100644 index 00000000..8c78726f --- /dev/null +++ b/examples/multimodel_example/jobs.bat @@ -0,0 +1,157 @@ +@ echo off +COLOR E0 +:: version 0.0.0 + +:: Create folder structure. By default, the folder 'output' is used to store output. 
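+:: Note (descriptive only): the commands below build the same output layout as the
+:: other Makita templates and the folder tree shown in the README above — per-dataset
+:: state files and metrics under output\simulation\<dataset>, aggregated tables under
+:: output\tables, and figures under output\figures.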
+mkdir output +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics + +:: Collect descriptives about the dataset Smid_2020 +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\Smid_2020\state_files + +:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_doc2vec_0.json + +:: Classifier = logistic, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_sbert_0.json + +:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_tfidf_0.json + + +:: Skipped nb + doc2vec model + + +:: Skipped nb + sbert model + +:: Classifier = nb, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_nb_tfidf_0.json + +:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s 
output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_doc2vec_0.json + +:: Classifier = rf, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_sbert_0.json + +:: Classifier = rf, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_tfidf_0.json + +:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_doc2vec_0.json + +:: Classifier = svm, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_sbert_0.json + +:: Classifier = svm, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_tfidf_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv + +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: van_de_Schoot_2018 
+:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: + +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics + +:: Collect descriptives about the dataset van_de_Schoot_2018 +mkdir output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json + +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant + +:: Simulate runs +mkdir output\simulation\van_de_Schoot_2018\state_files + +:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json + +:: Classifier = logistic, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json + +:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json + + +:: Skipped nb + doc2vec model + + +:: Skipped nb + sbert model + +:: Classifier = nb, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json + +:: Classifier = rf, Feature extractor = doc2vec , 
Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json + +:: Classifier = rf, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_sbert_0.json + +:: Classifier = rf, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json + +:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json + +:: Classifier = svm, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_sbert_0.json + +:: Classifier = svm, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json + +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o 
output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv + +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/multimodel_example/jobs.sh b/examples/multimodel_example/jobs.sh index 9e6c1894..8c78726f 100644 --- a/examples/multimodel_example/jobs.sh +++ b/examples/multimodel_example/jobs.sh @@ -1,156 +1,157 @@ +@ echo off +COLOR E0 +:: version 0.0.0 -# version 0.0.0 - -# Create folder structure. By default, the folder 'output' is used to store output. +:: Create folder structure. By default, the folder 'output' is used to store output. mkdir output -mkdir output/simulation -mkdir output/tables -mkdir output/tables/metrics -mkdir output/tables/time_to_discovery -mkdir output/figures +mkdir output\simulation +mkdir output\tables +mkdir output\tables\metrics +mkdir output\tables\time_to_discovery +mkdir output\figures -################################## -### DATASET: Smid_2020 -################################## +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: Smid_2020 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# Create output folder -mkdir output/simulation/Smid_2020/ -mkdir output/simulation/Smid_2020/metrics +:: Create output folder +mkdir output\simulation\Smid_2020\ +mkdir output\simulation\Smid_2020\metrics -# Collect descriptives about the dataset Smid_2020 -mkdir output/simulation/Smid_2020/descriptives -asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json +:: Collect descriptives about the dataset Smid_2020 +mkdir output\simulation\Smid_2020\descriptives +python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant -# Simulate runs -mkdir output/simulation/Smid_2020/state_files +:: Simulate runs +mkdir output\simulation\Smid_2020\state_files -# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview -o 
output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_doc2vec_0.json +:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_doc2vec_0.json -# Classifier = logistic, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_sbert_0.json +:: Classifier = logistic, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_sbert_0.json -# Classifier = logistic, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_tfidf_0.json +:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_tfidf_0.json -# Skipped nb + doc2vec model +:: Skipped nb + doc2vec model -# Skipped nb + sbert model +:: Skipped nb + sbert model -# Classifier = nb, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_nb_tfidf_0.json +:: Classifier = nb, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_nb_tfidf_0.json -# Classifier = rf, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_doc2vec_0.json +:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_doc2vec_0.json -# Classifier = rf, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_sbert_0.json +:: Classifier = rf, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_sbert_0.json -# Classifier = rf, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_tfidf_0.json +:: Classifier = rf, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_tfidf_0.json -# Classifier = svm, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_doc2vec_0.json +:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max +python -m 
asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_doc2vec_0.json -# Classifier = svm, Feature extractor = sbert , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_sbert_0.json +:: Classifier = svm, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_sbert_0.json -# Classifier = svm, Feature extractor = tfidf , Query strategy = max -asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_tfidf_0.json +:: Classifier = svm, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_tfidf_0.json -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model -python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv -python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv +python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv -################################## -### DATASET: van_de_Schoot_2018 -################################## +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +:::::: DATASET: van_de_Schoot_2018 +:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -# Create output folder -mkdir 
output/simulation/van_de_Schoot_2018/ -mkdir output/simulation/van_de_Schoot_2018/metrics +:: Create output folder +mkdir output\simulation\van_de_Schoot_2018\ +mkdir output\simulation\van_de_Schoot_2018\metrics -# Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output/simulation/van_de_Schoot_2018/descriptives -asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json +:: Collect descriptives about the dataset van_de_Schoot_2018 +mkdir output\simulation\van_de_Schoot_2018\descriptives +python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json -# Generate wordcloud visualizations of all datasets -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +:: Generate wordcloud visualizations of all datasets +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant -# Simulate runs -mkdir output/simulation/van_de_Schoot_2018/state_files +:: Simulate runs +mkdir output\simulation\van_de_Schoot_2018\state_files -# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json +:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json -# Classifier = logistic, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json +:: Classifier 
= logistic, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json -# Classifier = logistic, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json +:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json -# Skipped nb + doc2vec model +:: Skipped nb + doc2vec model -# Skipped nb + sbert model +:: Skipped nb + sbert model -# Classifier = nb, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json +:: Classifier = nb, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json -# Classifier = rf, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json +:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json -# Classifier = rf, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_sbert_0.json +:: Classifier = rf, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_sbert_0.json -# Classifier = rf, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json +:: Classifier = rf, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json -# Classifier = svm, Feature extractor = doc2vec , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json +:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json -# Classifier = svm, Feature extractor = sbert , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_sbert_0.json +:: Classifier = svm, Feature extractor = sbert , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_sbert_0.json -# Classifier = svm, Feature extractor = tfidf , Query strategy = max -asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json +:: Classifier = svm, Feature extractor = tfidf , Query strategy = max +python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min +python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json -# Generate plot and tables for dataset -python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv -python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv +:: Generate plot and tables for dataset +python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv +python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv -# Merge descriptives and metrics -python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv -python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o 
output/tables/metrics_sim_all.csv +:: Merge descriptives and metrics +python scripts\merge_descriptives.py +python scripts\merge_metrics.py diff --git a/examples/multimodel_example/scripts/get_plot.py b/examples/multimodel_example/scripts/get_plot.py index 7d29468d..64d2f8db 100644 --- a/examples/multimodel_example/scripts/get_plot.py +++ b/examples/multimodel_example/scripts/get_plot.py @@ -20,58 +20,72 @@ import argparse from pathlib import Path -import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files.""" +def _set_legend(ax, state, legend_option, label_to_line, state_file): + metadata = state.settings_metadata + label = None + + if legend_option == "filename": + label = state_file.stem + elif legend_option == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend_option == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend_option] + except KeyError as err: + raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 + + if label: + # add label to line + if label not in label_to_line: + ax.lines[-2].set_label(label) + label_to_line[label] = ax.lines[-2] + # set color of line to the color of the first line with the same label + else: + ax.lines[-2].set_color(label_to_line[label].get_color()) + ax.lines[-2].set_label("_no_legend_") - fig, ax = plt.subplots() - labels = [] - colors = list(mcolors.TABLEAU_COLORS.values()) +def get_plot_from_states(states, filename, legend=None): + """Generate an ASReview plot from state files. + + Arguments + --------- + states: list + List of state files. + filename: str + Filename of the plot. + legend: str + Add a legend to the plot, based on the given parameter. + Possible values: "filename", "model", "feature_extraction", + "balance_strategy", "query_strategy", "classifier". + """ + states = sorted(states) + fig, ax = plt.subplots() + label_to_line = {} for state_file in states: with open_state(state_file) as state: - # draw the plot plot_recall(ax, state) + if legend: + _set_legend(ax, state, legend, label_to_line, state_file) - # set the label - if legend == "filename": - ax.lines[-2].set_label(state_file.stem) - ax.legend(loc=4, prop={"size": 8}) - elif legend: - metadata = state.settings_metadata - - if legend == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend] - except KeyError as exc: - raise ValueError( - f"Legend setting '{legend}' " - "not found in state file settings." 
- ) from exc - if label not in labels: - ax.lines[-2].set_label(label) - labels.append(label) - ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) - ax.legend(loc=4, prop={"size": 8}) - + if legend: + ax.legend(loc=4, prop={"size": 8}) fig.savefig(str(filename)) @@ -90,10 +104,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = Path(args.s).glob("*.asreview") + states = list(Path(args.s).glob("*.asreview")) # check if states are found - if len(list(states)) == 0: + if len(states) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/multimodel_example/scripts/merge_metrics.py b/examples/multimodel_example/scripts/merge_metrics.py index aa031461..d8ed971b 100644 --- a/examples/multimodel_example/scripts/merge_metrics.py +++ b/examples/multimodel_example/scripts/merge_metrics.py @@ -55,7 +55,10 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." ) parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="states location" + "-s", + type=str, + default="output/simulation/*/metrics/", + help="states location", ) parser.add_argument( "-o", diff --git a/examples/multimodel_example/scripts/merge_tds.py b/examples/multimodel_example/scripts/merge_tds.py index 1beb52c6..b705ed01 100644 --- a/examples/multimodel_example/scripts/merge_tds.py +++ b/examples/multimodel_example/scripts/merge_tds.py @@ -24,6 +24,7 @@ import argparse import glob import json +from math import nan from pathlib import Path import pandas as pd @@ -37,7 +38,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] # noqa + ] values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -47,25 +48,26 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=0, + fill_value=nan, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted +def get_atd_values(df): + df["record_atd"] = df.mean(axis=1) + + df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) + + return df + + if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument( - "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" - ) - parser.add_argument( - "-o", - type=str, - default="output/tables/tds_sim_all.csv", - help="Output table location", - ) + parser.add_argument("-s", type=str, required=True, help="metrics location") + parser.add_argument("-o", type=str, required=True, help="Output table location") args = parser.parse_args() # load metric files @@ -75,9 +77,14 @@ def create_table_state_tds(metrics): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - states_table = create_table_state_tds(metric_files) + # check if output file has .csv extension + if Path(args.o).suffix != ".csv": + raise ValueError("Output file should have .csv extension") + + td_table = create_table_state_tds(metric_files) + atd_table = get_atd_values(td_table) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - states_table.to_csv(Path(args.o)) - states_table.to_excel(Path(args.o).with_suffix(".xlsx")) + atd_table.to_csv(Path(args.o)) + atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) From 229b106afe0fd6c5aefd9978c6ab23c751a9b794 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:36:48 +0200 Subject: [PATCH 34/95] Add linebreak at the end of the file using filehandler --- .github/workflows/ci-workflow.yml | 3 +++ asreviewcontrib/makita/templates/doc_README.md.template | 3 +-- asreviewcontrib/makita/templates/script_get_plot.py.template | 1 - .../templates/script_get_settings_from_state.py.template | 1 - .../makita/templates/script_merge_descriptives.py.template | 1 - .../makita/templates/script_merge_metrics.py.template | 1 - asreviewcontrib/makita/templates/script_merge_tds.py.template | 1 - .../script_split_data_with_multiple_labels.py.template | 1 - asreviewcontrib/makita/utils.py | 1 + 9 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 8aa54b79..3e3c93dc 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -49,6 +49,9 @@ jobs: - name: Generate makita scripts run: | asreview makita add-script --all + - name: rename .py.template files to .py + run: | + find . -name "*.py.template" -exec bash -c 'mv "$1" "${1%.template}"' - '{}' \; - name: Lint python with ruff run: | ruff check . 
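For reference, the `get_atd_values` helper introduced in the merge_tds.py scripts above adds two averages to the merged time-to-discovery table: a `record_atd` column (the mean TD of each record across simulations) and an `average_simulation_TD` row (the mean TD of each simulation across records). A minimal sketch with made-up numbers, not project data:

```python
import pandas as pd
from math import nan

# hypothetical merged TD table: one row per record, one td_sim_* column per simulation
df = pd.DataFrame(
    {"td_sim_0": [5.0, 12.0, nan], "td_sim_1": [7.0, 10.0, 40.0]},
    index=[101, 102, 103],
)

df["record_atd"] = df.mean(axis=1)                              # mean TD per record, NaNs skipped
df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0)  # mean TD per simulation

print(df)
#                        td_sim_0  td_sim_1  record_atd
# 101                         5.0       7.0         6.0
# 102                        12.0      10.0        11.0
# 103                         NaN      40.0        40.0
# average_simulation_TD       8.5      19.0         NaN
```

The NaN fill value (replacing the previous 0) keeps records that do not appear in a given simulation's TD list out of these averages instead of dragging them towards zero.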
diff --git a/asreviewcontrib/makita/templates/doc_README.md.template b/asreviewcontrib/makita/templates/doc_README.md.template index 9186dac5..e5658404 100644 --- a/asreviewcontrib/makita/templates/doc_README.md.template +++ b/asreviewcontrib/makita/templates/doc_README.md.template @@ -72,5 +72,4 @@ The following files are found in this project: ├── 📈plot_recall_{{ dataset.stem }}.png{% endfor %}{% for dataset in datasets %} ├── 📈wordcloud_{{ dataset.stem }}.png ├── 📈wordcloud_relevant_{{ dataset.stem }}.png - └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{% endfor %} -{%endif %} + └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{% endfor %}{%endif %} \ No newline at end of file diff --git a/asreviewcontrib/makita/templates/script_get_plot.py.template b/asreviewcontrib/makita/templates/script_get_plot.py.template index 48b5cc33..4faf4a8a 100644 --- a/asreviewcontrib/makita/templates/script_get_plot.py.template +++ b/asreviewcontrib/makita/templates/script_get_plot.py.template @@ -112,4 +112,3 @@ if __name__ == "__main__": # generate plot and save results get_plot_from_states(states, args.o, args.show_legend) - diff --git a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template index dde02081..88bb47cc 100644 --- a/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template +++ b/asreviewcontrib/makita/templates/script_get_settings_from_state.py.template @@ -50,4 +50,3 @@ if __name__ == "__main__": with open(Path(args.o), "w") as f: json.dump(result, f) - diff --git a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template index 05998197..2b2992c5 100644 --- a/asreviewcontrib/makita/templates/script_merge_descriptives.py.template +++ b/asreviewcontrib/makita/templates/script_merge_descriptives.py.template @@ -77,4 +77,3 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) result.to_excel(Path(args.o).with_suffix(".xlsx")) - diff --git a/asreviewcontrib/makita/templates/script_merge_metrics.py.template b/asreviewcontrib/makita/templates/script_merge_metrics.py.template index ee16e5f7..512857bb 100644 --- a/asreviewcontrib/makita/templates/script_merge_metrics.py.template +++ b/asreviewcontrib/makita/templates/script_merge_metrics.py.template @@ -82,4 +82,3 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) result.to_csv(Path(args.o)) result.to_excel(Path(args.o).with_suffix(".xlsx")) - diff --git a/asreviewcontrib/makita/templates/script_merge_tds.py.template b/asreviewcontrib/makita/templates/script_merge_tds.py.template index b71db632..17971a1b 100644 --- a/asreviewcontrib/makita/templates/script_merge_tds.py.template +++ b/asreviewcontrib/makita/templates/script_merge_tds.py.template @@ -88,4 +88,3 @@ if __name__ == "__main__": Path(args.o).parent.mkdir(parents=True, exist_ok=True) atd_table.to_csv(Path(args.o)) atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) - diff --git a/asreviewcontrib/makita/templates/script_split_data_with_multiple_labels.py.template b/asreviewcontrib/makita/templates/script_split_data_with_multiple_labels.py.template index 6c13418b..a631c4c8 100644 --- a/asreviewcontrib/makita/templates/script_split_data_with_multiple_labels.py.template +++ b/asreviewcontrib/makita/templates/script_split_data_with_multiple_labels.py.template @@ -104,4 +104,3 @@ if __name__ == 
'__main__': args = parser.parse_args() etl(args.s, args.o, split=args.split, suffix=args.suffix) - diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index 4e566b1c..a21e67f8 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -50,6 +50,7 @@ def allow_overwrite(): with open(export_fp, "w") as f: f.write(content) + f.write("\n") print(f"Created {export_fp}") From 61574e37dbea64084b39c4c00c2d8aeb161e3f50 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:39:59 +0200 Subject: [PATCH 35/95] Update pyproject.toml --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5c10c86e..2ef1264d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ packages = ["asreviewcontrib"] write_to = "asreviewcontrib/makita/_version.py" [tool.ruff] -select = ["E", "F", "UP", "I", "B"] +lint.select = ["E", "F", "UP", "I", "B"] -[tool.ruff.isort] -force-single-line = true +[tool.ruff.lint.isort] +force-single-line = true \ No newline at end of file From 87219a6f4d0150832bc7736ace95542f539350a4 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:40:18 +0200 Subject: [PATCH 36/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 3e3c93dc..7ec75222 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -52,6 +52,7 @@ jobs: - name: rename .py.template files to .py run: | find . -name "*.py.template" -exec bash -c 'mv "$1" "${1%.template}"' - '{}' \; + tree - name: Lint python with ruff run: | ruff check . From 5f6a75e7faf487d01317f8d48a9af444f51a66e2 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:40:53 +0200 Subject: [PATCH 37/95] Remove file renaming --- .github/workflows/ci-workflow.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 7ec75222..8aa54b79 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -49,10 +49,6 @@ jobs: - name: Generate makita scripts run: | asreview makita add-script --all - - name: rename .py.template files to .py - run: | - find . -name "*.py.template" -exec bash -c 'mv "$1" "${1%.template}"' - '{}' \; - tree - name: Lint python with ruff run: | ruff check . 
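Taken together, the template trims and the utils.py change above mean that every file Makita writes now ends with exactly one newline: the .template sources drop their trailing blank lines and the file handler appends the newline at write time. A simplified sketch of that write path (a stand-in helper, not the actual FileHandler API):

```python
from pathlib import Path

def write_rendered_file(content: str, export_fp: Path) -> None:
    """Write rendered template output with a guaranteed trailing newline."""
    export_fp.parent.mkdir(parents=True, exist_ok=True)
    with open(export_fp, "w") as f:
        f.write(content)
        f.write("\n")  # appended once, since templates no longer end with a blank line
    print(f"Created {export_fp}")

# usage sketch
write_rendered_file("print('rendered')", Path("scripts/example.py"))
```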
From cec0e6ff1ec90d460675b86f60de9b0e98d4f0fd Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:43:44 +0200 Subject: [PATCH 38/95] Include py.template in ruff linter --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2ef1264d..9ec5d007 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ write_to = "asreviewcontrib/makita/_version.py" [tool.ruff] lint.select = ["E", "F", "UP", "I", "B"] +include = ["**/*.py", "**/*.py.template"] [tool.ruff.lint.isort] force-single-line = true \ No newline at end of file From 595536277d2c9c7bbbc527cac90d510c8fdbbb20 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 17:59:52 +0200 Subject: [PATCH 39/95] Run Basic Template --- .github/workflows/ci-workflow.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 8aa54b79..6fa4f8da 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,7 @@ jobs: - name: Test makita templates run: | cd tmp/basic - asreview makita template basic | tee output.txt + asreview makita template basic --instances_per_query 100 -no_wordcloud| tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt @@ -52,3 +52,10 @@ jobs: - name: Lint python with ruff run: | ruff check . + - name: Install asreview-datatools asreview-insights + run: | + pip install asreview-datatools asreview-insights + - name: Run basic template + run: | + cd tmp/basic + sh jobs.sh From ab3e789fc40dc5d1384391092375c3bcc0fa440f Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:03:20 +0200 Subject: [PATCH 40/95] Fix typo in workflow --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 6fa4f8da..85fcdec0 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,7 @@ jobs: - name: Test makita templates run: | cd tmp/basic - asreview makita template basic --instances_per_query 100 -no_wordcloud| tee output.txt + asreview makita template basic --instances_per_query 100 -no_wordclouds| tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt From dc31bad5d5b77f46c01ecf3ebcb781b66ba172db Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:07:55 +0200 Subject: [PATCH 41/95] same as before --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 85fcdec0..5487c4a5 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,7 @@ jobs: - name: Test makita templates run: | cd tmp/basic - asreview makita template basic --instances_per_query 100 -no_wordclouds| tee output.txt + asreview makita template basic --instances_per_query 100 --no_wordclouds| tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt From 9dfb057b2e2eaa00c89b5f675509724f6191d15c Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:14:34 +0200 Subject: [PATCH 42/95] increase simulation steps --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 5487c4a5..009b9d2e 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,7 @@ jobs: - name: Test makita templates run: | cd tmp/basic - asreview makita template basic --instances_per_query 100 --no_wordclouds| tee output.txt + asreview makita template basic --instances_per_query 10 --no_wordclouds| tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt From 4da49e01a5514d82ef0e1d2cd6ed8898062e36c9 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:19:12 +0200 Subject: [PATCH 43/95] Add synergy to workflow --- .github/workflows/ci-workflow.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 009b9d2e..b2336154 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,7 @@ jobs: - name: Test makita templates run: | cd tmp/basic - asreview makita template basic --instances_per_query 10 --no_wordclouds| tee output.txt + asreview makita template basic | tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt @@ -54,8 +54,10 @@ jobs: ruff check . - name: Install asreview-datatools asreview-insights run: | - pip install asreview-datatools asreview-insights + pip install asreview-datatools asreview-insights synergy-dataset - name: Run basic template run: | cd tmp/basic + synergy_dataset get -d van_de_Schoot_2018 -o /data -l + asreview makita template basic --instances_per_query 10 --no_wordclouds --overwrite sh jobs.sh From 13ce17192ac5ca3eb1e9982ca932fde190c3dc5a Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:26:57 +0200 Subject: [PATCH 44/95] a different dir for the test run --- .github/workflows/ci-workflow.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index b2336154..5642620d 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -52,12 +52,14 @@ jobs: - name: Lint python with ruff run: | ruff check . 
- - name: Install asreview-datatools asreview-insights + - name: Install asreview-datatools asreview-insights synergy-dataset run: | pip install asreview-datatools asreview-insights synergy-dataset - name: Run basic template run: | - cd tmp/basic + mkdir -p tmp/synergy + cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o /data -l - asreview makita template basic --instances_per_query 10 --no_wordclouds --overwrite + asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite sh jobs.sh + ls -R From 977a5f8d5750fcb9f3cbff27e362f82b96374f61 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:29:51 +0200 Subject: [PATCH 45/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 5642620d..0c6e74ff 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -57,9 +57,9 @@ jobs: pip install asreview-datatools asreview-insights synergy-dataset - name: Run basic template run: | - mkdir -p tmp/synergy + mkdir -p tmp/synergy/data cd tmp/synergy - synergy_dataset get -d van_de_Schoot_2018 -o /data -l + synergy_dataset get -d van_de_Schoot_2018 -o ./data -l asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite sh jobs.sh ls -R From 9efbe9c497b3a6d31d9584b0c66e162596b1c170 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 18:37:55 +0200 Subject: [PATCH 46/95] restrict sim to ubuntu --- .github/workflows/ci-workflow.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 0c6e74ff..599f92c8 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -52,14 +52,15 @@ jobs: - name: Lint python with ruff run: | ruff check . - - name: Install asreview-datatools asreview-insights synergy-dataset + - name: Install Simulation Tools run: | - pip install asreview-datatools asreview-insights synergy-dataset + pip install asreview-datatools asreview-insights synergy-dataset scitree - name: Run basic template + if: ${{ matrix.os == 'ubuntu-latest' }} run: | mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite sh jobs.sh - ls -R + scitree From 68d1c5d68be63bd46749bc9cc504401f592b34c8 Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 22:27:09 +0200 Subject: [PATCH 47/95] fix workflow for non ubuntu --- .github/workflows/ci-workflow.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 599f92c8..5287e59b 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -52,12 +52,10 @@ jobs: - name: Lint python with ruff run: | ruff check . 
- - name: Install Simulation Tools - run: | - pip install asreview-datatools asreview-insights synergy-dataset scitree - name: Run basic template if: ${{ matrix.os == 'ubuntu-latest' }} run: | + pip install asreview-datatools asreview-insights synergy-dataset scitree mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l From 470f041faf0237fa1cd7195525556d04cef77f6c Mon Sep 17 00:00:00 2001 From: Jelle Teijema Date: Wed, 3 Apr 2024 22:29:12 +0200 Subject: [PATCH 48/95] Fix small ruff errors --- asreviewcontrib/makita/template_base.py | 2 +- pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index dbdea603..c11c8abb 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -41,7 +41,7 @@ def __init__( self.fp_template = fp_template self.job_file = ( job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" - ) # noqa + ) self.platform_sys = platform_sys if platform_sys else platform.system() self.file_handler = FileHandler(allow_overwrite) self.template = ConfigTemplate(fp_template) diff --git a/pyproject.toml b/pyproject.toml index 9ec5d007..b29f869d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,8 @@ classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11" + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" ] license = {text = "MIT"} dependencies = ["asreview", "jinja2", "cfgtemplater"] From d9c48567e522319b5e05a7d84990160c339c05e2 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 15:31:47 +0200 Subject: [PATCH 49/95] Add defaults to config --- asreviewcontrib/makita/config.py | 11 ++ asreviewcontrib/makita/entrypoint.py | 112 +++++++++++------- asreviewcontrib/makita/template_arfi.py | 28 ++++- asreviewcontrib/makita/template_base.py | 29 ++--- asreviewcontrib/makita/template_basic.py | 27 ++++- asreviewcontrib/makita/template_multimodel.py | 44 ++++--- 6 files changed, 162 insertions(+), 89 deletions(-) diff --git a/asreviewcontrib/makita/config.py b/asreviewcontrib/makita/config.py index ef1335b3..515a10c4 100644 --- a/asreviewcontrib/makita/config.py +++ b/asreviewcontrib/makita/config.py @@ -1,3 +1,14 @@ from pathlib import Path TEMPLATES_FP = Path(Path(__file__).parent, "templates") + +DEFAULTS = { + "dataset_folder": "data", + "output_folder": "output", + "scripts_folder": "scripts", + "init_seed": 535, + "model_seed": 165, + "balance_strategy": "double", + "instances_per_query": 1, + "stop_if": "min", +} diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 4a9119f3..577483d7 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -5,6 +5,7 @@ from asreview.entry_points import BaseEntryPoint from asreviewcontrib.makita import __version__ +from asreviewcontrib.makita.config import DEFAULTS from asreviewcontrib.makita.config import TEMPLATES_FP from asreviewcontrib.makita.template_arfi import TemplateARFI from asreviewcontrib.makita.template_basic import TemplateBasic @@ -47,22 +48,22 @@ def execute(self, argv): # noqa: C901 "jobs.bat for Windows, otherwise jobs.sh.", ) parser_template.add_argument( - "-s", type=str, default="data", help="Dataset folder" + "-s", type=str, default=DEFAULTS["dataset_folder"], 
help="Dataset folder" ) parser_template.add_argument( - "-o", type=str, default="output", help="Output folder" + "-o", type=str, default=DEFAULTS["output_folder"], help="Output folder" ) parser_template.add_argument( "--init_seed", type=int, - default=535, - help="Seed of the priors. Seed is set to 535 by default.", + default=DEFAULTS["init_seed"], + help="Seed of the priors. " f"{DEFAULTS['init_seed']} by default.", ) parser_template.add_argument( "--model_seed", type=int, - default=165, - help="Seed of the models. Seed is set to 165 by default.", + default=DEFAULTS["model_seed"], + help="Seed of the models. " f"{DEFAULTS['model_seed']} by default.", ) parser_template.add_argument( "--template", type=str, help="Overwrite template with template file path." @@ -76,14 +77,11 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--n_runs", type=int, - default=1, - help="Number of runs. Only for templates 'basic' and 'multimodel'. " - "Default: 1.", + help="Number of runs. Only for templates 'basic' and 'multimodel'. ", ) parser_template.add_argument( "--n_priors", type=int, - default=10, help="Number of priors. Only for template 'arfi'. " "Default: 10.", ) parser_template.add_argument( @@ -99,82 +97,78 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--classifier", type=str, - default="nb", help="Classifier to use. Only for template 'basic' and 'arfi'. " - "Default: nb.", ) parser_template.add_argument( "--feature_extractor", type=str, - default="tfidf", help="Feature_extractor to use. Only for template 'basic' and 'arfi'. " - "Default: tfidf.", ) parser_template.add_argument( "--query_strategy", type=str, - default="max", - help="Query strategy to use. " "Default: max.", + help="Query strategy to use. Only for template 'basic' and 'arfi'. " ) parser_template.add_argument( "--balance_strategy", type=str, - default="double", - help="Balance strategy to use. " "Default: double.", + default=DEFAULTS["balance_strategy"], + help="Balance strategy to use. " + f"{DEFAULTS['balance_strategy']} by default.", ) parser_template.add_argument( "--instances_per_query", type=int, - default=1, - help="Number of instances per query. " "Default: 1.", + default=DEFAULTS["instances_per_query"], + help="Number of instances per query. " + f"{DEFAULTS['instances_per_query']} by default.", ) parser_template.add_argument( "--stop_if", type=str, - default="min", + default=DEFAULTS["stop_if"], help="The number of label actions to simulate. " - "Default 'min' will stop simulating when all relevant records are found.", + f"{DEFAULTS['stop_if']} by default.", ) parser_template.add_argument( "--classifiers", nargs="+", - default=["logistic", "nb", "rf", "svm"], - help="Classifiers to use. Only for template 'multimodel'. " - "Default: ['logistic', 'nb', 'rf', 'svm']", + help="Classifiers to use. Only for template 'multimodel'. ", ) parser_template.add_argument( "--feature_extractors", nargs="+", - default=["doc2vec", "sbert", "tfidf"], - help="Feature extractors to use. Only for template 'multimodel'. " - "Default: ['doc2vec', 'sbert', 'tfidf']", + help="Feature extractors to use. Only for template 'multimodel'. ", ) parser_template.add_argument( "--query_strategies", nargs="+", - default=["max"], - help="Query strategies to use. Only for template 'multimodel'. " - "Default: ['max']", + help="Query strategies to use. Only for template 'multimodel'. 
", ) parser_template.add_argument( "--impossible_models", nargs="+", - default=["nb,doc2vec", "nb,sbert"], - help="Model combinations to exclude. Only for template 'multimodel'. " - "Default: ['nb,doc2vec', 'nb,sbert']", + help="Model combinations to exclude. Only for template 'multimodel'. ", ) parser_template.set_defaults(func=self._template_cli) parser_script = subparsers.add_parser("add-script") parser_script.add_argument( - "name", type=str, nargs="?", help="The name of the script." + "name", + type=str, + nargs="?", + help="The name of the script." ) parser_script.add_argument( - "--all", "-a", action="store_true", help="Add all scripts." + "--all", "-a", + action="store_true", + help="Add all scripts." ) parser_script.add_argument( - "-o", type=str, default="scripts", help="Location of the scripts folder." + "-o", + type=str, default=DEFAULTS["scripts_folder"], + help="Location of the scripts folder." ) parser_script.set_defaults(func=self._add_script_cli) @@ -232,9 +226,21 @@ def _template(self, args): Path(args.o).parent.mkdir(parents=True, exist_ok=True) if args.name in [TemplateBasic.template_name]: + prohibited_args = ['classifiers', + 'feature_extractors', + 'query_strategies', + 'impossible_models', + 'n_priors'] + for arg in prohibited_args: + if getattr(args, arg): + raise ValueError( + f"Argument {arg} is not allowed for template {args.name}") + job = TemplateBasic( - datasets, + datasets=datasets, + fp_template=fp_template, output_folder=Path(args.o), + scripts_folder=Path(DEFAULTS["scripts_folder"]), create_wordclouds=args.no_wordclouds, allow_overwrite=args.overwrite, n_runs=args.n_runs, @@ -246,15 +252,26 @@ def _template(self, args): balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, - fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, ).render() elif args.name in [TemplateARFI.template_name]: + prohibited_args = ['n_runs', + 'classifiers', + 'feature_extractors', + 'query_strategies', + 'impossible_models'] + for arg in prohibited_args: + if getattr(args, arg): + raise ValueError( + f"Argument {arg} is not allowed for template {args.name}") + job = TemplateARFI( - datasets, + datasets=datasets, + fp_template=fp_template, output_folder=Path(args.o), + scripts_folder=Path(DEFAULTS["scripts_folder"]), create_wordclouds=args.no_wordclouds, allow_overwrite=args.overwrite, n_priors=args.n_priors, @@ -266,15 +283,25 @@ def _template(self, args): balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, - fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, ).render() elif args.name in [TemplateMultiModel.template_name]: + prohibited_args = ['classifier', + 'feature_extractor', + 'query_strategy', + 'n_priors'] + for arg in prohibited_args: + if getattr(args, arg): + raise ValueError( + f"Argument {arg} is not allowed for template {args.name}") + job = TemplateMultiModel( - datasets, + datasets=datasets, + fp_template=fp_template, output_folder=Path(args.o), + scripts_folder=Path(DEFAULTS["scripts_folder"]), create_wordclouds=args.no_wordclouds, allow_overwrite=args.overwrite, n_runs=args.n_runs, @@ -287,7 +314,6 @@ def _template(self, args): balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, - fp_template=fp_template, job_file=args.job_file, platform_sys=args.platform, ).render() diff --git a/asreviewcontrib/makita/template_arfi.py 
b/asreviewcontrib/makita/template_arfi.py index e00d317b..2ab57cef 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -9,12 +9,28 @@ class TemplateARFI(TemplateBase): template_name = "arfi" - def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop("n_runs", 1) - self.classifier = kwargs.pop("classifier", "nb") - self.feature_extractor = kwargs.pop("feature_extractor", "tfidf") - self.n_priors = kwargs.pop("n_priors", 10) - super().__init__(*args, **kwargs) + def __init__( + self, + classifier, + feature_extractor, + query_strategy, + n_priors, + **kwargs, + ): + if classifier is None: + classifier = "nb" + if feature_extractor is None: + feature_extractor = "tfidf" + if query_strategy is None: + query_strategy = "max" + if n_priors is None: + n_priors = 10 + + self.classifier = classifier + self.feature_extractor = feature_extractor + self.query_strategy = query_strategy + self.n_priors = n_priors + super().__init__(**kwargs) def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index c11c8abb..105f3e5d 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -14,19 +14,18 @@ class TemplateBase: def __init__( self, datasets, - output_folder="output", - scripts_folder="scripts", - create_wordclouds=True, - allow_overwrite=False, - init_seed=535, - model_seed=165, - query_strategy="max", - balance_strategy="double", - instances_per_query=1, - stop_if="min", - fp_template=None, - job_file=None, - platform_sys=None, + fp_template, + output_folder, + scripts_folder, + create_wordclouds, + allow_overwrite, + init_seed, + model_seed, + balance_strategy, + instances_per_query, + stop_if, + job_file, + platform_sys, ): self.datasets = datasets self.output_folder = output_folder @@ -34,7 +33,6 @@ def __init__( self.create_wordclouds = create_wordclouds self.init_seed = init_seed self.model_seed = model_seed - self.query_strategy = query_strategy self.balance_strategy = balance_strategy self.instances_per_query = instances_per_query self.stop_if = stop_if @@ -47,9 +45,6 @@ def __init__( self.template = ConfigTemplate(fp_template) self.__version__ = __version__ - assert self.template is not None, "Template is None." - assert self.fp_template is not None, "Template file is None." - def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. 
These parameters are provided to the template once for each dataset.""" diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index a3941cf5..8b09006d 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -6,11 +6,28 @@ class TemplateBasic(TemplateBase): template_name = "basic" - def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop("n_runs", 1) - self.classifier = kwargs.pop("classifier", "nb") - self.feature_extractor = kwargs.pop("feature_extractor", "tfidf") - super().__init__(*args, **kwargs) + def __init__( + self, + classifier, + feature_extractor, + query_strategy, + n_runs, + **kwargs, + ): + if classifier is None: + classifier = "nb" + if feature_extractor is None: + feature_extractor = "tfidf" + if query_strategy is None: + query_strategy = "max" + if n_runs is None: + n_runs = 1 + + self.classifier = classifier + self.feature_extractor = feature_extractor + self.query_strategy = query_strategy + self.n_runs = n_runs + super().__init__(**kwargs) def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index e31c7d0e..e06da454 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -6,23 +6,32 @@ class TemplateMultiModel(TemplateBase): template_name = "multimodel" - def __init__(self, *args, **kwargs): - self.n_runs = kwargs.pop( - "n_runs", 1 - ) - self.all_classifiers = kwargs.pop( - "all_classifiers", ["logistic", "nb", "rf"] - ) - self.all_feature_extractors = kwargs.pop( - "all_feature_extractors", ["doc2vec", "sbert", "tfidf"] - ) - self.all_query_strategies = kwargs.pop( - "all_query_strategies", ["max"] - ) - self.impossible_models = kwargs.pop( - "impossible_models", ["nb,doc2vec", "nb,sbert"] - ) - super().__init__(*args, **kwargs) + def __init__( + self, + n_runs, + all_classifiers, + all_feature_extractors, + all_query_strategies, + impossible_models, + **kwargs, + ): + if n_runs is None: + n_runs = 1 + if all_classifiers is None: + all_classifiers = ["logistic", "nb", "rf"] + if all_feature_extractors is None: + all_feature_extractors = ["doc2vec", "sbert", "tfidf"] + if all_query_strategies is None: + all_query_strategies = ["max"] + if impossible_models is None: + impossible_models = ["nb,doc2vec", "nb,sbert"] + + self.n_runs = n_runs + self.all_classifiers = all_classifiers + self.all_feature_extractors = all_feature_extractors + self.all_query_strategies = all_query_strategies + self.impossible_models = impossible_models + super().__init__(**kwargs) def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. 
These parameters are provided to the @@ -42,7 +51,6 @@ def get_static_params(self, params): return { "datasets": params, "create_wordclouds": self.create_wordclouds, - "query_strategy": self.query_strategy, "balance_strategy": self.balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, From 63a761a575f8b54e553b6dcb15c38047ff2fcf39 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 15:32:04 +0200 Subject: [PATCH 50/95] remove query strategy from MM readme --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 788f50d4..8f24d577 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,6 @@ optional arguments: --n_runs N_RUNS Number of runs. Default: 1. --no_wordclouds Disables the generation of wordclouds. --overwrite Automatically accepts all overwrite requests. - --query_strategy QUERY_STRATEGY Query strategy to use. Default: max. --balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double. --instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1. --stop_if STOP_IF The number of label actions to simulate. Default 'min' will stop simulating when all relevant records are found. From 44819e846e507808c5e74c3e9edf5ce950a7699d Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 15:56:24 +0200 Subject: [PATCH 51/95] Improve platform handling --- asreviewcontrib/makita/entrypoint.py | 97 ++++++++++--------- asreviewcontrib/makita/template_base.py | 8 +- asreviewcontrib/makita/template_multimodel.py | 1 - 3 files changed, 51 insertions(+), 55 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 577483d7..107ce3cb 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -13,13 +13,6 @@ from asreviewcontrib.makita.utils import FileHandler -def _shell_to_batch(job): - job = f"@ echo off\nCOLOR E0{job}" - job = job.replace("#", "::") - job = job.replace("/", "\\") - return job - - class MakitaEntryPoint(BaseEntryPoint): # backward compat? description = "Makita functionality for ASReview datasets." @@ -97,17 +90,17 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--classifier", type=str, - help="Classifier to use. Only for template 'basic' and 'arfi'. " + help="Classifier to use. Only for template 'basic' and 'arfi'. ", ) parser_template.add_argument( "--feature_extractor", type=str, - help="Feature_extractor to use. Only for template 'basic' and 'arfi'. " + help="Feature_extractor to use. Only for template 'basic' and 'arfi'. ", ) parser_template.add_argument( "--query_strategy", type=str, - help="Query strategy to use. Only for template 'basic' and 'arfi'. " + help="Query strategy to use. Only for template 'basic' and 'arfi'. ", ) parser_template.add_argument( "--balance_strategy", @@ -155,20 +148,16 @@ def execute(self, argv): # noqa: C901 parser_script = subparsers.add_parser("add-script") parser_script.add_argument( - "name", - type=str, - nargs="?", - help="The name of the script." + "name", type=str, nargs="?", help="The name of the script." ) parser_script.add_argument( - "--all", "-a", - action="store_true", - help="Add all scripts." + "--all", "-a", action="store_true", help="Add all scripts." ) parser_script.add_argument( "-o", - type=str, default=DEFAULTS["scripts_folder"], - help="Location of the scripts folder." 
+ type=str, + default=DEFAULTS["scripts_folder"], + help="Location of the scripts folder.", ) parser_script.set_defaults(func=self._add_script_cli) @@ -225,16 +214,26 @@ def _template(self, args): # create output folder Path(args.o).parent.mkdir(parents=True, exist_ok=True) + # get job file + if args.platform == "Windows" or (args.platform is None and os.name == "nt"): + job_file = "jobs.bat" if args.job_file is None else args.job_file + else: + job_file = "jobs.sh" if args.job_file is None else args.job_file + + # render jobs file if args.name in [TemplateBasic.template_name]: - prohibited_args = ['classifiers', - 'feature_extractors', - 'query_strategies', - 'impossible_models', - 'n_priors'] + prohibited_args = [ + "classifiers", + "feature_extractors", + "query_strategies", + "impossible_models", + "n_priors", + ] for arg in prohibited_args: if getattr(args, arg): raise ValueError( - f"Argument {arg} is not allowed for template {args.name}") + f"Argument {arg} is not allowed for template {args.name}" + ) job = TemplateBasic( datasets=datasets, @@ -253,19 +252,21 @@ def _template(self, args): instances_per_query=args.instances_per_query, stop_if=args.stop_if, job_file=args.job_file, - platform_sys=args.platform, ).render() elif args.name in [TemplateARFI.template_name]: - prohibited_args = ['n_runs', - 'classifiers', - 'feature_extractors', - 'query_strategies', - 'impossible_models'] + prohibited_args = [ + "n_runs", + "classifiers", + "feature_extractors", + "query_strategies", + "impossible_models", + ] for arg in prohibited_args: if getattr(args, arg): raise ValueError( - f"Argument {arg} is not allowed for template {args.name}") + f"Argument {arg} is not allowed for template {args.name}" + ) job = TemplateARFI( datasets=datasets, @@ -283,19 +284,21 @@ def _template(self, args): balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, - job_file=args.job_file, - platform_sys=args.platform, + job_file=job_file, ).render() elif args.name in [TemplateMultiModel.template_name]: - prohibited_args = ['classifier', - 'feature_extractor', - 'query_strategy', - 'n_priors'] + prohibited_args = [ + "classifier", + "feature_extractor", + "query_strategy", + "n_priors", + ] for arg in prohibited_args: if getattr(args, arg): raise ValueError( - f"Argument {arg} is not allowed for template {args.name}") + f"Argument {arg} is not allowed for template {args.name}" + ) job = TemplateMultiModel( datasets=datasets, @@ -314,8 +317,7 @@ def _template(self, args): balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, - job_file=args.job_file, - platform_sys=args.platform, + job_file=job_file, ).render() else: @@ -331,15 +333,14 @@ def _template(self, args): model_seed=args.model_seed, stop_if=args.stop_if, fp_template=fp_template, - job_file=args.job_file, - platform_sys=args.platform, + job_file=job_file, ).render() - if args.platform == "Windows" or (args.platform is None and os.name == "nt"): - job = _shell_to_batch(job) - job_file = "jobs.bat" if args.job_file is None else args.job_file - else: - job_file = "jobs.sh" if args.job_file is None else args.job_file + # convert shell to batch if needed + if job_file.endswith(".bat"): + job = f"@ echo off\nCOLOR E0{job}" + job = job.replace("#", "::") + job = job.replace("/", "\\") # store result in output folder with open(job_file, "w") as f: diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 
105f3e5d..c04612cb 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -24,8 +24,7 @@ def __init__( balance_strategy, instances_per_query, stop_if, - job_file, - platform_sys, + job_file ): self.datasets = datasets self.output_folder = output_folder @@ -37,10 +36,7 @@ def __init__( self.instances_per_query = instances_per_query self.stop_if = stop_if self.fp_template = fp_template - self.job_file = ( - job_file if job_file else "jobs.bat" if os.name == "nt" else "jobs.sh" - ) - self.platform_sys = platform_sys if platform_sys else platform.system() + self.job_file = job_file self.file_handler = FileHandler(allow_overwrite) self.template = ConfigTemplate(fp_template) self.__version__ = __version__ diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index e06da454..a5c4c5da 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -57,7 +57,6 @@ def get_static_params(self, params): "output_folder": self.output_folder, "n_runs": self.n_runs, "scripts_folder": self.scripts_folder, - "platform": self.platform_sys, "version": self.__version__, "all_classifiers": self.all_classifiers, "all_feature_extractors": self.all_feature_extractors, From 35d4167127917b859b2fd40a2f34ce54c918ecd5 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 16:04:06 +0200 Subject: [PATCH 52/95] update workflow --- .github/workflows/ci-workflow.yml | 1 + asreviewcontrib/makita/template_arfi.py | 1 - asreviewcontrib/makita/template_basic.py | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 5287e59b..ffacb422 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -32,6 +32,7 @@ jobs: run: | cd tmp/basic asreview makita template basic | tee output.txt + asreview makita template basic --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 -s data-test -o output-test --init_seed 1 --model_seed 2 --no_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 2ab57cef..f59944ee 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -62,7 +62,6 @@ def get_static_params(self, params): "init_seed": self.init_seed, "output_folder": self.output_folder, "scripts_folder": self.scripts_folder, - "platform": self.platform_sys, "version": self.__version__, } diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 8b09006d..196d5d53 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -56,6 +56,5 @@ def get_static_params(self, params): "stop_if": self.stop_if, "output_folder": self.output_folder, "scripts_folder": self.scripts_folder, - "platform": self.platform_sys, "version": self.__version__, } From 9ad220e95dccf801632740f5c90700a38d58f1cd Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 16:06:12 +0200 Subject: [PATCH 53/95] add test data folder to workflow --- .github/workflows/ci-workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 
ffacb422..91172f7e 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -23,9 +23,11 @@ jobs: mkdir tmp cd tmp mkdir -p basic/data + mkdir -p basic/data-test mkdir -p arfi/data mkdir -p multimodel/data cp ../.github/workflows/test_data/labels.csv basic/data/labels.csv + cp ../.github/workflows/test_data/labels.csv basic/data-test/labels.csv cp ../.github/workflows/test_data/labels.csv arfi/data/labels.csv cp ../.github/workflows/test_data/labels.csv multimodel/data/labels.csv - name: Test makita templates From 6bad78b7b6d4d5adf9d5631a9d31441efd2b7907 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 16:08:27 +0200 Subject: [PATCH 54/95] remove unused imports --- asreviewcontrib/makita/template_base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index c04612cb..bb3f8ad7 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -1,7 +1,5 @@ """Rendering base class for templates.""" -import os -import platform from pathlib import Path from cfgtemplater.config_template import ConfigTemplate From 9e7a057f1f5ec6e1ffa9c53c239e39dfdd8ddb18 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 16:12:48 +0200 Subject: [PATCH 55/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 91172f7e..b2436491 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -15,9 +15,9 @@ jobs: - name: Install makita run: | pip install . - - name: Install ruff + - name: Install ruff scitree run: | - pip install ruff + pip install ruff scitree - name: set up environment run: | mkdir tmp @@ -42,6 +42,8 @@ jobs: cd ../multimodel asreview makita template multimodel | tee output.txt grep -q "ERROR" output.txt && exit 1 || true + cd .. + scitree - name: Run ShellCheck if: ${{ matrix.os != 'windows-latest' }} uses: ludeeus/action-shellcheck@master @@ -58,7 +60,7 @@ jobs: - name: Run basic template if: ${{ matrix.os == 'ubuntu-latest' }} run: | - pip install asreview-datatools asreview-insights synergy-dataset scitree + pip install asreview-datatools asreview-insights synergy-dataset mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l From d85469382253a76a3a1134ffb6c5c7f6395228c7 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 4 Apr 2024 16:21:22 +0200 Subject: [PATCH 56/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index b2436491..62cdff03 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -15,9 +15,9 @@ jobs: - name: Install makita run: | pip install . - - name: Install ruff scitree + - name: Install ruff run: | - pip install ruff scitree + pip install ruff - name: set up environment run: | mkdir tmp @@ -43,6 +43,11 @@ jobs: asreview makita template multimodel | tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd .. 
+ - name: Run scitree on output + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + pip install scitree + cd tmp scitree - name: Run ShellCheck if: ${{ matrix.os != 'windows-latest' }} From d99870616ceec38dc1ac297909b9ee6ec08688b5 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:29:42 +0200 Subject: [PATCH 57/95] Move template finder to inside template class --- asreviewcontrib/makita/entrypoint.py | 21 +++++++------------ asreviewcontrib/makita/template_arfi.py | 3 ++- asreviewcontrib/makita/template_base.py | 10 +++++++-- asreviewcontrib/makita/template_basic.py | 3 ++- asreviewcontrib/makita/template_multimodel.py | 3 ++- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 107ce3cb..52873665 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -161,7 +161,7 @@ def execute(self, argv): # noqa: C901 ) parser_script.set_defaults(func=self._add_script_cli) - # parse the args and call whatever function was selected + # parse the args and call the selected function args = parser.parse_args(argv) args.func(args) @@ -177,20 +177,13 @@ def _template(self, args): # lowercase name args.name = args.name.lower() - # backwards compatibility for 'multiple_models' - if args.name == "multiple_models": - args.name = "multimodel" - - # check if the template exists - fp_template = Path(TEMPLATES_FP, f"template_{args.name}.txt.template") - if not fp_template.is_file(): - raise ValueError(f"Template {args.name} not found") - # if a custom template is provided, check if it exists if args.template: fp_template = Path(args.template) if not fp_template.is_file(): raise ValueError(f"Custom template {args.template} not found") + else: + fp_template = None # print rendering message if args.template: @@ -221,7 +214,7 @@ def _template(self, args): job_file = "jobs.sh" if args.job_file is None else args.job_file # render jobs file - if args.name in [TemplateBasic.template_name]: + if args.name in TemplateBasic.template_name: prohibited_args = [ "classifiers", "feature_extractors", @@ -254,7 +247,7 @@ def _template(self, args): job_file=args.job_file, ).render() - elif args.name in [TemplateARFI.template_name]: + elif args.name in TemplateARFI.template_name: prohibited_args = [ "n_runs", "classifiers", @@ -287,7 +280,7 @@ def _template(self, args): job_file=job_file, ).render() - elif args.name in [TemplateMultiModel.template_name]: + elif args.name in TemplateMultiModel.template_name: prohibited_args = [ "classifier", "feature_extractor", @@ -322,7 +315,7 @@ def _template(self, args): else: # Fallback to basic template - # This case can occur if a user adds a new template to the templates folder + print(f"\u001b[31mERROR: \033[33mTemplate {args.name} not found.\u001b[0m\n") print("\u001b[31mFallback: \033[33mUsing the basic template.\u001b[0m\n") job = TemplateBasic( datasets, diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index f59944ee..a3126198 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -7,7 +7,8 @@ class TemplateARFI(TemplateBase): - template_name = "arfi" + template_name = ["arfi"] + template_file = "template_arfi.txt.template" def __init__( self, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index bb3f8ad7..9d0de728 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py 
@@ -5,10 +5,13 @@ from cfgtemplater.config_template import ConfigTemplate from asreviewcontrib.makita import __version__ +from asreviewcontrib.makita.config import TEMPLATES_FP from asreviewcontrib.makita.utils import FileHandler class TemplateBase: + template_file = None + def __init__( self, datasets, @@ -33,12 +36,15 @@ def __init__( self.balance_strategy = balance_strategy self.instances_per_query = instances_per_query self.stop_if = stop_if - self.fp_template = fp_template self.job_file = job_file self.file_handler = FileHandler(allow_overwrite) - self.template = ConfigTemplate(fp_template) self.__version__ = __version__ + self.template = ConfigTemplate(fp_template if fp_template is not None else self.get_template_file()) # noqa: E501 + + def get_template_file(self): + return Path(TEMPLATES_FP, self.template_file) + def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 196d5d53..d463cd1c 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -4,7 +4,8 @@ class TemplateBasic(TemplateBase): - template_name = "basic" + template_name = ["basic"] + template_file = "template_basic.txt.template" def __init__( self, diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index a5c4c5da..d1bd027d 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -4,7 +4,8 @@ class TemplateMultiModel(TemplateBase): - template_name = "multimodel" + template_name = ["multimodel", "multiple_models"] + template_file = "template_multimodel.txt.template" def __init__( self, From 2d6349b5e1749fc36425cc8d23c085ac2cd37b72 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:29:52 +0200 Subject: [PATCH 58/95] Move n_runs to static params --- asreviewcontrib/makita/template_basic.py | 2 +- asreviewcontrib/makita/templates/template_basic.txt.template | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index d463cd1c..d956c46d 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -39,7 +39,6 @@ def get_dynamic_params(self, index, fp_dataset): "input_file_stem": fp_dataset.stem, "model_seed": self.model_seed + index, "init_seed": self.init_seed, - "n_runs": self.n_runs, } def get_static_params(self, params): @@ -56,6 +55,7 @@ def get_static_params(self, params): "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, + "n_runs": self.n_runs, "scripts_folder": self.scripts_folder, "version": self.__version__, } diff --git a/asreviewcontrib/makita/templates/template_basic.txt.template b/asreviewcontrib/makita/templates/template_basic.txt.template index 7be33cb2..380f2350 100644 --- a/asreviewcontrib/makita/templates/template_basic.txt.template +++ b/asreviewcontrib/makita/templates/template_basic.txt.template @@ -48,7 +48,7 @@ python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/fig # Simulate runs mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files -{% for run in range(dataset.n_runs) %} +{% for run in range(n_runs) %} python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder 
}}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ run }}.asreview --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ run }}.json {% endfor %} From e3deacada28e0ffff7201c474e2a1be4b376158d Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:31:00 +0200 Subject: [PATCH 59/95] Ruff format --- asreviewcontrib/makita/template_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 9d0de728..5b625ff1 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -25,7 +25,7 @@ def __init__( balance_strategy, instances_per_query, stop_if, - job_file + job_file, ): self.datasets = datasets self.output_folder = output_folder @@ -40,7 +40,9 @@ def __init__( self.file_handler = FileHandler(allow_overwrite) self.__version__ = __version__ - self.template = ConfigTemplate(fp_template if fp_template is not None else self.get_template_file()) # noqa: E501 + self.template = ConfigTemplate( + fp_template if fp_template is not None else self.get_template_file() + ) # noqa: E501 def get_template_file(self): return Path(TEMPLATES_FP, self.template_file) From 69341bc4fd187a036c2ddd8bcce21c257be63421 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:33:13 +0200 Subject: [PATCH 60/95] Update entrypoint.py --- asreviewcontrib/makita/entrypoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 52873665..ca2c6e99 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -315,7 +315,7 @@ def _template(self, args): else: # Fallback to basic template - print(f"\u001b[31mERROR: \033[33mTemplate {args.name} not found.\u001b[0m\n") + print(f"\u001b[31mERROR: \033[33mTemplate {args.name} not found.\u001b[0m\n") # noqa: E501 print("\u001b[31mFallback: \033[33mUsing the basic template.\u001b[0m\n") job = TemplateBasic( datasets, From 7ff860365907f6b222f02906fdc4a2b205c0f4c4 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:44:06 +0200 Subject: [PATCH 61/95] n_runs fix in templates --- asreviewcontrib/makita/templates/template_basic.txt.template | 4 ++-- .../makita/templates/template_multimodel.txt.template | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/asreviewcontrib/makita/templates/template_basic.txt.template b/asreviewcontrib/makita/templates/template_basic.txt.template index 380f2350..8f18d916 100644 --- a/asreviewcontrib/makita/templates/template_basic.txt.template +++ b/asreviewcontrib/makita/templates/template_basic.txt.template @@ -49,8 +49,8 @@ python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/fig # Simulate runs mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files {% for run in range(n_runs) %} -python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ 
dataset.input_file_stem }}_{{ run }}.asreview --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} -python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ run }}.json +python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed + run }} -m {{ classifier }} -e {{ feature_extractor }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} +python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}{{ "_{}".format(run) if n_runs > 1 else "" }}.json {% endfor %} # Generate plot and tables for dataset diff --git a/asreviewcontrib/makita/templates/template_multimodel.txt.template b/asreviewcontrib/makita/templates/template_multimodel.txt.template index 2e59e5ff..0b503af8 100644 --- a/asreviewcontrib/makita/templates/template_multimodel.txt.template +++ b/asreviewcontrib/makita/templates/template_multimodel.txt.template @@ -54,8 +54,8 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files # Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model {% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }} {% for run in range(n_runs) %} -python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} -python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.json +python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy 
}} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} +python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}{{ "_{}".format(run) if n_runs > 1 else "" }}.json {% endfor %}{% endif %} {% endfor %} {% endfor %} From 29600f8761c5db64a54308699048026aa814c1d7 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:48:26 +0200 Subject: [PATCH 62/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 62cdff03..4311a8a7 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -69,6 +69,6 @@ jobs: mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l - asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite + asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite --n_runs 2 sh jobs.sh scitree From 4132893a9653c10d79e3ceedc51d0b59a396360d Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 5 Apr 2024 15:48:34 +0200 Subject: [PATCH 63/95] add modelmatrix to mm template --- asreviewcontrib/makita/template_multimodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index d1bd027d..39eece35 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -4,7 +4,7 @@ class TemplateMultiModel(TemplateBase): - template_name = ["multimodel", "multiple_models"] + template_name = ["multimodel", "multiple_models", "modelmatrix"] template_file = "template_multimodel.txt.template" def __init__( From cf64ca4c5072442db8ba8361e31b757ec1b7aebe Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 10:41:56 +0200 Subject: [PATCH 64/95] Remove modifications from examples --- examples/arfi_example/README.md | 36 +- examples/arfi_example/jobs.bat | 194 ---------- examples/arfi_example/jobs.sh | 359 +++++++++--------- examples/arfi_example/scripts/get_plot.py | 96 ++--- .../arfi_example/scripts/merge_metrics.py | 5 +- examples/arfi_example/scripts/merge_tds.py | 35 +- examples/basic_example/README.md | 42 +- examples/basic_example/jobs.bat | 71 ---- examples/basic_example/jobs.sh | 135 ++++--- examples/basic_example/scripts/get_plot.py | 96 ++--- .../basic_example/scripts/merge_metrics.py | 5 +- examples/basic_example/scripts/merge_tds.py | 35 +- examples/multimodel_example/README.md | 36 +- examples/multimodel_example/jobs.bat | 157 -------- examples/multimodel_example/jobs.sh | 227 ++++++----- .../multimodel_example/scripts/get_plot.py | 96 ++--- .../scripts/merge_metrics.py | 5 +- .../multimodel_example/scripts/merge_tds.py | 35 +- 18 files changed, 586 insertions(+), 1079 deletions(-) delete mode 100644 examples/arfi_example/jobs.bat delete mode 100644 examples/basic_example/jobs.bat delete mode 100644 examples/multimodel_example/jobs.bat diff --git a/examples/arfi_example/README.md b/examples/arfi_example/README.md index 084f057c..73fbeaf8 100644 --- a/examples/arfi_example/README.md +++ 
b/examples/arfi_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -For generating wordclouds, install the following dependencies. +If wordcloud images are required, install the following dependencies. ```sh pip install asreview-wordcloud @@ -43,8 +43,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜Smid_2020.csv │ ├── 📜van_de_Schoot_2018.csv + │ ├── 📜Smid_2020.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... ├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_Smid_2020.csv - | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_sim_van_de_Schoot_2018.csv | ├── 📜tds_sim_van_de_Schoot_2018.xlsx + | ├── 📜tds_sim_Smid_2020.csv + | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_Smid_2020_metrics.csv - | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx + | ├── 📜metrics_sim_Smid_2020_metrics.csv + | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_Smid_2020.png ├── 📈plot_recall_van_de_Schoot_2018.png - ├── 📈wordcloud_Smid_2020.png - ├── 📈wordcloud_relevant_Smid_2020.png - └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈plot_recall_Smid_2020.png ├── 📈wordcloud_van_de_Schoot_2018.png ├── 📈wordcloud_relevant_van_de_Schoot_2018.png └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈wordcloud_Smid_2020.png + ├── 📈wordcloud_relevant_Smid_2020.png + └── 📈wordcloud_irrelevant_Smid_2020.png diff --git a/examples/arfi_example/jobs.bat b/examples/arfi_example/jobs.bat deleted file mode 100644 index 00fb8a21..00000000 --- a/examples/arfi_example/jobs.bat +++ /dev/null @@ -1,194 +0,0 @@ -@ echo off -COLOR E0 - -:: version 0.0.0 - -:: Create folder structure. By default, the folder 'output' is used to store output. 
-mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics - -:: Collect descriptives about the dataset -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant - -:: Simulate runs, collect metrics and create plots -mkdir output\simulation\Smid_2020\state_files -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_31.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_121.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_122.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_216.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_520.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double 
--n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_526.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_672.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_763.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_810.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1000.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1063.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1195.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1203.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview -o 
output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1257.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1429.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1534.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1809.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1820.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1876.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1877.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2067.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2070.json -python -m asreview simulate 
data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2241.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2276.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2279.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2307.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2452.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics - -:: Collect descriptives about the dataset -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o 
output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant - -:: Simulate runs, collect metrics and create plots -mkdir output\simulation\van_de_Schoot_2018\state_files -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_51.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_116.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_462.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_730.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_767.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_831.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_902.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_953.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1033.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1180.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1248.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1356.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1429.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1514.json -python -m 
asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1554.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1565.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1746.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1881.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1994.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2279.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2496.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2545.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2624.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2740.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2803.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3076.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3314.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3442.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview 
--prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3608.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3680.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3769.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3842.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4154.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4201.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4269.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e 
tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4327.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4377.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4461.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv - -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py diff --git a/examples/arfi_example/jobs.sh b/examples/arfi_example/jobs.sh index 00fb8a21..cbb07843 100644 --- a/examples/arfi_example/jobs.sh +++ b/examples/arfi_example/jobs.sh @@ -1,194 +1,193 @@ -@ echo off -COLOR E0 -:: version 0.0.0 -:: Create folder structure. By default, the folder 'output' is used to store output. +# version 0.0.0 + +# Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures +mkdir output/simulation +mkdir output/tables +mkdir output/tables/metrics +mkdir output/tables/time_to_discovery +mkdir output/figures -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics +################################## +### DATASET: Smid_2020 +################################## +# Create output folder +mkdir output/simulation/Smid_2020/ +mkdir output/simulation/Smid_2020/metrics -:: Collect descriptives about the dataset -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json +# Collect descriptives about the dataset +mkdir output/simulation/Smid_2020/descriptives +asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant -:: Simulate runs, collect metrics and create plots -mkdir output\simulation\Smid_2020\state_files -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_31.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_31.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_121.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_121.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_122.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_122.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview --prior_record_id 216 1225 
1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_216.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_216.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_520.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_520.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_526.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_526.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_672.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_672.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_763.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_763.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_810.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_810.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1000.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1000.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1063.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1063.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview 
metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1195.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1195.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1203.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1203.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1257.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1257.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1429.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1429.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1534.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1534.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1809.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1809.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1820.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1820.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1876.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1876.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_1877.asreview -o 
output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_1877.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2067.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2067.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2070.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2070.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2241.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2241.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2276.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2276.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2279.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2279.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2307.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2307.json -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_2452.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_2452.json +# Simulate runs, collect metrics and create plots +mkdir output/simulation/Smid_2020/state_files +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview --prior_record_id 31 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_31.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_31.json +asreview simulate data/Smid_2020.csv 
-s output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview --prior_record_id 121 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_121.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_121.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview --prior_record_id 122 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_122.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_122.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview --prior_record_id 216 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_216.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_216.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview --prior_record_id 520 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_520.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_520.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview --prior_record_id 526 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_526.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_526.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview --prior_record_id 672 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_672.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_672.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview --prior_record_id 763 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_763.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_763.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview --prior_record_id 810 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_810.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_810.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview --prior_record_id 1000 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1000.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1000.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview --prior_record_id 1063 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1063.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1063.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview --prior_record_id 1195 1225 1252 
425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1195.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1195.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview --prior_record_id 1203 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1203.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1203.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview --prior_record_id 1257 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1257.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1257.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview --prior_record_id 1429 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1429.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1429.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview --prior_record_id 1534 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1534.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1534.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview --prior_record_id 1809 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1809.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1809.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview --prior_record_id 1820 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1820.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1820.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview --prior_record_id 1876 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1876.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1876.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview --prior_record_id 1877 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_1877.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_1877.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview --prior_record_id 2067 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2067.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2067.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview --prior_record_id 2070 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics 
output/simulation/Smid_2020/state_files/sim_Smid_2020_2070.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2070.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview --prior_record_id 2241 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2241.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2241.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview --prior_record_id 2276 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2276.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2276.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview --prior_record_id 2279 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2279.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2279.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview --prior_record_id 2307 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2307.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2307.json +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview --prior_record_id 2452 1225 1252 425 2568 1010 2053 1836 1716 1873 261 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_2452.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_2452.json -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model +python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv +python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics +################################## +### DATASET: van_de_Schoot_2018 +################################## +# Create output folder +mkdir output/simulation/van_de_Schoot_2018/ +mkdir output/simulation/van_de_Schoot_2018/metrics -:: Collect descriptives about the dataset -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json +# Collect descriptives about the 
dataset +mkdir output/simulation/van_de_Schoot_2018/descriptives +asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant -:: Simulate runs, collect metrics and create plots -mkdir output\simulation\van_de_Schoot_2018\state_files -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_51.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_51.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_116.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_116.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_462.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_462.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_730.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_730.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_767.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_767.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_831.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_831.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_902.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_902.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_953.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_953.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1033.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1033.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1180.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1180.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1248.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1248.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1356.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1356.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1429.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1429.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1514.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1514.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1554.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1554.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1565.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1565.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1746.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1746.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1881.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1881.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_1994.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_1994.json -python -m 
asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2279.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2279.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2496.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2496.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2545.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2545.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2624.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2624.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2740.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2740.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_2803.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_2803.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3076.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3076.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3314.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3314.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3442.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3442.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3608.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3608.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3680.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3680.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3769.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3769.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_3842.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_3842.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4154.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4154.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview 
--prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4201.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4201.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4269.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4269.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4327.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4327.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4377.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4377.json -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_4461.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_4461.json +# Simulate runs, collect metrics and create plots +mkdir output/simulation/van_de_Schoot_2018/state_files +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview --prior_record_id 51 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_51.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_51.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview --prior_record_id 116 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_116.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_116.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview --prior_record_id 462 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_462.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_462.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview --prior_record_id 730 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_730.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_730.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview --prior_record_id 767 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_767.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_767.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview --prior_record_id 831 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_831.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_831.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview --prior_record_id 902 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_902.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_902.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview --prior_record_id 953 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_953.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_953.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview --prior_record_id 1033 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1033.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1033.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview --prior_record_id 1180 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1180.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1180.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview --prior_record_id 1248 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1248.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1248.json +asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview --prior_record_id 1356 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1356.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1356.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview --prior_record_id 1429 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1429.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1429.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview --prior_record_id 1514 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1514.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1514.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview --prior_record_id 1554 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1554.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1554.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview --prior_record_id 1565 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1565.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1565.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview --prior_record_id 1746 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1746.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1746.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview --prior_record_id 1881 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1881.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1881.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview --prior_record_id 1994 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_1994.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_1994.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview --prior_record_id 2279 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2279.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2279.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview --prior_record_id 2496 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2496.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2496.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview --prior_record_id 2545 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2545.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2545.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview --prior_record_id 2624 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2624.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2624.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview --prior_record_id 2740 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2740.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2740.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview --prior_record_id 2803 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_2803.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_2803.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview --prior_record_id 3076 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3076.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3076.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview --prior_record_id 3314 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3314.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3314.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview --prior_record_id 3442 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3442.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3442.json +asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview --prior_record_id 3608 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3608.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3608.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview --prior_record_id 3680 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3680.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3680.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview --prior_record_id 3769 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3769.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3769.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview --prior_record_id 3842 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_3842.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_3842.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview --prior_record_id 4154 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4154.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4154.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview --prior_record_id 4201 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4201.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4201.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview --prior_record_id 4269 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4269.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4269.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview --prior_record_id 4327 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4327.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4327.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview --prior_record_id 4377 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4377.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4377.json +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview --prior_record_id 4461 4382 1972 3329 4088 1416 696 1820 2971 2875 2976 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_4461.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_4461.json -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv +python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py +# Merge descriptives and metrics +python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv +python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv diff --git a/examples/arfi_example/scripts/get_plot.py b/examples/arfi_example/scripts/get_plot.py index 64d2f8db..7d29468d 100644 --- a/examples/arfi_example/scripts/get_plot.py +++ b/examples/arfi_example/scripts/get_plot.py @@ -20,72 +20,58 @@ import argparse from pathlib import Path +import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def _set_legend(ax, state, legend_option, label_to_line, state_file): - metadata = state.settings_metadata - label = None - - if legend_option == "filename": - label = state_file.stem - elif legend_option == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend_option == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend_option] - except KeyError as err: - raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 - - if label: - # add label to line - if label not in label_to_line: - ax.lines[-2].set_label(label) - label_to_line[label] = ax.lines[-2] - # set color of line to the color of the first line with the same label - else: - ax.lines[-2].set_color(label_to_line[label].get_color()) - ax.lines[-2].set_label("_no_legend_") - - def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files. - - Arguments - --------- - states: list - List of state files. 
- filename: str - Filename of the plot. - legend: str - Add a legend to the plot, based on the given parameter. - Possible values: "filename", "model", "feature_extraction", - "balance_strategy", "query_strategy", "classifier". - """ - states = sorted(states) + """Generate an ASReview plot from state files.""" + fig, ax = plt.subplots() - label_to_line = {} + + labels = [] + colors = list(mcolors.TABLEAU_COLORS.values()) for state_file in states: with open_state(state_file) as state: + # draw the plot plot_recall(ax, state) - if legend: - _set_legend(ax, state, legend, label_to_line, state_file) - if legend: - ax.legend(loc=4, prop={"size": 8}) + # set the label + if legend == "filename": + ax.lines[-2].set_label(state_file.stem) + ax.legend(loc=4, prop={"size": 8}) + elif legend: + metadata = state.settings_metadata + + if legend == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend] + except KeyError as exc: + raise ValueError( + f"Legend setting '{legend}' " + "not found in state file settings." + ) from exc + if label not in labels: + ax.lines[-2].set_label(label) + labels.append(label) + ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) + ax.legend(loc=4, prop={"size": 8}) + fig.savefig(str(filename)) @@ -104,10 +90,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = list(Path(args.s).glob("*.asreview")) + states = Path(args.s).glob("*.asreview") # check if states are found - if len(states) == 0: + if len(list(states)) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/arfi_example/scripts/merge_metrics.py b/examples/arfi_example/scripts/merge_metrics.py index d8ed971b..aa031461 100644 --- a/examples/arfi_example/scripts/merge_metrics.py +++ b/examples/arfi_example/scripts/merge_metrics.py @@ -55,10 +55,7 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." 
) parser.add_argument( - "-s", - type=str, - default="output/simulation/*/metrics/", - help="states location", + "-s", type=str, default="output/simulation/*/metrics/", help="states location" ) parser.add_argument( "-o", diff --git a/examples/arfi_example/scripts/merge_tds.py b/examples/arfi_example/scripts/merge_tds.py index b705ed01..1beb52c6 100644 --- a/examples/arfi_example/scripts/merge_tds.py +++ b/examples/arfi_example/scripts/merge_tds.py @@ -24,7 +24,6 @@ import argparse import glob import json -from math import nan from pathlib import Path import pandas as pd @@ -38,7 +37,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] + ] # noqa values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -48,26 +47,25 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=nan, + fill_value=0, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted -def get_atd_values(df): - df["record_atd"] = df.mean(axis=1) - - df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) - - return df - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." ) - parser.add_argument("-s", type=str, required=True, help="metrics location") - parser.add_argument("-o", type=str, required=True, help="Output table location") + parser.add_argument( + "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" + ) + parser.add_argument( + "-o", + type=str, + default="output/tables/tds_sim_all.csv", + help="Output table location", + ) args = parser.parse_args() # load metric files @@ -77,14 +75,9 @@ def get_atd_values(df): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - # check if output file has .csv extension - if Path(args.o).suffix != ".csv": - raise ValueError("Output file should have .csv extension") - - td_table = create_table_state_tds(metric_files) - atd_table = get_atd_values(td_table) + states_table = create_table_state_tds(metric_files) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - atd_table.to_csv(Path(args.o)) - atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) + states_table.to_csv(Path(args.o)) + states_table.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/examples/basic_example/README.md b/examples/basic_example/README.md index ad37fd43..1f2f52fc 100644 --- a/examples/basic_example/README.md +++ b/examples/basic_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -For generating wordclouds, install the following dependencies. +If wordcloud images are required, install the following dependencies. ```sh pip install asreview-wordcloud @@ -29,7 +29,11 @@ The performance on the following datasets is evaluated: ## Run simulation -To start the simulation, run the `jobs.bat` file. +To start the simulation, run the following command in the project directory. + +```sh +sh jobs.sh +``` ## Structure @@ -39,8 +43,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜Smid_2020.csv │ ├── 📜van_de_Schoot_2018.csv + │ ├── 📜Smid_2020.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -49,45 +53,45 @@ The following files are found in this project: │ └── 📜... 
└── 📂output ├── 📂simulation - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... ├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_Smid_2020.csv - | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_sim_van_de_Schoot_2018.csv | ├── 📜tds_sim_van_de_Schoot_2018.xlsx + | ├── 📜tds_sim_Smid_2020.csv + | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_Smid_2020_metrics.csv - | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx + | ├── 📜metrics_sim_Smid_2020_metrics.csv + | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_Smid_2020.png ├── 📈plot_recall_van_de_Schoot_2018.png - ├── 📈wordcloud_Smid_2020.png - ├── 📈wordcloud_relevant_Smid_2020.png - └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈plot_recall_Smid_2020.png ├── 📈wordcloud_van_de_Schoot_2018.png ├── 📈wordcloud_relevant_van_de_Schoot_2018.png └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈wordcloud_Smid_2020.png + ├── 📈wordcloud_relevant_Smid_2020.png + └── 📈wordcloud_irrelevant_Smid_2020.png diff --git a/examples/basic_example/jobs.bat b/examples/basic_example/jobs.bat deleted file mode 100644 index e737c184..00000000 --- a/examples/basic_example/jobs.bat +++ /dev/null @@ -1,71 +0,0 @@ -@ echo off -COLOR E0 - -:: version 0.0.0 - -:: Create folder structure. By default, the folder 'output' is used to store output. 
-mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures - - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics - -:: Collect descriptives about the dataset Smid_2020 -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\Smid_2020\state_files -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_0.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics - -:: Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\van_de_Schoot_2018\state_files -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_0.json - -:: Generate plot and 
tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv - -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py diff --git a/examples/basic_example/jobs.sh b/examples/basic_example/jobs.sh index e737c184..e5327cd9 100644 --- a/examples/basic_example/jobs.sh +++ b/examples/basic_example/jobs.sh @@ -1,71 +1,70 @@ -@ echo off -COLOR E0 -:: version 0.0.0 -:: Create folder structure. By default, the folder 'output' is used to store output. +# version 0.0.0 + +# Create folder structure. By default, the folder 'output' is used to store output. mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures - - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics - -:: Collect descriptives about the dataset Smid_2020 -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\Smid_2020\state_files -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview --init_seed 535 --seed 165 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_0.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics - -:: Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json - -:: Generate wordcloud visualizations of all 
datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\van_de_Schoot_2018\state_files -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 -m nb -e tfidf -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_0.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv - -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py +mkdir output/simulation +mkdir output/tables +mkdir output/tables/metrics +mkdir output/tables/time_to_discovery +mkdir output/figures + + +################################## +### DATASET: Smid_2020 +################################## + +# Create output folder +mkdir output/simulation/Smid_2020/ +mkdir output/simulation/Smid_2020/metrics + +# Collect descriptives about the dataset Smid_2020 +mkdir output/simulation/Smid_2020/descriptives +asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json + +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant + +# Simulate runs +mkdir output/simulation/Smid_2020/state_files +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_0.json + +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png +python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv +python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv + +################################## +### DATASET: van_de_Schoot_2018 +################################## + +# Create output folder +mkdir output/simulation/van_de_Schoot_2018/ +mkdir output/simulation/van_de_Schoot_2018/metrics + +# Collect descriptives about the dataset 
van_de_Schoot_2018 +mkdir output/simulation/van_de_Schoot_2018/descriptives +asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json + +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant + +# Simulate runs +mkdir output/simulation/van_de_Schoot_2018/state_files +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_0.json + +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png +python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv +python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv + +# Merge descriptives and metrics +python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv +python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv diff --git a/examples/basic_example/scripts/get_plot.py b/examples/basic_example/scripts/get_plot.py index 64d2f8db..7d29468d 100644 --- a/examples/basic_example/scripts/get_plot.py +++ b/examples/basic_example/scripts/get_plot.py @@ -20,72 +20,58 @@ import argparse from pathlib import Path +import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def _set_legend(ax, state, legend_option, label_to_line, state_file): - metadata = state.settings_metadata - label = None - - if legend_option == "filename": - label = state_file.stem - elif legend_option == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend_option == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend_option] - except KeyError as err: - raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 - - if label: - # add label to line - if label not in label_to_line: - ax.lines[-2].set_label(label) - label_to_line[label] = ax.lines[-2] - # set color of line to the color of the first line with the same label - else: - ax.lines[-2].set_color(label_to_line[label].get_color()) - ax.lines[-2].set_label("_no_legend_") - - def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files. - - Arguments - --------- - states: list - List of state files. - filename: str - Filename of the plot. - legend: str - Add a legend to the plot, based on the given parameter. 
- Possible values: "filename", "model", "feature_extraction", - "balance_strategy", "query_strategy", "classifier". - """ - states = sorted(states) + """Generate an ASReview plot from state files.""" + fig, ax = plt.subplots() - label_to_line = {} + + labels = [] + colors = list(mcolors.TABLEAU_COLORS.values()) for state_file in states: with open_state(state_file) as state: + # draw the plot plot_recall(ax, state) - if legend: - _set_legend(ax, state, legend, label_to_line, state_file) - if legend: - ax.legend(loc=4, prop={"size": 8}) + # set the label + if legend == "filename": + ax.lines[-2].set_label(state_file.stem) + ax.legend(loc=4, prop={"size": 8}) + elif legend: + metadata = state.settings_metadata + + if legend == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend] + except KeyError as exc: + raise ValueError( + f"Legend setting '{legend}' " + "not found in state file settings." + ) from exc + if label not in labels: + ax.lines[-2].set_label(label) + labels.append(label) + ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) + ax.legend(loc=4, prop={"size": 8}) + fig.savefig(str(filename)) @@ -104,10 +90,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = list(Path(args.s).glob("*.asreview")) + states = Path(args.s).glob("*.asreview") # check if states are found - if len(states) == 0: + if len(list(states)) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/basic_example/scripts/merge_metrics.py b/examples/basic_example/scripts/merge_metrics.py index d8ed971b..aa031461 100644 --- a/examples/basic_example/scripts/merge_metrics.py +++ b/examples/basic_example/scripts/merge_metrics.py @@ -55,10 +55,7 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." ) parser.add_argument( - "-s", - type=str, - default="output/simulation/*/metrics/", - help="states location", + "-s", type=str, default="output/simulation/*/metrics/", help="states location" ) parser.add_argument( "-o", diff --git a/examples/basic_example/scripts/merge_tds.py b/examples/basic_example/scripts/merge_tds.py index b705ed01..1beb52c6 100644 --- a/examples/basic_example/scripts/merge_tds.py +++ b/examples/basic_example/scripts/merge_tds.py @@ -24,7 +24,6 @@ import argparse import glob import json -from math import nan from pathlib import Path import pandas as pd @@ -38,7 +37,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] + ] # noqa values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -48,26 +47,25 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=nan, + fill_value=0, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted -def get_atd_values(df): - df["record_atd"] = df.mean(axis=1) - - df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) - - return df - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." 
) - parser.add_argument("-s", type=str, required=True, help="metrics location") - parser.add_argument("-o", type=str, required=True, help="Output table location") + parser.add_argument( + "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" + ) + parser.add_argument( + "-o", + type=str, + default="output/tables/tds_sim_all.csv", + help="Output table location", + ) args = parser.parse_args() # load metric files @@ -77,14 +75,9 @@ def get_atd_values(df): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - # check if output file has .csv extension - if Path(args.o).suffix != ".csv": - raise ValueError("Output file should have .csv extension") - - td_table = create_table_state_tds(metric_files) - atd_table = get_atd_values(td_table) + states_table = create_table_state_tds(metric_files) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - atd_table.to_csv(Path(args.o)) - atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) + states_table.to_csv(Path(args.o)) + states_table.to_excel(Path(args.o).with_suffix(".xlsx")) diff --git a/examples/multimodel_example/README.md b/examples/multimodel_example/README.md index a7eab5d3..e0bf8f57 100644 --- a/examples/multimodel_example/README.md +++ b/examples/multimodel_example/README.md @@ -14,7 +14,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -For generating wordclouds, install the following dependencies. +If wordcloud images are required, install the following dependencies. ```sh pip install asreview-wordcloud @@ -43,8 +43,8 @@ The following files are found in this project: ├── 📜README.md ├── 📜jobs.sh ├── 📂data - │ ├── 📜Smid_2020.csv │ ├── 📜van_de_Schoot_2018.csv + │ ├── 📜Smid_2020.csv ├── 📂scripts │ ├── 📜get_plot.py │ ├── 📜merge_descriptives.py @@ -53,45 +53,45 @@ The following files are found in this project: │ └── 📜... └── 📂output ├── 📂simulation - | └── 📂Smid_2020 + | └── 📂van_de_Schoot_2018 | ├── 📂descriptives - | | └── 📜data_stats_Smid_2020.json + | | └── 📜data_stats_van_de_Schoot_2018.json | ├── 📂state_files - | | ├── 📜sim_Smid_2020_`x`.asreview + | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_Smid_2020_`x`.json + | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json | └── 📜... - | └── 📂van_de_Schoot_2018 + | └── 📂Smid_2020 | ├── 📂descriptives - | | └── 📜data_stats_van_de_Schoot_2018.json + | | └── 📜data_stats_Smid_2020.json | ├── 📂state_files - | | ├── 📜sim_van_de_Schoot_2018_`x`.asreview + | | ├── 📜sim_Smid_2020_`x`.asreview | | └── 📜... | └── 📂metrics - | ├── 📜metrics_sim_van_de_Schoot_2018_`x`.json + | ├── 📜metrics_sim_Smid_2020_`x`.json | └── 📜... 
├── 📂tables | ├── 📜data_descriptives.csv | ├── 📜data_descriptives.xlsx - | ├── 📜tds_sim_Smid_2020.csv - | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_sim_van_de_Schoot_2018.csv | ├── 📜tds_sim_van_de_Schoot_2018.xlsx + | ├── 📜tds_sim_Smid_2020.csv + | ├── 📜tds_sim_Smid_2020.xlsx | ├── 📜tds_summary.csv | ├── 📜tds_summary.xlsx - | ├── 📜metrics_sim_Smid_2020_metrics.csv - | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.csv | ├── 📜metrics_sim_van_de_Schoot_2018_metrics.xlsx + | ├── 📜metrics_sim_Smid_2020_metrics.csv + | ├── 📜metrics_sim_Smid_2020_metrics.xlsx | ├── 📜metrics_summary.csv | └── 📜metrics_summary.xlsx └── 📂figures - ├── 📈plot_recall_Smid_2020.png ├── 📈plot_recall_van_de_Schoot_2018.png - ├── 📈wordcloud_Smid_2020.png - ├── 📈wordcloud_relevant_Smid_2020.png - └── 📈wordcloud_irrelevant_Smid_2020.png + ├── 📈plot_recall_Smid_2020.png ├── 📈wordcloud_van_de_Schoot_2018.png ├── 📈wordcloud_relevant_van_de_Schoot_2018.png └── 📈wordcloud_irrelevant_van_de_Schoot_2018.png + ├── 📈wordcloud_Smid_2020.png + ├── 📈wordcloud_relevant_Smid_2020.png + └── 📈wordcloud_irrelevant_Smid_2020.png diff --git a/examples/multimodel_example/jobs.bat b/examples/multimodel_example/jobs.bat deleted file mode 100644 index 8c78726f..00000000 --- a/examples/multimodel_example/jobs.bat +++ /dev/null @@ -1,157 +0,0 @@ -@ echo off -COLOR E0 -:: version 0.0.0 - -:: Create folder structure. By default, the folder 'output' is used to store output. -mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics - -:: Collect descriptives about the dataset Smid_2020 -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\Smid_2020\state_files - -:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_doc2vec_0.json - -:: Classifier = logistic, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview 
metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_sbert_0.json - -:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_tfidf_0.json - - -:: Skipped nb + doc2vec model - - -:: Skipped nb + sbert model - -:: Classifier = nb, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_nb_tfidf_0.json - -:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_doc2vec_0.json - -:: Classifier = rf, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_sbert_0.json - -:: Classifier = rf, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_tfidf_0.json - -:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_doc2vec_0.json - -:: Classifier = svm, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview --model 
svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_sbert_0.json - -:: Classifier = svm, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_tfidf_0.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv - -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: - -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics - -:: Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json - -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant - -:: Simulate runs -mkdir output\simulation\van_de_Schoot_2018\state_files - -:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json - -:: Classifier = logistic, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json - -:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json - - -:: Skipped nb + doc2vec model - - -:: Skipped nb + sbert model - -:: Classifier = nb, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json - -:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json - -:: Classifier = rf, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_sbert_0.json - -:: Classifier = rf, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json - -:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview 
-o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json - -:: Classifier = svm, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_sbert_0.json - -:: Classifier = svm, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json - -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv - -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py diff --git a/examples/multimodel_example/jobs.sh b/examples/multimodel_example/jobs.sh index 8c78726f..9e6c1894 100644 --- a/examples/multimodel_example/jobs.sh +++ b/examples/multimodel_example/jobs.sh @@ -1,157 +1,156 @@ -@ echo off -COLOR E0 -:: version 0.0.0 -:: Create folder structure. By default, the folder 'output' is used to store output. +# version 0.0.0 + +# Create folder structure. By default, the folder 'output' is used to store output. 
mkdir output -mkdir output\simulation -mkdir output\tables -mkdir output\tables\metrics -mkdir output\tables\time_to_discovery -mkdir output\figures +mkdir output/simulation +mkdir output/tables +mkdir output/tables/metrics +mkdir output/tables/time_to_discovery +mkdir output/figures -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: Smid_2020 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +################################## +### DATASET: Smid_2020 +################################## -:: Create output folder -mkdir output\simulation\Smid_2020\ -mkdir output\simulation\Smid_2020\metrics +# Create output folder +mkdir output/simulation/Smid_2020/ +mkdir output/simulation/Smid_2020/metrics -:: Collect descriptives about the dataset Smid_2020 -mkdir output\simulation\Smid_2020\descriptives -python -m asreview data describe data\Smid_2020.csv -o output\simulation\Smid_2020\descriptives\data_stats_Smid_2020.json +# Collect descriptives about the dataset Smid_2020 +mkdir output/simulation/Smid_2020/descriptives +asreview data describe data/Smid_2020.csv -o output/simulation/Smid_2020/descriptives/data_stats_Smid_2020.json -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_Smid_2020.png --width 800 --height 500 -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\Smid_2020.csv -o output\figures\wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_Smid_2020.png --width 800 --height 500 +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_relevant_Smid_2020.png --width 800 --height 500 --relevant +asreview wordcloud data/Smid_2020.csv -o output/figures/wordcloud_irrelevant_Smid_2020.png --width 800 --height 500 --irrelevant -:: Simulate runs -mkdir output\simulation\Smid_2020\state_files +# Simulate runs +mkdir output/simulation/Smid_2020/state_files -:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_doc2vec_0.json +# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_doc2vec_0.json -:: Classifier = logistic, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min 
-python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_sbert_0.json +# Classifier = logistic, Feature extractor = sbert , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_sbert_0.json -:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_logistic_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_logistic_tfidf_0.json +# Classifier = logistic, Feature extractor = tfidf , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_logistic_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_logistic_tfidf_0.json -:: Skipped nb + doc2vec model +# Skipped nb + doc2vec model -:: Skipped nb + sbert model +# Skipped nb + sbert model -:: Classifier = nb, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_nb_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_nb_tfidf_0.json +# Classifier = nb, Feature extractor = tfidf , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_nb_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_nb_tfidf_0.json -:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_doc2vec_0.json +# Classifier = rf, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 +asreview metrics 
output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_doc2vec_0.json -:: Classifier = rf, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_sbert_0.json +# Classifier = rf, Feature extractor = sbert , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_sbert_0.json -:: Classifier = rf, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_rf_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_rf_tfidf_0.json +# Classifier = rf, Feature extractor = tfidf , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_rf_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_rf_tfidf_0.json -:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_doc2vec_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_doc2vec_0.json +# Classifier = svm, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_doc2vec_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_doc2vec_0.json -:: Classifier = svm, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_sbert_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_sbert_0.json +# Classifier = 
svm, Feature extractor = sbert , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_sbert_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_sbert_0.json -:: Classifier = svm, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\Smid_2020.csv -s output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\Smid_2020\state_files\sim_Smid_2020_svm_tfidf_0.asreview -o output\simulation\Smid_2020\metrics\metrics_sim_Smid_2020_svm_tfidf_0.json +# Classifier = svm, Feature extractor = tfidf , Query strategy = max +asreview simulate data/Smid_2020.csv -s output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 165 +asreview metrics output/simulation/Smid_2020/state_files/sim_Smid_2020_svm_tfidf_0.asreview -o output/simulation/Smid_2020/metrics/metrics_sim_Smid_2020_svm_tfidf_0.json -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\Smid_2020\state_files\ -o output\figures\plot_recall_sim_Smid_2020.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\Smid_2020\metrics\ -o output\tables\metrics\metrics_sim_Smid_2020.csv -python scripts\merge_tds.py -s output\simulation\Smid_2020\metrics\ -o output\tables\time_to_discovery\tds_sim_Smid_2020.csv +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/Smid_2020/state_files/ -o output/figures/plot_recall_sim_Smid_2020.png --show_legend model +python scripts/merge_metrics.py -s output/simulation/Smid_2020/metrics/ -o output/tables/metrics/metrics_sim_Smid_2020.csv +python scripts/merge_tds.py -s output/simulation/Smid_2020/metrics/ -o output/tables/time_to_discovery/tds_sim_Smid_2020.csv -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -:::::: DATASET: van_de_Schoot_2018 -:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: +################################## +### DATASET: van_de_Schoot_2018 +################################## -:: Create output folder -mkdir output\simulation\van_de_Schoot_2018\ -mkdir output\simulation\van_de_Schoot_2018\metrics +# Create output folder +mkdir output/simulation/van_de_Schoot_2018/ +mkdir output/simulation/van_de_Schoot_2018/metrics -:: Collect descriptives about the dataset van_de_Schoot_2018 -mkdir output\simulation\van_de_Schoot_2018\descriptives -python -m asreview data describe data\van_de_Schoot_2018.csv -o output\simulation\van_de_Schoot_2018\descriptives\data_stats_van_de_Schoot_2018.json +# Collect descriptives about the dataset van_de_Schoot_2018 +mkdir output/simulation/van_de_Schoot_2018/descriptives +asreview data describe data/van_de_Schoot_2018.csv -o output/simulation/van_de_Schoot_2018/descriptives/data_stats_van_de_Schoot_2018.json -:: Generate wordcloud visualizations of all datasets -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_van_de_Schoot_2018.png --width 800 --height 500 -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o 
output\figures\wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant -python -m asreview wordcloud data\van_de_Schoot_2018.csv -o output\figures\wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant +# Generate wordcloud visualizations of all datasets +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_van_de_Schoot_2018.png --width 800 --height 500 +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_relevant_van_de_Schoot_2018.png --width 800 --height 500 --relevant +asreview wordcloud data/van_de_Schoot_2018.csv -o output/figures/wordcloud_irrelevant_van_de_Schoot_2018.png --width 800 --height 500 --irrelevant -:: Simulate runs -mkdir output\simulation\van_de_Schoot_2018\state_files +# Simulate runs +mkdir output/simulation/van_de_Schoot_2018/state_files -:: Classifier = logistic, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json +# Classifier = logistic, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview --model logistic --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_doc2vec_0.json -:: Classifier = logistic, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json +# Classifier = logistic, Feature extractor = sbert , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview --model logistic --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_sbert_0.json -:: Classifier = logistic, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics 
output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json +# Classifier = logistic, Feature extractor = tfidf , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview --model logistic --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_logistic_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_logistic_tfidf_0.json -:: Skipped nb + doc2vec model +# Skipped nb + doc2vec model -:: Skipped nb + sbert model +# Skipped nb + sbert model -:: Classifier = nb, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json +# Classifier = nb, Feature extractor = tfidf , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview --model nb --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_nb_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_nb_tfidf_0.json -:: Classifier = rf, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json +# Classifier = rf, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview --model rf --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_doc2vec_0.json -:: Classifier = rf, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_sbert_0.asreview -o 
output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_sbert_0.json +# Classifier = rf, Feature extractor = sbert , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview --model rf --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_sbert_0.json -:: Classifier = rf, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json +# Classifier = rf, Feature extractor = tfidf , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview --model rf --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_rf_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_rf_tfidf_0.json -:: Classifier = svm, Feature extractor = doc2vec , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json +# Classifier = svm, Feature extractor = doc2vec , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview --model svm --query_strategy max --feature_extraction doc2vec --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_doc2vec_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_doc2vec_0.json -:: Classifier = svm, Feature extractor = sbert , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_sbert_0.json +# Classifier = svm, Feature extractor = sbert , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s 
output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview --model svm --query_strategy max --feature_extraction sbert --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_sbert_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_sbert_0.json -:: Classifier = svm, Feature extractor = tfidf , Query strategy = max -python -m asreview simulate data\van_de_Schoot_2018.csv -s output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 -q max -b double --n_instances 1 --stop_if min -python -m asreview metrics output\simulation\van_de_Schoot_2018\state_files\sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output\simulation\van_de_Schoot_2018\metrics\metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json +# Classifier = svm, Feature extractor = tfidf , Query strategy = max +asreview simulate data/van_de_Schoot_2018.csv -s output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview --model svm --query_strategy max --feature_extraction tfidf --init_seed 535 --seed 166 +asreview metrics output/simulation/van_de_Schoot_2018/state_files/sim_van_de_Schoot_2018_svm_tfidf_0.asreview -o output/simulation/van_de_Schoot_2018/metrics/metrics_sim_van_de_Schoot_2018_svm_tfidf_0.json -:: Generate plot and tables for dataset -python scripts\get_plot.py -s output\simulation\van_de_Schoot_2018\state_files\ -o output\figures\plot_recall_sim_van_de_Schoot_2018.png --show_legend model -python scripts\merge_metrics.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\metrics\metrics_sim_van_de_Schoot_2018.csv -python scripts\merge_tds.py -s output\simulation\van_de_Schoot_2018\metrics\ -o output\tables\time_to_discovery\tds_sim_van_de_Schoot_2018.csv +# Generate plot and tables for dataset +python scripts/get_plot.py -s output/simulation/van_de_Schoot_2018/state_files/ -o output/figures/plot_recall_sim_van_de_Schoot_2018.png --show_legend model +python scripts/merge_metrics.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/metrics/metrics_sim_van_de_Schoot_2018.csv +python scripts/merge_tds.py -s output/simulation/van_de_Schoot_2018/metrics/ -o output/tables/time_to_discovery/tds_sim_van_de_Schoot_2018.csv -:: Merge descriptives and metrics -python scripts\merge_descriptives.py -python scripts\merge_metrics.py +# Merge descriptives and metrics +python scripts/merge_descriptives.py -s output/simulation/*/descriptives/ -o output/tables/data_descriptives_all.csv +python scripts/merge_metrics.py -s output/simulation/*/metrics/ -o output/tables/metrics_sim_all.csv diff --git a/examples/multimodel_example/scripts/get_plot.py b/examples/multimodel_example/scripts/get_plot.py index 64d2f8db..7d29468d 100644 --- a/examples/multimodel_example/scripts/get_plot.py +++ b/examples/multimodel_example/scripts/get_plot.py @@ -20,72 +20,58 @@ import argparse from pathlib import Path +import matplotlib.colors as mcolors import matplotlib.pyplot as plt from asreview import open_state from asreviewcontrib.insights.plot import plot_recall -def _set_legend(ax, state, legend_option, label_to_line, state_file): - metadata = state.settings_metadata - label = None - - if legend_option == "filename": - label = state_file.stem - elif legend_option == "model": - label = " - ".join( - [ - metadata["settings"]["model"], - 
metadata["settings"]["feature_extraction"], - metadata["settings"]["balance_strategy"], - metadata["settings"]["query_strategy"], - ] - ) - elif legend_option == "classifier": - label = metadata["settings"]["model"] - else: - try: - label = metadata["settings"][legend_option] - except KeyError as err: - raise ValueError(f"Invalid legend setting: '{legend_option}'") from err # noqa: E501 - - if label: - # add label to line - if label not in label_to_line: - ax.lines[-2].set_label(label) - label_to_line[label] = ax.lines[-2] - # set color of line to the color of the first line with the same label - else: - ax.lines[-2].set_color(label_to_line[label].get_color()) - ax.lines[-2].set_label("_no_legend_") - - def get_plot_from_states(states, filename, legend=None): - """Generate an ASReview plot from state files. - - Arguments - --------- - states: list - List of state files. - filename: str - Filename of the plot. - legend: str - Add a legend to the plot, based on the given parameter. - Possible values: "filename", "model", "feature_extraction", - "balance_strategy", "query_strategy", "classifier". - """ - states = sorted(states) + """Generate an ASReview plot from state files.""" + fig, ax = plt.subplots() - label_to_line = {} + + labels = [] + colors = list(mcolors.TABLEAU_COLORS.values()) for state_file in states: with open_state(state_file) as state: + # draw the plot plot_recall(ax, state) - if legend: - _set_legend(ax, state, legend, label_to_line, state_file) - if legend: - ax.legend(loc=4, prop={"size": 8}) + # set the label + if legend == "filename": + ax.lines[-2].set_label(state_file.stem) + ax.legend(loc=4, prop={"size": 8}) + elif legend: + metadata = state.settings_metadata + + if legend == "model": + label = " - ".join( + [ + metadata["settings"]["model"], + metadata["settings"]["feature_extraction"], + metadata["settings"]["balance_strategy"], + metadata["settings"]["query_strategy"], + ] + ) + elif legend == "classifier": + label = metadata["settings"]["model"] + else: + try: + label = metadata["settings"][legend] + except KeyError as exc: + raise ValueError( + f"Legend setting '{legend}' " + "not found in state file settings." + ) from exc + if label not in labels: + ax.lines[-2].set_label(label) + labels.append(label) + ax.lines[-2].set_color(colors[labels.index(label) % len(colors)]) + ax.legend(loc=4, prop={"size": 8}) + fig.savefig(str(filename)) @@ -104,10 +90,10 @@ def get_plot_from_states(states, filename, legend=None): args = parser.parse_args() # load states - states = list(Path(args.s).glob("*.asreview")) + states = Path(args.s).glob("*.asreview") # check if states are found - if len(states) == 0: + if len(list(states)) == 0: raise FileNotFoundError(f"No state files found in {args.s}") # generate plot and save results diff --git a/examples/multimodel_example/scripts/merge_metrics.py b/examples/multimodel_example/scripts/merge_metrics.py index d8ed971b..aa031461 100644 --- a/examples/multimodel_example/scripts/merge_metrics.py +++ b/examples/multimodel_example/scripts/merge_metrics.py @@ -55,10 +55,7 @@ def create_table_state_metrics(metric_files): description="Merge metrics of multiple states into single table." 
) parser.add_argument( - "-s", - type=str, - default="output/simulation/*/metrics/", - help="states location", + "-s", type=str, default="output/simulation/*/metrics/", help="states location" ) parser.add_argument( "-o", diff --git a/examples/multimodel_example/scripts/merge_tds.py b/examples/multimodel_example/scripts/merge_tds.py index b705ed01..1beb52c6 100644 --- a/examples/multimodel_example/scripts/merge_tds.py +++ b/examples/multimodel_example/scripts/merge_tds.py @@ -24,7 +24,6 @@ import argparse import glob import json -from math import nan from pathlib import Path import pandas as pd @@ -38,7 +37,7 @@ def create_table_state_tds(metrics): with open(metric) as f: i = next(filter(lambda x: x["id"] == "td", json.load(f)["data"]["items"]))[ "value" - ] + ] # noqa values.extend((item[0], item[1], file_counter) for item in i) file_counter += 1 @@ -48,26 +47,25 @@ def create_table_state_tds(metrics): columns="metric_file", values="td", aggfunc="first", - fill_value=nan, + fill_value=0, ) pivoted.columns = [f"td_sim_{col}" for col in pivoted.columns] return pivoted -def get_atd_values(df): - df["record_atd"] = df.mean(axis=1) - - df.loc["average_simulation_TD"] = df.iloc[:, :-1].mean(axis=0) - - return df - - if __name__ == "__main__": parser = argparse.ArgumentParser( description="Merge tds of multiple metrics into single table." ) - parser.add_argument("-s", type=str, required=True, help="metrics location") - parser.add_argument("-o", type=str, required=True, help="Output table location") + parser.add_argument( + "-s", type=str, default="output/simulation/*/metrics/", help="metrics location" + ) + parser.add_argument( + "-o", + type=str, + default="output/tables/tds_sim_all.csv", + help="Output table location", + ) args = parser.parse_args() # load metric files @@ -77,14 +75,9 @@ def get_atd_values(df): if len(metric_files) == 0: raise FileNotFoundError("No metrics found in " + args.s) - # check if output file has .csv extension - if Path(args.o).suffix != ".csv": - raise ValueError("Output file should have .csv extension") - - td_table = create_table_state_tds(metric_files) - atd_table = get_atd_values(td_table) + states_table = create_table_state_tds(metric_files) # store table Path(args.o).parent.mkdir(parents=True, exist_ok=True) - atd_table.to_csv(Path(args.o)) - atd_table.to_excel(Path(args.o).with_suffix(".xlsx")) + states_table.to_csv(Path(args.o)) + states_table.to_excel(Path(args.o).with_suffix(".xlsx")) From a82338b6d352bc6b816af608cba44be50ae706cd Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 13:21:23 +0200 Subject: [PATCH 65/95] rename jobs --- .github/workflows/ci-workflow.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 4311a8a7..d8b821af 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -9,9 +9,6 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - with: - python-version: '3.x' - architecture: 'x64' - name: Install makita run: | pip install . @@ -62,7 +59,7 @@ jobs: - name: Lint python with ruff run: | ruff check . 
- - name: Run basic template + - name: Execute basic template jobsfile if: ${{ matrix.os == 'ubuntu-latest' }} run: | pip install asreview-datatools asreview-insights synergy-dataset From c21f05b1df32b1be0e88ff3e9f9e3c8e58ad44c8 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 13:22:31 +0200 Subject: [PATCH 66/95] Update names in workflow --- .github/workflows/ci-workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index d8b821af..7bb5663c 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -27,7 +27,7 @@ jobs: cp ../.github/workflows/test_data/labels.csv basic/data-test/labels.csv cp ../.github/workflows/test_data/labels.csv arfi/data/labels.csv cp ../.github/workflows/test_data/labels.csv multimodel/data/labels.csv - - name: Test makita templates + - name: Render makita templates run: | cd tmp/basic asreview makita template basic | tee output.txt @@ -59,7 +59,7 @@ jobs: - name: Lint python with ruff run: | ruff check . - - name: Execute basic template jobsfile + - name: Execute basic template jobs file if: ${{ matrix.os == 'ubuntu-latest' }} run: | pip install asreview-datatools asreview-insights synergy-dataset From f41d64fa3f7209fff6b1b5bda175445102cce727 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 13:23:15 +0200 Subject: [PATCH 67/95] rename total files var --- asreviewcontrib/makita/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asreviewcontrib/makita/utils.py b/asreviewcontrib/makita/utils.py index a21e67f8..d818413d 100644 --- a/asreviewcontrib/makita/utils.py +++ b/asreviewcontrib/makita/utils.py @@ -14,7 +14,7 @@ class FileHandler: def __init__(self, allow_overwrite=False): self.overwrite_all = allow_overwrite - self.total_files = 0 + self._total_files = 0 def add_file(self, content, export_fp): """ @@ -54,14 +54,14 @@ def allow_overwrite(): print(f"Created {export_fp}") - self.total_files += 1 + self._total_files += 1 def print_summary(self): """ Print the total number of files created by the FileHandler object. """ - print(f"\n{self.total_files} file(s) created.") + print(f"\n{self._total_files} file(s) created.") def render_file_from_template(self, name, file_type, **kwargs): """ From 21bab0ddcab53c7bda0cb5a84af2207910f6fa31 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 13:25:58 +0200 Subject: [PATCH 68/95] Add python version --- .github/workflows/ci-workflow.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 7bb5663c..6a7f1b9b 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -9,6 +9,9 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: '3.x' + architecture: 'x64' - name: Install makita run: | pip install . 
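Starting with patch 70 below, template selection moves from hard-coded branches in entrypoint.py to a lookup in the "asreview.makita.templates" entry-point group, which pyproject.toml registers for the built-in basic, arfi, and multimodel templates. As a minimal sketch of that mechanism -- not part of any patch in this series, written against the standard importlib.metadata API rather than the asreview.utils._entry_points helper the patches use, and with an invented package and class name -- an external plugin could register and resolve a template roughly like this:

# In the plugin's pyproject.toml (hypothetical names):
#
#   [project.entry-points."asreview.makita.templates"]
#   mytemplate = "my_makita_ext.template:TemplateMy"
#
# Resolving a registered template by name (requires Python >= 3.10 for the
# group keyword and the .names attribute used below):
from importlib.metadata import entry_points

def load_template(name):
    eps = entry_points(group="asreview.makita.templates")
    if name not in eps.names:
        raise ValueError(f"Template {name} not found.")
    # .load() imports the module and returns the template class, which the
    # caller can then instantiate and render, as entrypoint.py does below.
    return eps[name].load()
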
From e13fb4b77d7ad9097c456b11f9096bdfc30c41e9 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 13:47:52 +0200 Subject: [PATCH 69/95] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b29f869d..53c18a5b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3.12" ] license = {text = "MIT"} -dependencies = ["asreview", "jinja2", "cfgtemplater"] +dependencies = ["asreview>=1,<2", "jinja2", "cfgtemplater"] dynamic = ["version"] requires-python = ">=3.7" From e29a8e8eef6da620dc74e2f19341ec41c06d59e7 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:19:36 +0200 Subject: [PATCH 70/95] Fix arguments passing --- asreviewcontrib/makita/entrypoint.py | 204 +++++------------- asreviewcontrib/makita/template_arfi.py | 25 ++- asreviewcontrib/makita/template_base.py | 19 +- asreviewcontrib/makita/template_basic.py | 25 ++- asreviewcontrib/makita/template_multimodel.py | 34 +-- pyproject.toml | 5 + 6 files changed, 113 insertions(+), 199 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index ca2c6e99..0b66545a 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -2,14 +2,12 @@ import os from pathlib import Path +from asreview import config as ASREVIEW_CONFIG from asreview.entry_points import BaseEntryPoint +from asreview.utils import _entry_points from asreviewcontrib.makita import __version__ -from asreviewcontrib.makita.config import DEFAULTS from asreviewcontrib.makita.config import TEMPLATES_FP -from asreviewcontrib.makita.template_arfi import TemplateARFI -from asreviewcontrib.makita.template_basic import TemplateBasic -from asreviewcontrib.makita.template_multimodel import TemplateMultiModel from asreviewcontrib.makita.utils import FileHandler @@ -41,22 +39,22 @@ def execute(self, argv): # noqa: C901 "jobs.bat for Windows, otherwise jobs.sh.", ) parser_template.add_argument( - "-s", type=str, default=DEFAULTS["dataset_folder"], help="Dataset folder" + "-s", type=str, default="data", help="Dataset folder" ) parser_template.add_argument( - "-o", type=str, default=DEFAULTS["output_folder"], help="Output folder" + "-o", type=str, default="output", help="Output folder" ) parser_template.add_argument( "--init_seed", type=int, - default=DEFAULTS["init_seed"], - help="Seed of the priors. " f"{DEFAULTS['init_seed']} by default.", + default=535, + help="Seed of the priors. " "535 by default.", ) parser_template.add_argument( "--model_seed", type=int, - default=DEFAULTS["model_seed"], - help="Seed of the models. " f"{DEFAULTS['model_seed']} by default.", + default=165, + help="Seed of the models. " "165 by default.", ) parser_template.add_argument( "--template", type=str, help="Overwrite template with template file path." @@ -67,6 +65,17 @@ def execute(self, argv): # noqa: C901 help="Platform to run jobs: Windows, Darwin, Linux. " "Default: the system of rendering templates.", ) + parser_template.add_argument( + "--instances_per_query", + type=int, + default=ASREVIEW_CONFIG.DEFAULT_N_INSTANCES, + help="Number of instances per query. ", + ) + parser_template.add_argument( + "--stop_if", + type=str, + help="The number of label actions to simulate. ", + ) parser_template.add_argument( "--n_runs", type=int, @@ -75,7 +84,7 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--n_priors", type=int, - help="Number of priors. 
Only for template 'arfi'. " "Default: 10.", + help="Number of priors. Only for template 'arfi'.", ) parser_template.add_argument( "--no_wordclouds", @@ -105,23 +114,7 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--balance_strategy", type=str, - default=DEFAULTS["balance_strategy"], - help="Balance strategy to use. " - f"{DEFAULTS['balance_strategy']} by default.", - ) - parser_template.add_argument( - "--instances_per_query", - type=int, - default=DEFAULTS["instances_per_query"], - help="Number of instances per query. " - f"{DEFAULTS['instances_per_query']} by default.", - ) - parser_template.add_argument( - "--stop_if", - type=str, - default=DEFAULTS["stop_if"], - help="The number of label actions to simulate. " - f"{DEFAULTS['stop_if']} by default.", + help="Balance strategy to use. ", ) parser_template.add_argument( "--classifiers", @@ -141,7 +134,7 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--impossible_models", nargs="+", - help="Model combinations to exclude. Only for template 'multimodel'. ", + help="Model combinations to exclude. Only for template 'multimodel'.", ) parser_template.set_defaults(func=self._template_cli) @@ -156,7 +149,7 @@ def execute(self, argv): # noqa: C901 parser_script.add_argument( "-o", type=str, - default=DEFAULTS["scripts_folder"], + default="scripts", help="Location of the scripts folder.", ) parser_script.set_defaults(func=self._add_script_cli) @@ -177,20 +170,20 @@ def _template(self, args): # lowercase name args.name = args.name.lower() + # check if args.name is in _entry_points + if args.name not in _entry_points(group="asreview.makita.templates").names: + raise ValueError(f"Template {args.name} not found.") + # if a custom template is provided, check if it exists if args.template: fp_template = Path(args.template) if not fp_template.is_file(): raise ValueError(f"Custom template {args.template} not found") - else: - fp_template = None - - # print rendering message - if args.template: print( f"\033[33mRendering custom template {args.template} using {args.name}.\u001b[0m\n" # noqa: E501 ) else: + fp_template = None print(f"\033[33mRendering template {args.name}.\u001b[0m\n") # load datasets @@ -213,121 +206,32 @@ def _template(self, args): else: job_file = "jobs.sh" if args.job_file is None else args.job_file - # render jobs file - if args.name in TemplateBasic.template_name: - prohibited_args = [ - "classifiers", - "feature_extractors", - "query_strategies", - "impossible_models", - "n_priors", - ] - for arg in prohibited_args: - if getattr(args, arg): - raise ValueError( - f"Argument {arg} is not allowed for template {args.name}" - ) - - job = TemplateBasic( - datasets=datasets, - fp_template=fp_template, - output_folder=Path(args.o), - scripts_folder=Path(DEFAULTS["scripts_folder"]), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - n_runs=args.n_runs, - init_seed=args.init_seed, - model_seed=args.model_seed, - classifier=args.classifier, - feature_extractor=args.feature_extractor, - query_strategy=args.query_strategy, - balance_strategy=args.balance_strategy, - instances_per_query=args.instances_per_query, - stop_if=args.stop_if, - job_file=args.job_file, - ).render() - - elif args.name in TemplateARFI.template_name: - prohibited_args = [ - "n_runs", - "classifiers", - "feature_extractors", - "query_strategies", - "impossible_models", - ] - for arg in prohibited_args: - if getattr(args, arg): - raise ValueError( - f"Argument {arg} is not allowed for template {args.name}" 
- ) - - job = TemplateARFI( - datasets=datasets, - fp_template=fp_template, - output_folder=Path(args.o), - scripts_folder=Path(DEFAULTS["scripts_folder"]), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - n_priors=args.n_priors, - init_seed=args.init_seed, - model_seed=args.model_seed, - classifier=args.classifier, - feature_extractor=args.feature_extractor, - query_strategy=args.query_strategy, - balance_strategy=args.balance_strategy, - instances_per_query=args.instances_per_query, - stop_if=args.stop_if, - job_file=job_file, - ).render() - - elif args.name in TemplateMultiModel.template_name: - prohibited_args = [ - "classifier", - "feature_extractor", - "query_strategy", - "n_priors", - ] - for arg in prohibited_args: - if getattr(args, arg): - raise ValueError( - f"Argument {arg} is not allowed for template {args.name}" - ) - - job = TemplateMultiModel( - datasets=datasets, - fp_template=fp_template, - output_folder=Path(args.o), - scripts_folder=Path(DEFAULTS["scripts_folder"]), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - n_runs=args.n_runs, - init_seed=args.init_seed, - model_seed=args.model_seed, - all_classifiers=args.classifiers, - all_feature_extractors=args.feature_extractors, - all_query_strategies=args.query_strategies, - impossible_models=args.impossible_models, - balance_strategy=args.balance_strategy, - instances_per_query=args.instances_per_query, - stop_if=args.stop_if, - job_file=job_file, - ).render() - - else: - # Fallback to basic template - print(f"\u001b[31mERROR: \033[33mTemplate {args.name} not found.\u001b[0m\n") # noqa: E501 - print("\u001b[31mFallback: \033[33mUsing the basic template.\u001b[0m\n") - job = TemplateBasic( - datasets, - output_folder=Path(args.o), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - init_seed=args.init_seed, - model_seed=args.model_seed, - stop_if=args.stop_if, - fp_template=fp_template, - job_file=job_file, - ).render() + # load template + template = _entry_points(group="asreview.makita.templates")[args.name].load() + + job = template( + datasets=datasets, + fp_template=fp_template, + output_folder=Path(args.o), + scripts_folder=Path("scripts"), + create_wordclouds=args.no_wordclouds, + allow_overwrite=args.overwrite, + n_runs=args.n_runs, + n_priors=args.n_priors, + init_seed=args.init_seed, + model_seed=args.model_seed, + classifier=args.classifier, + feature_extractor=args.feature_extractor, + query_strategy=args.query_strategy, + balance_strategy=args.balance_strategy, + all_classifiers=args.classifiers, + all_feature_extractors=args.feature_extractors, + all_query_strategies=args.query_strategies, + impossible_models=args.impossible_models, + instances_per_query=args.instances_per_query, + stop_if=args.stop_if, + job_file=job_file, + ).render() # convert shell to batch if needed if job_file.endswith(".bat"): diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index a3126198..efe821ef 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -2,6 +2,7 @@ import numpy as np from asreview.data import ASReviewData +from asreview import config as ASREVIEW_CONFIG from asreviewcontrib.makita.template_base import TemplateBase @@ -18,15 +19,6 @@ def __init__( n_priors, **kwargs, ): - if classifier is None: - classifier = "nb" - if feature_extractor is None: - feature_extractor = "tfidf" - if query_strategy is None: - query_strategy = "max" - if n_priors is None: - 
n_priors = 10 - self.classifier = classifier self.feature_extractor = feature_extractor self.query_strategy = query_strategy @@ -37,8 +29,10 @@ def get_dynamic_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" + n_priors = self.n_priors if self.n_priors is not None else 10 + priors = _get_priors( - fp_dataset, init_seed=self.init_seed + index, n_priors=self.n_priors + fp_dataset, init_seed=self.init_seed + index, n_priors=n_priors ) return { "input_file": fp_dataset.as_posix(), @@ -51,12 +45,17 @@ def get_static_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" + # set default values if not provided + classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 + feature_extractor = self.feature_extractor if self.feature_extractor is None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 + query_strategy = self.query_strategy if self.query_strategy is None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + return { "datasets": params, "create_wordclouds": self.create_wordclouds, - "classifier": self.classifier, - "feature_extractor": self.feature_extractor, - "query_strategy": self.query_strategy, + "classifier": classifier, + "feature_extractor": feature_extractor, + "query_strategy": query_strategy, "balance_strategy": self.balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 5b625ff1..dedf6b21 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -10,7 +10,8 @@ class TemplateBase: - template_file = None + template_name = [] + template_file = "" def __init__( self, @@ -26,6 +27,7 @@ def __init__( instances_per_query, stop_if, job_file, + **kwargs ): self.datasets = datasets self.output_folder = output_folder @@ -44,6 +46,12 @@ def __init__( fp_template if fp_template is not None else self.get_template_file() ) # noqa: E501 + for param in kwargs: + if kwargs[param] is not None: + # print value of param + print(f"{param} = {kwargs[param]}") + raise ValueError(f"{param} should not be set for this template.") + def get_template_file(self): return Path(TEMPLATES_FP, self.template_file) @@ -111,17 +119,16 @@ def render(self): fp_dataset = Path(fp_dataset) params.append(self.get_dynamic_params(i, fp_dataset)) - # render template try: rendered_output = self.template.render(self.get_static_params(params)) - except Exception as e: - if str(e) == "'StrictUndefined' object cannot be interpreted as an integer": + except TypeError as e: + if "'StrictUndefined' object cannot be interpreted as an integer" in str(e): print("\033[31mERROR: A rendering exception occurred -", e) print( - "The rendering process failed due to undefined parameters in the template." # noqa: E501 + "The rendering process failed due to an attempt to use an undefined variable where an integer was expected." 
# noqa: E501 ) print( - "\033[33mPlease verify that the chosen template is compatible with the selected custom template.\033[0m" # noqa: E501 + "\033[33mPlease check your template for variables that are not properly defined or passed in.\033[0m" # noqa: E501 ) exit(1) else: diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index d956c46d..ee61f3cb 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -1,5 +1,7 @@ """Render basic template.""" +from asreview import config as ASREVIEW_CONFIG + from asreviewcontrib.makita.template_base import TemplateBase @@ -15,15 +17,6 @@ def __init__( n_runs, **kwargs, ): - if classifier is None: - classifier = "nb" - if feature_extractor is None: - feature_extractor = "tfidf" - if query_strategy is None: - query_strategy = "max" - if n_runs is None: - n_runs = 1 - self.classifier = classifier self.feature_extractor = feature_extractor self.query_strategy = query_strategy @@ -45,17 +38,23 @@ def get_static_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" + # set default values if not provided + classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 + feature_extractor = self.feature_extractor if self.feature_extractor is None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 + query_strategy = self.query_strategy if self.query_strategy is None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + n_runs = self.n_runs if self.n_runs is not None else 1 + return { + "classifier": classifier, + "feature_extractor": feature_extractor, + "query_strategy": query_strategy, + "n_runs": n_runs, "datasets": params, "create_wordclouds": self.create_wordclouds, - "classifier": self.classifier, - "feature_extractor": self.feature_extractor, - "query_strategy": self.query_strategy, "balance_strategy": self.balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, - "n_runs": self.n_runs, "scripts_folder": self.scripts_folder, "version": self.__version__, } diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 39eece35..9982ab86 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -1,5 +1,7 @@ """Render multimodel template.""" +from asreview import config as ASREVIEW_CONFIG + from asreviewcontrib.makita.template_base import TemplateBase @@ -9,29 +11,19 @@ class TemplateMultiModel(TemplateBase): def __init__( self, - n_runs, all_classifiers, all_feature_extractors, all_query_strategies, impossible_models, + n_runs, **kwargs, ): - if n_runs is None: - n_runs = 1 - if all_classifiers is None: - all_classifiers = ["logistic", "nb", "rf"] - if all_feature_extractors is None: - all_feature_extractors = ["doc2vec", "sbert", "tfidf"] - if all_query_strategies is None: - all_query_strategies = ["max"] - if impossible_models is None: - impossible_models = ["nb,doc2vec", "nb,sbert"] - self.n_runs = n_runs self.all_classifiers = all_classifiers self.all_feature_extractors = all_feature_extractors self.all_query_strategies = all_query_strategies self.impossible_models = impossible_models + super().__init__(**kwargs) def get_dynamic_params(self, index, fp_dataset): @@ -49,6 +41,14 @@ def get_static_params(self, params): """Prepare template-specific parameters. 
These parameters are provided to the template only once.""" + all_classifiers = self.all_classifiers if self.all_classifiers is not None else ["logistic", "nb", "rf"] # noqa: E501 + all_feature_extractors = self.all_feature_extractors if self.all_feature_extractors is not None else ["doc2vec", "sbert", "tfidf"] # noqa: E501 + all_query_strategies = self.all_query_strategies if self.all_query_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY] # noqa: E501 + impossible_models = self.impossible_models if self.impossible_models is not None else ["nb,doc2vec", "nb,sbert"] # noqa: E501 + n_runs = self.n_runs if self.n_runs is not None else 1 + + impossible_models = [i.split(",") for i in impossible_models] + return { "datasets": params, "create_wordclouds": self.create_wordclouds, @@ -56,11 +56,11 @@ def get_static_params(self, params): "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, - "n_runs": self.n_runs, + "n_runs": n_runs, "scripts_folder": self.scripts_folder, "version": self.__version__, - "all_classifiers": self.all_classifiers, - "all_feature_extractors": self.all_feature_extractors, - "all_query_strategies": self.all_query_strategies, - "impossible_models": [i.split(",") for i in self.impossible_models], + "all_classifiers": all_classifiers, + "all_feature_extractors": all_feature_extractors, + "all_query_strategies": all_query_strategies, + "impossible_models": impossible_models, } diff --git a/pyproject.toml b/pyproject.toml index 53c18a5b..bc518d89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,11 @@ repository = "https://github.com/asreview/asreview-makita" [project.entry-points."asreview.entry_points"] makita = "asreviewcontrib.makita.entrypoint:MakitaEntryPoint" +[project.entry-points."asreview.makita.templates"] +basic = "asreviewcontrib.makita.template_basic:TemplateBasic" +arfi = "asreviewcontrib.makita.template_arfi:TemplateARFI" +multimodel = "asreviewcontrib.makita.template_multimodel:TemplateMultiModel" + [project.optional-dependencies] lint = ["ruff"] test = ["pytest"] From a02e587569141d22b06ee0a30d4f5a81be8c62b7 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:19:38 +0200 Subject: [PATCH 71/95] Update template_multimodel.txt.template --- .../makita/templates/template_multimodel.txt.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/templates/template_multimodel.txt.template b/asreviewcontrib/makita/templates/template_multimodel.txt.template index 0b503af8..8d8cd729 100644 --- a/asreviewcontrib/makita/templates/template_multimodel.txt.template +++ b/asreviewcontrib/makita/templates/template_multimodel.txt.template @@ -48,7 +48,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files {% for classifier in all_classifiers %} {% for feature_extraction in all_feature_extractors %} {% for query_strategy in all_query_strategies %} -{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }} +{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }} {% if temp in impossible_models %} # Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model From afee598cdcd38785d4f6fd5a23ce01cd8562e15b Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:21:27 +0200 Subject: [PATCH 72/95] Clean up impossible 
models --- asreviewcontrib/makita/template_multimodel.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 9982ab86..54492e9c 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -44,11 +44,9 @@ def get_static_params(self, params): all_classifiers = self.all_classifiers if self.all_classifiers is not None else ["logistic", "nb", "rf"] # noqa: E501 all_feature_extractors = self.all_feature_extractors if self.all_feature_extractors is not None else ["doc2vec", "sbert", "tfidf"] # noqa: E501 all_query_strategies = self.all_query_strategies if self.all_query_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY] # noqa: E501 - impossible_models = self.impossible_models if self.impossible_models is not None else ["nb,doc2vec", "nb,sbert"] # noqa: E501 + impossible_models = [i.split(",") for i in self.impossible_models] if self.impossible_models is not None else [['nb', 'doc2vec'], ['nb', 'sbert']] # noqa: E501 n_runs = self.n_runs if self.n_runs is not None else 1 - impossible_models = [i.split(",") for i in impossible_models] - return { "datasets": params, "create_wordclouds": self.create_wordclouds, From df915e791577ddfc91251f8d51bacb2c24612500 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:23:31 +0200 Subject: [PATCH 73/95] Organize imports for arfi --- asreviewcontrib/makita/template_arfi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index efe821ef..e0386f89 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -1,8 +1,8 @@ """Render ARFI template.""" import numpy as np -from asreview.data import ASReviewData from asreview import config as ASREVIEW_CONFIG +from asreview.data import ASReviewData from asreviewcontrib.makita.template_base import TemplateBase From 4d3a2ef53c028bc6086b58c25cae68d5a8496fde Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:27:08 +0200 Subject: [PATCH 74/95] add default min --- asreviewcontrib/makita/entrypoint.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 0b66545a..3c73eb5a 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -74,6 +74,7 @@ def execute(self, argv): # noqa: C901 parser_template.add_argument( "--stop_if", type=str, + default="min", help="The number of label actions to simulate. 
", ) parser_template.add_argument( From 4e507a954078f35a0560ab7de18ff9cda8ebaf36 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:33:56 +0200 Subject: [PATCH 75/95] not not --- asreviewcontrib/makita/template_arfi.py | 4 ++-- asreviewcontrib/makita/template_basic.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index e0386f89..cf0f17d1 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -47,8 +47,8 @@ def get_static_params(self, params): # set default values if not provided classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 - feature_extractor = self.feature_extractor if self.feature_extractor is None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 - query_strategy = self.query_strategy if self.query_strategy is None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + feature_extractor = self.feature_extractor if self.feature_extractor is not None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 + query_strategy = self.query_strategy if self.query_strategy is not None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 return { "datasets": params, diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index ee61f3cb..f87791f1 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -40,8 +40,8 @@ def get_static_params(self, params): # set default values if not provided classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 - feature_extractor = self.feature_extractor if self.feature_extractor is None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 - query_strategy = self.query_strategy if self.query_strategy is None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + feature_extractor = self.feature_extractor if self.feature_extractor is not None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 + query_strategy = self.query_strategy if self.query_strategy is not None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 n_runs = self.n_runs if self.n_runs is not None else 1 return { From 470eae386c2ad5ab4bb2e22d1ef956c9671d0255 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 11 Apr 2024 17:39:47 +0200 Subject: [PATCH 76/95] set defaults --- asreviewcontrib/makita/template_arfi.py | 4 ++-- asreviewcontrib/makita/template_basic.py | 4 ++-- asreviewcontrib/makita/template_multimodel.py | 4 ++-- pyproject.toml | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index cf0f17d1..327cc93e 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -8,7 +8,6 @@ class TemplateARFI(TemplateBase): - template_name = ["arfi"] template_file = "template_arfi.txt.template" def __init__( @@ -49,6 +48,7 @@ def get_static_params(self, params): classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 feature_extractor = self.feature_extractor if self.feature_extractor is not None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 query_strategy = self.query_strategy if self.query_strategy is not None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + balance_strategy = 
self.balance_strategy if self.balance_strategy is not None else ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY # noqa: E501 return { "datasets": params, @@ -56,7 +56,7 @@ def get_static_params(self, params): "classifier": classifier, "feature_extractor": feature_extractor, "query_strategy": query_strategy, - "balance_strategy": self.balance_strategy, + "balance_strategy": balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "init_seed": self.init_seed, diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index f87791f1..58790159 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -6,7 +6,6 @@ class TemplateBasic(TemplateBase): - template_name = ["basic"] template_file = "template_basic.txt.template" def __init__( @@ -42,16 +41,17 @@ def get_static_params(self, params): classifier = self.classifier if self.classifier is not None else ASREVIEW_CONFIG.DEFAULT_MODEL # noqa: E501 feature_extractor = self.feature_extractor if self.feature_extractor is not None else ASREVIEW_CONFIG.DEFAULT_FEATURE_EXTRACTION # noqa: E501 query_strategy = self.query_strategy if self.query_strategy is not None else ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY # noqa: E501 + balance_strategy = self.balance_strategy if self.balance_strategy is not None else ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY # noqa: E501 n_runs = self.n_runs if self.n_runs is not None else 1 return { "classifier": classifier, "feature_extractor": feature_extractor, "query_strategy": query_strategy, + "balance_strategy": balance_strategy, "n_runs": n_runs, "datasets": params, "create_wordclouds": self.create_wordclouds, - "balance_strategy": self.balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 54492e9c..7ef6437f 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -6,7 +6,6 @@ class TemplateMultiModel(TemplateBase): - template_name = ["multimodel", "multiple_models", "modelmatrix"] template_file = "template_multimodel.txt.template" def __init__( @@ -45,12 +44,13 @@ def get_static_params(self, params): all_feature_extractors = self.all_feature_extractors if self.all_feature_extractors is not None else ["doc2vec", "sbert", "tfidf"] # noqa: E501 all_query_strategies = self.all_query_strategies if self.all_query_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY] # noqa: E501 impossible_models = [i.split(",") for i in self.impossible_models] if self.impossible_models is not None else [['nb', 'doc2vec'], ['nb', 'sbert']] # noqa: E501 + balance_strategy = self.balance_strategy if self.balance_strategy is not None else ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY # noqa: E501 n_runs = self.n_runs if self.n_runs is not None else 1 return { "datasets": params, "create_wordclouds": self.create_wordclouds, - "balance_strategy": self.balance_strategy, + "balance_strategy": balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, diff --git a/pyproject.toml b/pyproject.toml index bc518d89..6af6bad4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ makita = "asreviewcontrib.makita.entrypoint:MakitaEntryPoint" basic = "asreviewcontrib.makita.template_basic:TemplateBasic" arfi = 
"asreviewcontrib.makita.template_arfi:TemplateARFI" multimodel = "asreviewcontrib.makita.template_multimodel:TemplateMultiModel" +multiple_models = "asreviewcontrib.makita.template_multimodel:TemplateMultiModel" [project.optional-dependencies] lint = ["ruff"] From f00790bf6786af6cd2a09c5b8bb673cbb1bdb2fd Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 13:58:03 +0200 Subject: [PATCH 77/95] rename some arguments --- .github/workflows/ci-workflow.yml | 2 +- asreviewcontrib/makita/entrypoint.py | 23 ++++--------------- asreviewcontrib/makita/template_arfi.py | 2 +- asreviewcontrib/makita/template_base.py | 10 ++++---- asreviewcontrib/makita/template_basic.py | 2 +- asreviewcontrib/makita/template_multimodel.py | 14 +++++------ .../makita/templates/doc_README.md.template | 2 +- .../templates/template_arfi.txt.template | 2 +- .../templates/template_basic.txt.template | 2 +- .../template_multimodel.txt.template | 2 +- 10 files changed, 24 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 6a7f1b9b..9c4d6cd5 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -34,7 +34,7 @@ jobs: run: | cd tmp/basic asreview makita template basic | tee output.txt - asreview makita template basic --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 -s data-test -o output-test --init_seed 1 --model_seed 2 --no_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output.txt + asreview makita template basic --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 -s data-test -o output-test --init_seed 1 --model_seed 2 --skip_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi asreview makita template arfi | tee output.txt diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 3c73eb5a..9c90a5c1 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -88,8 +88,8 @@ def execute(self, argv): # noqa: C901 help="Number of priors. Only for template 'arfi'.", ) parser_template.add_argument( - "--no_wordclouds", - action="store_false", + "--skip_wordclouds", + action="store_true", help="Disables the generation of wordclouds. 
", ) parser_template.add_argument( @@ -210,28 +210,15 @@ def _template(self, args): # load template template = _entry_points(group="asreview.makita.templates")[args.name].load() + print(vars(args)) + job = template( datasets=datasets, fp_template=fp_template, output_folder=Path(args.o), scripts_folder=Path("scripts"), - create_wordclouds=args.no_wordclouds, - allow_overwrite=args.overwrite, - n_runs=args.n_runs, - n_priors=args.n_priors, - init_seed=args.init_seed, - model_seed=args.model_seed, - classifier=args.classifier, - feature_extractor=args.feature_extractor, - query_strategy=args.query_strategy, - balance_strategy=args.balance_strategy, - all_classifiers=args.classifiers, - all_feature_extractors=args.feature_extractors, - all_query_strategies=args.query_strategies, - impossible_models=args.impossible_models, - instances_per_query=args.instances_per_query, - stop_if=args.stop_if, job_file=job_file, + **vars(args), ).render() # convert shell to batch if needed diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 327cc93e..6f03f2d0 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -52,7 +52,7 @@ def get_static_params(self, params): return { "datasets": params, - "create_wordclouds": self.create_wordclouds, + "skip_wordclouds": self.skip_wordclouds, "classifier": classifier, "feature_extractor": feature_extractor, "query_strategy": query_strategy, diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index dedf6b21..46255a36 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -19,8 +19,8 @@ def __init__( fp_template, output_folder, scripts_folder, - create_wordclouds, - allow_overwrite, + skip_wordclouds, + overwrite, init_seed, model_seed, balance_strategy, @@ -32,14 +32,14 @@ def __init__( self.datasets = datasets self.output_folder = output_folder self.scripts_folder = scripts_folder - self.create_wordclouds = create_wordclouds + self.skip_wordclouds = skip_wordclouds self.init_seed = init_seed self.model_seed = model_seed self.balance_strategy = balance_strategy self.instances_per_query = instances_per_query self.stop_if = stop_if self.job_file = job_file - self.file_handler = FileHandler(allow_overwrite) + self.file_handler = FileHandler(overwrite) self.__version__ = __version__ self.template = ConfigTemplate( @@ -92,7 +92,7 @@ def render_docs(self, docs: list): template_name=self.template.name, template_name_long=self.template.name_long, template_scripts=self.template.scripts, - create_wordclouds=self.create_wordclouds, + skip_wordclouds=self.skip_wordclouds, output_folder=self.output_folder, job_file=self.job_file, ) diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 58790159..40a19163 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -51,7 +51,7 @@ def get_static_params(self, params): "balance_strategy": balance_strategy, "n_runs": n_runs, "datasets": params, - "create_wordclouds": self.create_wordclouds, + "skip_wordclouds": self.skip_wordclouds, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, "output_folder": self.output_folder, diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 7ef6437f..0dd0ec73 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ 
b/asreviewcontrib/makita/template_multimodel.py @@ -10,17 +10,17 @@ class TemplateMultiModel(TemplateBase): def __init__( self, - all_classifiers, - all_feature_extractors, - all_query_strategies, + classifiers, + feature_extractors, + query_strategies, impossible_models, n_runs, **kwargs, ): self.n_runs = n_runs - self.all_classifiers = all_classifiers - self.all_feature_extractors = all_feature_extractors - self.all_query_strategies = all_query_strategies + self.all_classifiers = classifiers + self.all_feature_extractors = feature_extractors + self.all_query_strategies = query_strategies self.impossible_models = impossible_models super().__init__(**kwargs) @@ -49,7 +49,7 @@ def get_static_params(self, params): return { "datasets": params, - "create_wordclouds": self.create_wordclouds, + "skip_wordclouds": self.skip_wordclouds, "balance_strategy": balance_strategy, "instances_per_query": self.instances_per_query, "stop_if": self.stop_if, diff --git a/asreviewcontrib/makita/templates/doc_README.md.template b/asreviewcontrib/makita/templates/doc_README.md.template index e5658404..869b1826 100644 --- a/asreviewcontrib/makita/templates/doc_README.md.template +++ b/asreviewcontrib/makita/templates/doc_README.md.template @@ -13,7 +13,7 @@ This project depends on Python 3.7 or later (python.org/download), and [ASReview ```sh pip install asreview>=1.0 asreview-insights>=1.1.2 asreview-datatools ``` -{% if create_wordclouds %} +{% if not skip_wordclouds %} For generating wordclouds, install the following dependencies. ```sh diff --git a/asreviewcontrib/makita/templates/template_arfi.txt.template b/asreviewcontrib/makita/templates/template_arfi.txt.template index 182ee5ad..99a7bead 100644 --- a/asreviewcontrib/makita/templates/template_arfi.txt.template +++ b/asreviewcontrib/makita/templates/template_arfi.txt.template @@ -36,7 +36,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics # Collect descriptives about the dataset mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives python -m asreview data describe {{ dataset.input_file }} -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives/data_stats_{{ dataset.input_file_stem }}.json -{% if create_wordclouds %} +{% if not skip_wordclouds %} # Generate wordcloud visualizations of all datasets python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/figures/wordcloud_{{ dataset.input_file_stem }}.png --width 800 --height 500 diff --git a/asreviewcontrib/makita/templates/template_basic.txt.template b/asreviewcontrib/makita/templates/template_basic.txt.template index 8f18d916..734e072f 100644 --- a/asreviewcontrib/makita/templates/template_basic.txt.template +++ b/asreviewcontrib/makita/templates/template_basic.txt.template @@ -38,7 +38,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics # Collect descriptives about the dataset {{ dataset.input_file_stem }} mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives python -m asreview data describe {{ dataset.input_file }} -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives/data_stats_{{ dataset.input_file_stem }}.json -{% if create_wordclouds %} +{% if not skip_wordclouds %} # Generate wordcloud visualizations of all datasets python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/figures/wordcloud_{{ dataset.input_file_stem }}.png --width 800 --height 500 diff --git 
a/asreviewcontrib/makita/templates/template_multimodel.txt.template b/asreviewcontrib/makita/templates/template_multimodel.txt.template index 8d8cd729..6955792d 100644 --- a/asreviewcontrib/makita/templates/template_multimodel.txt.template +++ b/asreviewcontrib/makita/templates/template_multimodel.txt.template @@ -35,7 +35,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics # Collect descriptives about the dataset {{ dataset.input_file_stem }} mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives python -m asreview data describe {{ dataset.input_file }} -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/descriptives/data_stats_{{ dataset.input_file_stem }}.json -{% if create_wordclouds %} +{% if not skip_wordclouds %} # Generate wordcloud visualizations of all datasets python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/figures/wordcloud_{{ dataset.input_file_stem }}.png --width 800 --height 500 From 83f472e67abacd95585deb8010d24278a674b6f3 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:15:37 +0200 Subject: [PATCH 78/95] Update args passing --- asreviewcontrib/makita/entrypoint.py | 40 ++++++++++++++++++------- asreviewcontrib/makita/template_base.py | 1 - 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 9c90a5c1..3ec4917b 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -202,35 +202,55 @@ def _template(self, args): Path(args.o).parent.mkdir(parents=True, exist_ok=True) # get job file - if args.platform == "Windows" or (args.platform is None and os.name == "nt"): - job_file = "jobs.bat" if args.job_file is None else args.job_file - else: - job_file = "jobs.sh" if args.job_file is None else args.job_file + if args.job_file is None: + if args.platform == "Windows" or ( + args.platform is None and os.name == "nt" + ): # noqa: E501 + args.job_file = "jobs.bat" + else: + args.job_file = "jobs.sh" # load template template = _entry_points(group="asreview.makita.templates")[args.name].load() - print(vars(args)) + keys_of_interest = [ + "skip_wordclouds", + "overwrite", + "n_runs", + "n_priors", + "init_seed", + "model_seed", + "classifier", + "feature_extractor", + "query_strategy", + "balance_strategy", + "classifiers", + "feature_extractors", + "query_strategies", + "impossible_models", + "instances_per_query", + "stop_if", + "job_file", + ] job = template( datasets=datasets, fp_template=fp_template, output_folder=Path(args.o), scripts_folder=Path("scripts"), - job_file=job_file, - **vars(args), + **{key: vars(args)[key] for key in keys_of_interest if key in vars(args)}, ).render() # convert shell to batch if needed - if job_file.endswith(".bat"): + if args.job_file.endswith(".bat"): job = f"@ echo off\nCOLOR E0{job}" job = job.replace("#", "::") job = job.replace("/", "\\") # store result in output folder - with open(job_file, "w") as f: + with open(args.job_file, "w") as f: f.write(job) - print(f"Rendered template {args.name} and saved to {job_file}") + print(f"Rendered template {args.name} and saved to {args.job_file}") def _add_script_cli(self, args): try: diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 46255a36..0f5aa02a 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -10,7 +10,6 @@ class TemplateBase: - template_name = [] template_file = "" 
def __init__( From 1d59c15efe79b56f7c54a47eab82d2ff35a6dfe7 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:16:14 +0200 Subject: [PATCH 79/95] Update config.py --- asreviewcontrib/makita/config.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/asreviewcontrib/makita/config.py b/asreviewcontrib/makita/config.py index 515a10c4..ef1335b3 100644 --- a/asreviewcontrib/makita/config.py +++ b/asreviewcontrib/makita/config.py @@ -1,14 +1,3 @@ from pathlib import Path TEMPLATES_FP = Path(Path(__file__).parent, "templates") - -DEFAULTS = { - "dataset_folder": "data", - "output_folder": "output", - "scripts_folder": "scripts", - "init_seed": 535, - "model_seed": 165, - "balance_strategy": "double", - "instances_per_query": 1, - "stop_if": "min", -} From 8919c164f19d82daa16e21bec5f826f05e373f92 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:20:53 +0200 Subject: [PATCH 80/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 9c4d6cd5..8f1b114a 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -69,6 +69,6 @@ jobs: mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l - asreview makita template basic --instances_per_query 100 --no_wordclouds --overwrite --n_runs 2 + asreview makita template basic --instances_per_query 100 --skip_wordclouds --overwrite --n_runs 2 sh jobs.sh scitree From b930c591468bd72d7a1eecfaef4e1cbc6f399d4a Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:24:07 +0200 Subject: [PATCH 81/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 8f1b114a..d04e85ab 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -4,13 +4,14 @@ jobs: test-template-and-lint: strategy: matrix: - os: [macos-latest, windows-latest, ubuntu-latest] + os: [windows-latest, ubuntu-latest] + python-version: ['3.8', '3.12'] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.x' + python-version: ${{ matrix.python-version }} architecture: 'x64' - name: Install makita run: | From 8c44764ed6da3737354b8b9e5ba51f4ea2cf97e6 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:34:27 +0200 Subject: [PATCH 82/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 58 ++++++++++++------------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index d04e85ab..06a2565d 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -13,28 +13,31 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: 'x64' - - name: Install makita + - name: Lint python with ruff + run: | + ruff check . + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies run: | - pip install . - - name: Install ruff + pip install . 
ruff scitree asreview-datatools asreview-insights synergy-dataset + - name: Set up directories run: | - pip install ruff + mkdir -p ./tmp/basic/data-test ./tmp/arfi/data ./tmp/multimodel/data ./tmp/scripts ./tmp/synergy/data - name: set up environment run: | - mkdir tmp - cd tmp - mkdir -p basic/data - mkdir -p basic/data-test - mkdir -p arfi/data - mkdir -p multimodel/data - cp ../.github/workflows/test_data/labels.csv basic/data/labels.csv - cp ../.github/workflows/test_data/labels.csv basic/data-test/labels.csv - cp ../.github/workflows/test_data/labels.csv arfi/data/labels.csv - cp ../.github/workflows/test_data/labels.csv multimodel/data/labels.csv + cp .github/workflows/test_data/labels.csv ./tmp/basic/data/labels.csv + cp .github/workflows/test_data/labels.csv ./tmp/basic/data-test/labels.csv + cp .github/workflows/test_data/labels.csv ./tmp/arfi/data/labels.csv + cp .github/workflows/test_data/labels.csv ./tmp/multimodel/data/labels.csv - name: Render makita templates run: | cd tmp/basic - asreview makita template basic | tee output.txt asreview makita template basic --classifier nb --feature_extractor tfidf --query_strategy max --n_runs 1 -s data-test -o output-test --init_seed 1 --model_seed 2 --skip_wordclouds --overwrite --instances_per_query 2 --stop_if min --balance_strategy double | tee output.txt grep -q "ERROR" output.txt && exit 1 || true cd ../arfi @@ -43,31 +46,16 @@ jobs: cd ../multimodel asreview makita template multimodel | tee output.txt grep -q "ERROR" output.txt && exit 1 || true - cd .. - - name: Run scitree on output - if: ${{ matrix.os == 'ubuntu-latest' }} - run: | - pip install scitree - cd tmp - scitree - - name: Run ShellCheck - if: ${{ matrix.os != 'windows-latest' }} - uses: ludeeus/action-shellcheck@master - with: - scandir: './tmp' - env: - SHELLCHECK_OPTS: -e SC2148 - - name: Generate makita scripts + - name: Render makita templates run: | - asreview makita add-script --all - - name: Lint python with ruff + asreview makita add-script --all -o ./tmp/scripts | tee output.txt + grep -q "ERROR" output.txt && exit 1 || true + - name: Run SciTree run: | - ruff check . + scitree - name: Execute basic template jobs file if: ${{ matrix.os == 'ubuntu-latest' }} run: | - pip install asreview-datatools asreview-insights synergy-dataset - mkdir -p tmp/synergy/data cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l asreview makita template basic --instances_per_query 100 --skip_wordclouds --overwrite --n_runs 2 From 78c11817deda38832b9fc4bf138fc33fac9a0516 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:35:18 +0200 Subject: [PATCH 83/95] lint after installing linter --- .github/workflows/ci-workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 06a2565d..965a8134 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -13,9 +13,6 @@ jobs: with: python-version: ${{ matrix.python-version }} architecture: 'x64' - - name: Lint python with ruff - run: | - ruff check . - name: Cache Python packages uses: actions/cache@v3 with: @@ -26,6 +23,9 @@ jobs: - name: Install dependencies run: | pip install . ruff scitree asreview-datatools asreview-insights synergy-dataset + - name: Lint python with ruff + run: | + ruff check . 
- name: Set up directories run: | mkdir -p ./tmp/basic/data-test ./tmp/arfi/data ./tmp/multimodel/data ./tmp/scripts ./tmp/synergy/data From 1596b6da3819b76230537d55ae39300e997d6ad4 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:39:13 +0200 Subject: [PATCH 84/95] Update ci-workflow.yml --- .github/workflows/ci-workflow.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 965a8134..0a9b0b9e 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -31,7 +31,6 @@ jobs: mkdir -p ./tmp/basic/data-test ./tmp/arfi/data ./tmp/multimodel/data ./tmp/scripts ./tmp/synergy/data - name: set up environment run: | - cp .github/workflows/test_data/labels.csv ./tmp/basic/data/labels.csv cp .github/workflows/test_data/labels.csv ./tmp/basic/data-test/labels.csv cp .github/workflows/test_data/labels.csv ./tmp/arfi/data/labels.csv cp .github/workflows/test_data/labels.csv ./tmp/multimodel/data/labels.csv From c713f99d50408803c7441707f2706ca06a5c3d7d Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:44:21 +0200 Subject: [PATCH 85/95] os agnostic directories --- .github/workflows/ci-workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 0a9b0b9e..7dee9b0d 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -26,9 +26,8 @@ jobs: - name: Lint python with ruff run: | ruff check . - - name: Set up directories - run: | - mkdir -p ./tmp/basic/data-test ./tmp/arfi/data ./tmp/multimodel/data ./tmp/scripts ./tmp/synergy/data + - name: Create directories using Python + run: python -c "import os; [os.makedirs(path, exist_ok=True) for path in ['./tmp/basic/data-test', './tmp/arfi/data', './tmp/multimodel/data', './tmp/scripts', './tmp/synergy/data']]" - name: set up environment run: | cp .github/workflows/test_data/labels.csv ./tmp/basic/data-test/labels.csv @@ -51,6 +50,7 @@ jobs: grep -q "ERROR" output.txt && exit 1 || true - name: Run SciTree run: | + cd ./tmp/ scitree - name: Execute basic template jobs file if: ${{ matrix.os == 'ubuntu-latest' }} From a1ee44134c31790cddf7751807849d2a859f2956 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:45:46 +0200 Subject: [PATCH 86/95] rename step in workflow --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 7dee9b0d..42d78fda 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -44,7 +44,7 @@ jobs: cd ../multimodel asreview makita template multimodel | tee output.txt grep -q "ERROR" output.txt && exit 1 || true - - name: Render makita templates + - name: Render makita scripts run: | asreview makita add-script --all -o ./tmp/scripts | tee output.txt grep -q "ERROR" output.txt && exit 1 || true From bb0fa0f41636f82f78eb08ef83a84f021f82c5e6 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:51:08 +0200 Subject: [PATCH 87/95] Scitree error windows --- .github/workflows/ci-workflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 42d78fda..7cd1cdb9 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -49,6 +49,7 @@ jobs: asreview makita add-script --all -o ./tmp/scripts | tee output.txt grep -q "ERROR" output.txt 
&& exit 1 || true - name: Run SciTree + if: ${{ matrix.os != 'windows-latest' }} run: | cd ./tmp/ scitree From bc629b1fcede0bd8e4f9da84e188c8cf4f88e8a9 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:56:47 +0200 Subject: [PATCH 88/95] Update pip cache for windows --- .github/workflows/ci-workflow.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 7cd1cdb9..34242bff 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -14,12 +14,17 @@ jobs: python-version: ${{ matrix.python-version }} architecture: 'x64' - name: Cache Python packages - uses: actions/cache@v3 + uses: actions/cache@v4 with: - path: ~/.cache/pip + path: | + ${{ runner.os == 'Windows' && 'D:\\a\\_temp\\pip\\Cache' || '~/.cache/pip' }} key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- + - name: Print pip cache directory + run: echo "Pip cache directory: $env:LOCALAPPDATA\pip\Cache" + shell: pwsh + if: runner.os == 'Windows' - name: Install dependencies run: | pip install . ruff scitree asreview-datatools asreview-insights synergy-dataset @@ -54,7 +59,7 @@ jobs: cd ./tmp/ scitree - name: Execute basic template jobs file - if: ${{ matrix.os == 'ubuntu-latest' }} + if: ${{ matrix.os != 'windows-latest' }} run: | cd tmp/synergy synergy_dataset get -d van_de_Schoot_2018 -o ./data -l From af2c247b329027b3a61263cc14ff10c449d9c993 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 14:58:52 +0200 Subject: [PATCH 89/95] Windows workflow workaround --- .github/workflows/ci-workflow.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 34242bff..3b3776fd 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -21,10 +21,6 @@ jobs: key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- - - name: Print pip cache directory - run: echo "Pip cache directory: $env:LOCALAPPDATA\pip\Cache" - shell: pwsh - if: runner.os == 'Windows' - name: Install dependencies run: | pip install . 
ruff scitree asreview-datatools asreview-insights synergy-dataset From 2b90a2ada105ba6820f7220f252e5453aa4886e2 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 15:03:17 +0200 Subject: [PATCH 90/95] update windows cache --- .github/workflows/ci-workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 3b3776fd..fe0dce24 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -17,7 +17,7 @@ jobs: uses: actions/cache@v4 with: path: | - ${{ runner.os == 'Windows' && 'D:\\a\\_temp\\pip\\Cache' || '~/.cache/pip' }} + ${{ runner.os == 'Windows' && 'C:\users\runneradmin\appdata\local\pip\cache' || '~/.cache/pip' }} key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} restore-keys: | ${{ runner.os }}-pip- From 6de93e925e4669513aa7b4991013a7f057c8446d Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 15:22:08 +0200 Subject: [PATCH 91/95] Update doc_README.md.template --- asreviewcontrib/makita/templates/doc_README.md.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asreviewcontrib/makita/templates/doc_README.md.template b/asreviewcontrib/makita/templates/doc_README.md.template index 869b1826..5569cb6c 100644 --- a/asreviewcontrib/makita/templates/doc_README.md.template +++ b/asreviewcontrib/makita/templates/doc_README.md.template @@ -70,6 +70,6 @@ The following files are found in this project: | └── 📜metrics_summary.xlsx └── 📂figures{% for dataset in datasets %} ├── 📈plot_recall_{{ dataset.stem }}.png{% endfor %}{% for dataset in datasets %} - ├── 📈wordcloud_{{ dataset.stem }}.png +{% if not skip_wordclouds %} ├── 📈wordcloud_{{ dataset.stem }}.png ├── 📈wordcloud_relevant_{{ dataset.stem }}.png - └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{% endfor %}{%endif %} \ No newline at end of file + └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{% endfor %}{%endif %}{%endif %} \ No newline at end of file From 2f34b3698e9d02af4cee250f62b59b74c9839b19 Mon Sep 17 00:00:00 2001 From: JT Date: Fri, 12 Apr 2024 15:22:36 +0200 Subject: [PATCH 92/95] Update doc_README.md.template --- asreviewcontrib/makita/templates/doc_README.md.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asreviewcontrib/makita/templates/doc_README.md.template b/asreviewcontrib/makita/templates/doc_README.md.template index 5569cb6c..2dc3e989 100644 --- a/asreviewcontrib/makita/templates/doc_README.md.template +++ b/asreviewcontrib/makita/templates/doc_README.md.template @@ -72,4 +72,4 @@ The following files are found in this project: ├── 📈plot_recall_{{ dataset.stem }}.png{% endfor %}{% for dataset in datasets %} {% if not skip_wordclouds %} ├── 📈wordcloud_{{ dataset.stem }}.png ├── 📈wordcloud_relevant_{{ dataset.stem }}.png - └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{% endfor %}{%endif %}{%endif %} \ No newline at end of file + └── 📈wordcloud_irrelevant_{{ dataset.stem }}.png{%endif %}{% endfor %}{%endif %} \ No newline at end of file From 5bc77114ddb8dd461683626a74bb2200c42519be Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 18 Apr 2024 14:35:59 +0200 Subject: [PATCH 93/95] rename to balance_strategies --- README.md | 4 ++-- asreviewcontrib/makita/entrypoint.py | 4 ++-- asreviewcontrib/makita/template_multimodel.py | 8 ++++---- .../makita/templates/template_multimodel.txt.template | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a43a94ad..2ea17c3c 100644 --- a/README.md 
+++ b/README.md @@ -183,13 +183,13 @@ optional arguments: --classifiers CLASSIFIERS Classifiers to use Default: ['logistic', 'nb', 'rf', 'svm'] --feature_extractors FEATURE_EXTRACTOR Feature extractors to use Default: ['doc2vec', 'sbert', 'tfidf'] --query_strategies QUERY_STRATEGY Query strategies to use Default: ['max'] - --balancing_strategies BALANCE_STRATEGY Balance strategies to use Default: ['double'] + --balance_strategies BALANCE_STRATEGY Balance strategies to use Default: ['double'] --impossible_models IMPOSSIBLE_MODELS Model combinations to exclude Default: ['nb,doc2vec', 'nb,sbert'] ``` If you want to specify certain combinations of classifiers and feature extractors that should and should not be used, you can use the `--classifiers`, -`--feature_extractors`, `--query_strategies`, `--balancing_strategies` and `--impossible_models` option. For instance, if you +`--feature_extractors`, `--query_strategies`, `--balance_strategies` and `--impossible_models` option. For instance, if you want to exclude the combinations of `nb` with `doc2vec` and `logistic` with `tfidf`, use the following command: diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 54b131f0..7c463756 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -133,7 +133,7 @@ def execute(self, argv): # noqa: C901 help="Query strategies to use. Only for template 'multimodel'. ", ) parser_template.add_argument( - "--balancing_strategies", + "--balance_strategies", nargs="+", help="Balancing strategies to use. Only for template 'multimodel'. ", ) @@ -232,7 +232,7 @@ def _template(self, args): "classifiers", "feature_extractors", "query_strategies", - "balancing_strategies", + "balance_strategies", "impossible_models", "instances_per_query", "stop_if", diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 4ce7761d..e1d606a8 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -13,7 +13,7 @@ def __init__( classifiers, feature_extractors, query_strategies, - balancing_strategies, + balance_strategies, impossible_models, n_runs, **kwargs, @@ -22,7 +22,7 @@ def __init__( self.all_classifiers = classifiers self.all_feature_extractors = feature_extractors self.all_query_strategies = query_strategies - self.all_balancing_strategies = balancing_strategies + self.all_balance_strategies = balance_strategies self.impossible_models = impossible_models super().__init__(**kwargs) @@ -45,7 +45,7 @@ def get_static_params(self, params): all_classifiers = self.all_classifiers if self.all_classifiers is not None else ["logistic", "nb", "rf"] # noqa: E501 all_feature_extractors = self.all_feature_extractors if self.all_feature_extractors is not None else ["doc2vec", "sbert", "tfidf"] # noqa: E501 all_query_strategies = self.all_query_strategies if self.all_query_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_QUERY_STRATEGY] # noqa: E501 - all_balancing_strategies = self.all_balancing_strategies if self.all_balancing_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY] # noqa: E501 + all_balance_strategies = self.all_balance_strategies if self.all_balance_strategies is not None else [ASREVIEW_CONFIG.DEFAULT_BALANCE_STRATEGY] # noqa: E501 impossible_models = [i.split(",") for i in self.impossible_models] if self.impossible_models is not None else [['nb', 'doc2vec'], ['nb', 'sbert']] # noqa: E501 n_runs = self.n_runs if 
self.n_runs is not None else 1 @@ -61,6 +61,6 @@ def get_static_params(self, params): "all_classifiers": all_classifiers, "all_feature_extractors": all_feature_extractors, "all_query_strategies": all_query_strategies, - "all_balancing_strategies": all_balancing_strategies, + "all_balance_strategies": all_balance_strategies, "impossible_models": impossible_models, } diff --git a/asreviewcontrib/makita/templates/template_multimodel.txt.template b/asreviewcontrib/makita/templates/template_multimodel.txt.template index 4bbd5e6e..f0b8a949 100644 --- a/asreviewcontrib/makita/templates/template_multimodel.txt.template +++ b/asreviewcontrib/makita/templates/template_multimodel.txt.template @@ -48,7 +48,7 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files {% for classifier in all_classifiers %} {% for feature_extraction in all_feature_extractors %} {% for query_strategy in all_query_strategies %} -{% for balance_strategy in all_balancing_strategies %} +{% for balance_strategy in all_balance_strategies %} {% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }} {% if temp in impossible_models %} From 2955ef43b5ab007c2fde56a5ddaa6d5ef85b2d36 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 18 Apr 2024 15:01:16 +0200 Subject: [PATCH 94/95] Rename template functions --- asreviewcontrib/makita/entrypoint.py | 4 ++-- asreviewcontrib/makita/template_arfi.py | 4 ++-- asreviewcontrib/makita/template_base.py | 8 ++++---- asreviewcontrib/makita/template_basic.py | 4 ++-- asreviewcontrib/makita/template_multimodel.py | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 7c463756..4b8dd9e0 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -48,13 +48,13 @@ def execute(self, argv): # noqa: C901 "--init_seed", type=int, default=535, - help="Seed of the priors. " "535 by default.", + help="Seed of the priors. 535 by default.", ) parser_template.add_argument( "--model_seed", type=int, default=165, - help="Seed of the models. " "165 by default.", + help="Seed of the models. 165 by default.", ) parser_template.add_argument( "--template", type=str, help="Overwrite template with template file path." diff --git a/asreviewcontrib/makita/template_arfi.py b/asreviewcontrib/makita/template_arfi.py index 6f03f2d0..800a453d 100644 --- a/asreviewcontrib/makita/template_arfi.py +++ b/asreviewcontrib/makita/template_arfi.py @@ -24,7 +24,7 @@ def __init__( self.n_priors = n_priors super().__init__(**kwargs) - def get_dynamic_params(self, index, fp_dataset): + def get_dataset_specific_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" @@ -40,7 +40,7 @@ def get_dynamic_params(self, index, fp_dataset): "model_seed": self.model_seed + index, } - def get_static_params(self, params): + def get_template_specific_params(self, params): """Prepare template-specific parameters. 
These parameters are provided to the template only once.""" diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 0f5aa02a..7b5c0f2e 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -54,7 +54,7 @@ def __init__( def get_template_file(self): return Path(TEMPLATES_FP, self.template_file) - def get_dynamic_params(self, index, fp_dataset): + def get_dataset_specific_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" @@ -62,7 +62,7 @@ def get_dynamic_params(self, index, fp_dataset): "Subclasses should implement this method to prepare dataset-specific parameters." # noqa: E501 ) - def get_static_params(self, params): + def get_template_specific_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" @@ -116,10 +116,10 @@ def render(self): f"Dataset filename '{fp_dataset}' cannot contain whitespace." ) # noqa fp_dataset = Path(fp_dataset) - params.append(self.get_dynamic_params(i, fp_dataset)) + params.append(self.get_dataset_specific_params(i, fp_dataset)) try: - rendered_output = self.template.render(self.get_static_params(params)) + rendered_output = self.template.render(self.get_template_specific_params(params)) except TypeError as e: if "'StrictUndefined' object cannot be interpreted as an integer" in str(e): print("\033[31mERROR: A rendering exception occurred -", e) diff --git a/asreviewcontrib/makita/template_basic.py b/asreviewcontrib/makita/template_basic.py index 40a19163..e1edb76d 100644 --- a/asreviewcontrib/makita/template_basic.py +++ b/asreviewcontrib/makita/template_basic.py @@ -22,7 +22,7 @@ def __init__( self.n_runs = n_runs super().__init__(**kwargs) - def get_dynamic_params(self, index, fp_dataset): + def get_dataset_specific_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" @@ -33,7 +33,7 @@ def get_dynamic_params(self, index, fp_dataset): "init_seed": self.init_seed, } - def get_static_params(self, params): + def get_template_specific_params(self, params): """Prepare template-specific parameters. These parameters are provided to the template only once.""" diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index e1d606a8..60ce53a4 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -27,7 +27,7 @@ def __init__( super().__init__(**kwargs) - def get_dynamic_params(self, index, fp_dataset): + def get_dataset_specific_params(self, index, fp_dataset): """Prepare dataset-specific parameters. These parameters are provided to the template once for each dataset.""" @@ -38,7 +38,7 @@ def get_dynamic_params(self, index, fp_dataset): "init_seed": self.init_seed, } - def get_static_params(self, params): + def get_template_specific_params(self, params): """Prepare template-specific parameters. 
These parameters are provided to the template only once.""" From 8ad435d863632e2a53b6236f1bda550336175607 Mon Sep 17 00:00:00 2001 From: JT Date: Thu, 18 Apr 2024 15:02:46 +0200 Subject: [PATCH 95/95] Ruff formatter --- asreviewcontrib/makita/template_base.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/asreviewcontrib/makita/template_base.py b/asreviewcontrib/makita/template_base.py index 7b5c0f2e..c0fa83d9 100644 --- a/asreviewcontrib/makita/template_base.py +++ b/asreviewcontrib/makita/template_base.py @@ -26,7 +26,7 @@ def __init__( instances_per_query, stop_if, job_file, - **kwargs + **kwargs, ): self.datasets = datasets self.output_folder = output_folder @@ -119,7 +119,9 @@ def render(self): params.append(self.get_dataset_specific_params(i, fp_dataset)) try: - rendered_output = self.template.render(self.get_template_specific_params(params)) + rendered_output = self.template.render( + self.get_template_specific_params(params) + ) except TypeError as e: if "'StrictUndefined' object cannot be interpreted as an integer" in str(e): print("\033[31mERROR: A rendering exception occurred -", e)
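A few closing notes on techniques used in this series, with small sketches; none of the code below is part of the patches themselves.

The "Create directories using Python" step introduced in PATCH 85 replaces the shell `mkdir -p` with a Python one-liner, presumably so that the same step runs on the Windows runner as well as on Linux and macOS. Expanded for readability, the one-liner is equivalent to:

```python
# Expanded form of the one-liner in the "Create directories using Python" step.
# os.makedirs(..., exist_ok=True) creates intermediate directories and is a
# no-op when the directory already exists, on every runner OS.
import os

paths = [
    "./tmp/basic/data-test",
    "./tmp/arfi/data",
    "./tmp/multimodel/data",
    "./tmp/scripts",
    "./tmp/synergy/data",
]
for path in paths:
    os.makedirs(path, exist_ok=True)
```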
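PATCH 91 and PATCH 92 together guard the wordcloud entries in doc_README.md.template with `{% if not skip_wordclouds %}`. The follow-up in PATCH 92 reorders the closing tags because the makita templates are Jinja2-based (the StrictUndefined error handled in template_base.py is a Jinja2 type), and Jinja2 requires block tags to nest properly: an `{% if %}` opened inside a `{% for %}` has to close before that loop's `{% endfor %}`. A minimal illustration, not taken from the repository:

```python
# A properly nested if inside a for renders fine; the mis-nested tag order
# (closing the loop before the if) is rejected when the template is compiled.
from jinja2 import Template, TemplateSyntaxError

ok = "{% for d in datasets %}{% if not skip_wordclouds %}{{ d }} {% endif %}{% endfor %}"
print(Template(ok).render(datasets=["a", "b"], skip_wordclouds=False))  # -> "a b "

broken = "{% for d in datasets %}{% if not skip_wordclouds %}{{ d }} {% endfor %}{% endif %}"
try:
    Template(broken)
except TemplateSyntaxError as err:
    print("rejected:", err)
```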
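Finally, PATCH 94 renames the per-dataset and per-template hooks to get_dataset_specific_params and get_template_specific_params; render() in template_base.py now collects the former once per dataset and feeds the result through the latter into template.render(). As a rough, hypothetical sketch of how a custom template class could plug into these renamed hooks (everything except the two hook names and the attributes visible in the hunks above is an assumption, not taken from the patches):

```python
# Hypothetical sketch only -- names such as TemplateBase and RenderJobsCustom
# are assumed for illustration; the hook names come from PATCH 94.
from asreviewcontrib.makita.template_base import TemplateBase  # assumed class name


class RenderJobsCustom(TemplateBase):
    template_file = "template_custom.txt.template"  # resolved via get_template_file()

    def get_dataset_specific_params(self, index, fp_dataset):
        # Provided to the template once for each dataset (see render()).
        return {
            "input_file": fp_dataset.as_posix(),
            "input_file_stem": fp_dataset.stem,
            "model_seed": self.model_seed + index,
            "init_seed": self.init_seed,
        }

    def get_template_specific_params(self, params):
        # Provided to the template only once, wrapping the per-dataset params.
        return {
            "datasets": params,
            "output_folder": self.output_folder,
        }
```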