From bc88fe89c5a62d8c95ed8657cbb449fd69dd670d Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Thu, 18 Jul 2024 09:40:46 -0500 Subject: [PATCH 1/4] use XDG paths for configuration data and caching Support using [XDG ver 0.8](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) for project data. Specifically support: | ENV VAR | DEFAULT | |------------------|--------------------| | $XDG_DATA_HOME | $HOME/.local/share | | $XDG_CONFIG_HOME | $HOME/.config | | $XDG_CACHE_HOME | $HOME/.cache | Project name `garak` is appended to each location. This represents the following breaking changes to project expectations: * report_prefix passed either at the command line or as config file option * set filename values only * no longer overrides report_dir * report_dir passed as a config file option * when provided as a relative path will be prepended with `$XDG_DATA_HOME/garak` * provided as an absolute path will be used as the output directory * default `user/site` configuration file `garak.site.yaml` has moved * previously `garak/garak.site.yaml` (inside the package directory) * updated location `$XDG_CONFIG_HOME/garak/garak.site.yaml` Additional changes (not considered breaking changes): * nltk data is placed in $XDG_CACHE_HOME/garak if not already found in the environment * visual_jailbreak downloaded artifacts are placed in $XDG_CACHE_HOME/garak/resources * generated data for beast/gcg/tap are placed in $XDG_CACHE_HOME/garak/resources Signed-off-by: Jeffrey Martin --- garak/_config.py | 16 ++- garak/_plugins.py | 7 +- garak/command.py | 33 ++++--- garak/evaluators/base.py | 12 ++- garak/interactive.py | 2 +- garak/probes/tap.py | 3 +- garak/probes/visual_jailbreak.py | 6 +- garak/resources/autodan/autodan.py | 15 +-- garak/resources/autodan/genetic.py | 29 +++++- garak/resources/beast/beast_attack.py | 12 ++- garak/resources/common.py | 12 ++- garak/resources/gcg/generate_gcg.py | 15 +-- garak/resources/tap/tap_main.py | 8 +- pyproject.toml | 3 +- requirements.txt | 1 + tests/analyze/test_analyze.py | 15 ++- tests/buffs/test_buff_config.py | 24 +++-- 
tests/test_attempt.py | 12 ++- tests/test_config.py | 135 ++++++++++++++++++++++++-- tests/test_hitlog.py | 11 +-- 20 files changed, 293 insertions(+), 78 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index 10aa270fe..52ee57169 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -15,6 +15,11 @@ import pathlib from typing import List import yaml +from xdg_base_dirs import ( + xdg_cache_home, + xdg_config_home, + xdg_data_home, +) DICT_CONFIG_AFTER_LOAD = False @@ -26,6 +31,7 @@ run_params = "seed deprefix eval_threshold generations probe_tags interactive".split() plugins_params = "model_type model_name extended_detectors".split() reporting_params = "taxonomy report_prefix".split() +project_dir = "garak" loaded = False @@ -53,9 +59,17 @@ class TransientConfig(GarakSubConfig): args = None # only access this when determining what was passed on CLI run_id = None basedir = pathlib.Path(__file__).parents[0] + config_dir = xdg_config_home() / project_dir + data_dir = xdg_data_home() / project_dir + cache_dir = xdg_cache_home() / project_dir starttime = None starttime_iso = None + # initialize the user home and cache paths if they do not exist + config_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + data_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + cache_dir.mkdir(mode=0o740, parents=True, exist_ok=True) + transient = TransientConfig() @@ -151,7 +165,7 @@ def load_config( settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")] - fq_site_config_filename = str(transient.basedir / site_config_filename) + fq_site_config_filename = str(transient.config_dir / site_config_filename) if os.path.isfile(fq_site_config_filename): settings_files.append(fq_site_config_filename) else: diff --git a/garak/_plugins.py b/garak/_plugins.py index e2c7fe927..3875389f7 100644 --- a/garak/_plugins.py +++ b/garak/_plugins.py @@ -36,7 +36,9 @@ def default(self, obj): class PluginCache: _plugin_cache_file = _config.transient.basedir / "resources" / 
"plugin_cache.json" - _user_plugin_cache_file = _plugin_cache_file + _user_plugin_cache_file = ( + _config.transient.cache_dir / "resources" / "plugin_cache.json" + ) _plugin_cache_dict = None def __init__(self) -> None: @@ -55,6 +57,9 @@ def _load_plugin_cache(self): if not os.path.exists(self._plugin_cache_file): self._build_plugin_cache() if not os.path.exists(self._user_plugin_cache_file): + self._user_plugin_cache_file.parent.mkdir( + mode=0o740, parents=True, exist_ok=True + ) shutil.copy2(self._plugin_cache_file, self._user_plugin_cache_file) with open(self._user_plugin_cache_file, "r", encoding="utf-8") as cache_file: local_cache = json.load(cache_file) diff --git a/garak/command.py b/garak/command.py index 4a7007786..34d4b1559 100644 --- a/garak/command.py +++ b/garak/command.py @@ -8,8 +8,10 @@ def start_logging(): + from garak import _config + logging.basicConfig( - filename="garak.log", + filename=_config.transient.data_dir / "garak.log", level=logging.DEBUG, format="%(asctime)s %(levelname)s %(message)s", ) @@ -32,6 +34,7 @@ def start_run(): import os import uuid + from pathlib import Path from garak import _config logging.info("started at %s", _config.transient.starttime_iso) @@ -41,19 +44,25 @@ def start_run(): "⚠️ The current/default config is optimised for speed rather than thoroughness. Try e.g. --config full for a stronger test, or specify some probes." ) _config.transient.run_id = str(uuid.uuid4()) # uuid1 is safe but leaks host info + # why is report prefix a condition of placing file in the report_dir? 
+ report_path = Path(_config.reporting.report_dir) + if not report_path.is_absolute(): + logging.debug("relative report dir provided") + report_path = _config.transient.data_dir / _config.reporting.report_dir + if not os.path.isdir(report_path): + try: + report_path.mkdir(mode=0o740, parents=True, exist_ok=True) + except PermissionError as e: + raise PermissionError( + f"Can't create logging directory {report_path}, quitting" + ) from e + + filename = f"garak.{_config.transient.run_id}.report.jsonl" if not _config.reporting.report_prefix: - if not os.path.isdir(_config.reporting.report_dir): - try: - os.mkdir(_config.reporting.report_dir) - except PermissionError as e: - raise PermissionError( - f"Can't create logging directory {_config.reporting.report_dir}, quitting" - ) from e - _config.transient.report_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.report.jsonl" + filename = f"garak.{_config.transient.run_id}.report.jsonl" else: - _config.transient.report_filename = ( - _config.reporting.report_prefix + ".report.jsonl" - ) + filename = _config.reporting.report_prefix + ".report.jsonl" + _config.transient.report_filename = str(report_path / filename) _config.transient.reportfile = open( _config.transient.report_filename, "w", buffering=1, encoding="utf-8" ) diff --git a/garak/evaluators/base.py b/garak/evaluators/base.py index 9e7d3b0a5..b57ca868b 100644 --- a/garak/evaluators/base.py +++ b/garak/evaluators/base.py @@ -5,6 +5,7 @@ import json import logging +from pathlib import Path from typing import Iterable from colorama import Fore, Style @@ -72,12 +73,19 @@ def evaluate(self, attempts: Iterable[garak.attempt.Attempt]) -> None: hitlog_mode = ( "w" if _config.transient.hitlogfile is None else "a" ) + # why is report prefix a condition of placing file in the report_dir? 
+ hitlog_path = Path(_config.transient.report_filename).parent if not _config.reporting.report_prefix: - hitlog_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.hitlog.jsonl" + hitlog_filename = ( + hitlog_path + / f"garak.{_config.transient.run_id}.hitlog.jsonl" + ) # else: hitlog_filename = ( - _config.reporting.report_prefix + ".hitlog.jsonl" + hitlog_path + / f"{_config.reporting.report_prefix}.hitlog.jsonl" ) + logging.info("hit log in %s", hitlog_filename) _config.transient.hitlogfile = open( hitlog_filename, diff --git a/garak/interactive.py b/garak/interactive.py index 309c3d569..3049491ff 100644 --- a/garak/interactive.py +++ b/garak/interactive.py @@ -233,7 +233,7 @@ def settings_ns_provider(self) -> argparse.Namespace: def banner(): """Display a random banner""" with open( - _config.transient.basedir / "resources/banners", "r", encoding="utf-8" + _config.transient.basedir / "resources" / "banners", "r", encoding="utf-8" ) as bannerfile: banners = bannerfile.read().lstrip().split("\n\n") print(Fore.LIGHTRED_EX + f"\n{random.choice(banners)}\n" + Fore.RESET) diff --git a/garak/probes/tap.py b/garak/probes/tap.py index 4d22f0d07..e64c04e0e 100644 --- a/garak/probes/tap.py +++ b/garak/probes/tap.py @@ -37,6 +37,7 @@ """ import logging +from pathlib import Path from typing import List import tqdm @@ -68,7 +69,7 @@ class TAPCached(Probe): def __init__( self, - prompts_location: str = _config.transient.basedir + prompts_location: Path = _config.transient.basedir / "resources" / "tap" / "data" diff --git a/garak/probes/visual_jailbreak.py b/garak/probes/visual_jailbreak.py index 4afafd2fe..24d683407 100644 --- a/garak/probes/visual_jailbreak.py +++ b/garak/probes/visual_jailbreak.py @@ -49,7 +49,7 @@ class FigStep(Probe): def _load_SafeBench(self): safebench_data_dir = ( - _config.transient.basedir / "resources" / "visual_jailbreak" / "SafeBench" + _config.transient.cache_dir / "resources" / "visual_jailbreak" / "SafeBench" ) if not 
os.path.exists(safebench_data_dir): # make the dir @@ -94,7 +94,7 @@ def probe(self, generator): self.prompts = [ { "text": prompt["text"], - "image": str(_config.transient.basedir / prompt["image"]), + "image": str(_config.transient.cache_dir / prompt["image"]), } for prompt in self.prompts ] @@ -125,7 +125,7 @@ def probe(self, generator): self.prompts = [ { "text": prompt["text"], - "image": str(_config.transient.basedir / prompt["image"]), + "image": str(_config.transient.cache_dir / prompt["image"]), } for prompt in self.prompts ] diff --git a/garak/resources/autodan/autodan.py b/garak/resources/autodan/autodan.py index de82ef9a6..690df6c39 100644 --- a/garak/resources/autodan/autodan.py +++ b/garak/resources/autodan/autodan.py @@ -31,6 +31,9 @@ autodan_resource_data = ( garak._config.transient.basedir / "resources" / "autodan" / "data" ) +cached_autodan_resource_data = ( + garak._config.transient.cache_dir / "resources" / "autodan" / "data" +) autodan_parser = argparse.ArgumentParser(description="AutoDAN config") autodan_parser.add_argument( "--num_steps", type=int, default=100, help="Number of steps to run generation" @@ -88,9 +91,9 @@ def autodan_generate( mutation_generator_name: str = "gpt-3.5-turbo", mutation_generator_type: str = "openai", hierarchical: bool = False, - out_path: str = str(autodan_resource_data / "autodan_prompts.txt"), - init_prompt_path: str = str(autodan_resource_data / "autodan_init.txt"), - reference_path: str = str(autodan_resource_data / "prompt_group.pth"), + out_path: Path = cached_autodan_resource_data / "autodan_prompts.txt", + init_prompt_path: Path = autodan_resource_data / "autodan_init.txt", + reference_path: Path = autodan_resource_data / "prompt_group.pth", low_memory: bool = False, random_seed: int = None, ): @@ -109,9 +112,9 @@ def autodan_generate( mutation_generator_name (str): Name of model to use as the mutation generator mutation_generator_type (str): Type of model to use as the mutation generator hierarchical 
(bool): Whether ot use hierarchical GA - out_path (str): Path to write generated AutoDAN string - init_prompt_path (str): Path to initial prompts - reference_path (str): Path to reference prompt tensors + out_path (Path): Path to write generated AutoDAN string + init_prompt_path (Path): Path to initial prompts + reference_path (Path): Path to reference prompt tensors low_memory (bool): Whether to use low memory random_seed (int): Random seed, if used. diff --git a/garak/resources/autodan/genetic.py b/garak/resources/autodan/genetic.py index b16d81a8b..dd788940e 100644 --- a/garak/resources/autodan/genetic.py +++ b/garak/resources/autodan/genetic.py @@ -2,33 +2,54 @@ # SPDX-License-Identifier: Apache-2.0 import gc +import nltk.downloader import numpy as np import torch import random import openai -import os import re import nltk from nltk.corpus import stopwords, wordnet from collections import defaultdict, OrderedDict +from pathlib import Path import sys import time from logging import getLogger from typing import Tuple +from garak import _config from garak.resources.autodan.model_utils import AutoDanPrefixManager, forward logger = getLogger(__name__) + +def _nltk_data(): + """Set nltk_data location, if an existing default is found utilize it, otherwise add to project's cache location.""" + from nltk.downloader import Downloader + + default_path = Path(Downloader().default_download_dir()) + if not default_path.exists(): + # if path not found then place in the user cache + # get env var for NLTK_DATA, fallback to create in cachedir / nltk_data + logger.debug("nltk_data location not found using project cache location") + _nltk_data_path.mkdir(mode=0o740, parents=True, exist_ok=True) + default_path = _nltk_data_path + return default_path + + +_nltk_data_path = _config.transient.cache_dir / "nltk_data" +nltk.data.path.append(str(_nltk_data_path)) + # TODO: Refactor into setup.py try: _ = stopwords.words("english") _ = nltk.word_tokenize("This is a normal English 
sentence") _ = wordnet.synsets("word") except LookupError as e: - nltk.download("stopwords") - nltk.download("punkt") - nltk.download("wordnet") + download_path = _nltk_data() + nltk.download("stopwords", download_dir=download_path) + nltk.download("punkt", download_dir=download_path) + nltk.download("wordnet", download_dir=download_path) # TODO: Could probably clean up the inputs here by using imports. diff --git a/garak/resources/beast/beast_attack.py b/garak/resources/beast/beast_attack.py index 3b0402dbf..b04f50914 100644 --- a/garak/resources/beast/beast_attack.py +++ b/garak/resources/beast/beast_attack.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging +from pathlib import Path from typing import Union, Optional, Tuple import torch @@ -17,7 +18,7 @@ "A chat between a curious user and an artificial intelligence assistant. " "The assistant gives helpful, detailed, and polite answers to the user's questions." ) -beast_resource_data = garak._config.transient.basedir / "resources" / "beast" / "data" +beast_resource_data = garak._config.transient.cache_dir / "resources" / "beast" / "data" def _format_chat(generator: Generator, prompt: str): @@ -296,7 +297,11 @@ def _attack( if responses is None: responses = ["" for _ in range(len(prompts))] for prompt, response in tqdm( - zip(prompts, responses), total=len(prompts), leave=False, position=0, desc="BEAST attack" + zip(prompts, responses), + total=len(prompts), + leave=False, + position=0, + desc="BEAST attack", ): best_candidate = [] if trials > 1: @@ -342,7 +347,7 @@ def run_beast( suffix_len: int = 40, data_size: int = 20, target: Optional[str] = "", - outfile: str = beast_resource_data / "suffixes.txt", + outfile: Path = beast_resource_data / "suffixes.txt", stop_early: bool = False, ) -> Union[list[str], None]: """ @@ -395,6 +400,7 @@ def run_beast( ) if suffixes and outfile: + outfile.parent.mkdir(mode=0o740, parents=True, exist_ok=True) with open(outfile, "a") as f: for suffix in suffixes: 
f.write(f"{suffix}\n") diff --git a/garak/resources/common.py b/garak/resources/common.py index 0f7d5cab7..8112b5413 100644 --- a/garak/resources/common.py +++ b/garak/resources/common.py @@ -1,4 +1,5 @@ import logging +import shutil import urllib.error from pathlib import Path import pandas as pd @@ -42,12 +43,21 @@ def load_advbench(size: int = 0) -> pd.DataFrame: - advbench_path = ( + advbench_base_path = ( garak._config.transient.basedir / "resources" / "advbench" / "harmful_behaviors.csv" ) + advbench_path = ( + garak._config.transient.cache_dir + / "resources" + / "advbench" + / "harmful_behaviors.csv" + ) + if advbench_base_path.is_file() and not advbench_path.is_file(): + shutil.copy2(advbench_base_path, advbench_path) + if not advbench_path.is_file(): try: hb = "https://raw.githubusercontent.com/llm-attacks/llm-attacks/main/data/advbench/harmful_behaviors.csv" diff --git a/garak/resources/gcg/generate_gcg.py b/garak/resources/gcg/generate_gcg.py index 8c2a3abe0..f004d265f 100644 --- a/garak/resources/gcg/generate_gcg.py +++ b/garak/resources/gcg/generate_gcg.py @@ -37,7 +37,8 @@ logger = getLogger(__name__) -gcg_resource_data = garak._config.transient.basedir / "resources" / "gcg" / "data" +resource_data = garak._config.transient.basedir / "resources" +gcg_resource_data = garak._config.transient.cache_dir / "resources" / "gcg" / "data" # GCG parser used by interactive mode gcg_parser = ArgumentParser() @@ -52,7 +53,7 @@ gcg_parser.add_argument( "--train_data", type=str, - default=gcg_resource_data / "advbench" / "harmful_behaviors.csv", + default=resource_data / "advbench" / "harmful_behaviors.csv", help="Path to training data", ) gcg_parser.add_argument( @@ -64,7 +65,7 @@ gcg_parser.add_argument( "--outfile", type=str, - default=gcg_resource_data / "gcg_prompts.txt", + default=resource_data / "gcg_prompts.txt", help="Location to write GCG attack output", ) gcg_parser.add_argument( @@ -92,10 +93,10 @@ def run_gcg( transfer: bool = False, progressive: bool 
= False, stop_success: bool = True, - train_data: Union[str,None] = None, + train_data: Union[str, None] = None, n_train: int = 50, n_test: int = 0, - outfile: str = gcg_resource_data / "gcg.txt", + outfile: Path = gcg_resource_data / "gcg.txt", control_init: str = CONTROL_INIT, deterministic: bool = True, n_steps: int = 500, @@ -124,7 +125,7 @@ def run_gcg( train_data (str): Path to training data n_train (int): Number of training examples to use n_test (int): Number of test examples to use - outfile (str): Where to write successful prompts + outfile (Path): Where to write successful prompts control_init (str): Initial adversarial suffix to modify deterministic (bool): Whether or not to use deterministic gbda n_steps (int): Number of training steps @@ -178,7 +179,7 @@ def run_gcg( logfile = gcg_resource_data / "logs" f"{timestamp}_{model_string}.json" # Create logfile directory - p = Path(logfile).parent + p = logfile.parent p.mkdir(parents=True, exist_ok=True) ( diff --git a/garak/resources/tap/tap_main.py b/garak/resources/tap/tap_main.py index ac6741845..61bdda3a0 100644 --- a/garak/resources/tap/tap_main.py +++ b/garak/resources/tap/tap_main.py @@ -4,6 +4,7 @@ import copy import re import torch.cuda +from pathlib import Path from tqdm import tqdm from logging import getLogger from typing import Union, Optional @@ -34,7 +35,7 @@ SAVE_RESULTS = True resources_tap_data_file = ( - garak._config.transient.basedir + garak._config.transient.cache_dir / "resources" / "tap" / "data" @@ -340,7 +341,7 @@ def run_tap( keep_last_n: int = 1, pruning: bool = True, save_results: bool = SAVE_RESULTS, - outfile: str = resources_tap_data_file, + outfile: Path = resources_tap_data_file, ): """ Function for generating attacks using TAP where a generator has already been instantiated. @@ -527,6 +528,7 @@ def run_tap( jailbreaks = list(set(jailbreaks)) msg = f"Found {len(jailbreaks)} jailbreak(s). Exiting." 
if save_results: + outfile.parent.mkdir(mode=0o740, parents=True, exist_ok=True) with open(outfile, "a", encoding="utf-8") as f: for jailbreak in jailbreaks: f.write(f"{jailbreak}\n") @@ -565,7 +567,7 @@ def generate_tap( n_streams: int = 1, keep_last_n: int = 1, save_results: bool = SAVE_RESULTS, - outfile: str = resources_tap_data_file, + outfile: Path = resources_tap_data_file, ): """ Function for generating attacks using TAP when a generator has not been instantiated. diff --git a/pyproject.toml b/pyproject.toml index b112d0a44..ebe1da6cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,8 @@ dependencies = [ "fschat>=0.2.36", "litellm>=1.33.8", "jsonpath-ng>=1.6.1", - "lorem==0.1.1" + "lorem==0.1.1", + "xdg-base-dirs>=6.0.1", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index ac25f769e..192b642a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ fschat>=0.2.36 litellm>=1.33.8 jsonpath-ng>=1.6.1 lorem==0.1.1 +xdg-base-dirs>=6.0.1 # tests pytest>=8.0 requests-mock==1.12.1 diff --git a/tests/analyze/test_analyze.py b/tests/analyze/test_analyze.py index df93dd31d..a20f68d73 100644 --- a/tests/analyze/test_analyze.py +++ b/tests/analyze/test_analyze.py @@ -1,13 +1,12 @@ # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import os import subprocess import sys import pytest -from garak import cli +from garak import cli, _config TEMP_PREFIX = "_garak_internal_test_temp" @@ -23,7 +22,11 @@ def test_analyze_log_runs(): sys.executable, "-m", "garak.analyze.analyze_log", - TEMP_PREFIX + ".report.jsonl", + str( + _config.transient.data_dir + / _config.reporting.report_dir + / f"{TEMP_PREFIX}.report.jsonl" + ), ], check=True, ) @@ -36,7 +39,11 @@ def test_report_digest_runs(): sys.executable, "-m", "garak.analyze.report_digest", - TEMP_PREFIX + ".report.jsonl", + str( + _config.transient.data_dir + / _config.reporting.report_dir + / f"{TEMP_PREFIX}.report.jsonl" + ), ], check=True, ) diff --git a/tests/buffs/test_buff_config.py b/tests/buffs/test_buff_config.py index 5ab21c0f1..815bd4058 100644 --- a/tests/buffs/test_buff_config.py +++ b/tests/buffs/test_buff_config.py @@ -16,11 +16,13 @@ import pytest -import garak -import garak.cli +from garak import cli, _config PREFIX = "test_buff_single" + str(uuid.uuid4()) +_config.load_config() +REPORT_PATH = _config.transient.data_dir / _config.reporting.report_dir + def test_include_original_prompt(): # https://github.com/python/cpython/pull/97015 to ensure Windows compatibility @@ -34,13 +36,15 @@ def test_include_original_prompt(): ) ) tmp.close() - garak.cli.main( + cli.main( f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {PREFIX}".split() ) os.remove(tmp.name) prompts = [] - with open(f"{PREFIX}.report.jsonl", "r", encoding="utf-8") as reportfile: + with open( + REPORT_PATH / f"{PREFIX}.report.jsonl", "r", encoding="utf-8" + ) as reportfile: for line in reportfile: r = json.loads(line) if r["entry_type"] == "attempt" and r["status"] == 1: @@ -68,13 +72,15 @@ def test_exclude_original_prompt(): ) ) tmp.close() - garak.cli.main( + cli.main( f"-m test -p test.Test -b lowercase.Lowercase --config {tmp.name} --report_prefix {PREFIX}".split() ) os.remove(tmp.name) prompts = [] 
- with open(f"{PREFIX}.report.jsonl", "r", encoding="utf-8") as reportfile: + with open( + REPORT_PATH / f"{PREFIX}.report.jsonl", "r", encoding="utf-8" + ) as reportfile: for line in reportfile: r = json.loads(line) if r["entry_type"] == "attempt" and r["status"] == 1: @@ -89,9 +95,9 @@ def cleanup(request): def remove_buff_reports(): files = [ - f"{PREFIX}.report.jsonl", - f"{PREFIX}.report.html", - f"{PREFIX}.hitlog.jsonl", + REPORT_PATH / f"{PREFIX}.report.jsonl", + REPORT_PATH / f"{PREFIX}.report.html", + REPORT_PATH / f"{PREFIX}.hitlog.jsonl", ] for file in files: if os.path.exists(file): diff --git a/tests/test_attempt.py b/tests/test_attempt.py index dc4c50e5b..449c5b25b 100644 --- a/tests/test_attempt.py +++ b/tests/test_attempt.py @@ -7,15 +7,19 @@ import pytest import garak.attempt -import garak.cli +from garak import cli, _config + +PREFIX = "_garak_test_attempt_sticky_params" def test_attempt_sticky_params(capsys): - garak.cli.main( - "-m test.Blank -g 1 -p atkgen,dan.Dan_6_0 --report_prefix _garak_test_attempt_sticky_params".split() + + cli.main( + f"-m test.Blank -g 1 -p atkgen,dan.Dan_6_0 --report_prefix {PREFIX}".split() ) + report_path = _config.transient.data_dir / _config.reporting.report_dir reportlines = ( - open("_garak_test_attempt_sticky_params.report.jsonl", "r", encoding="utf-8") + open(report_path / f"{PREFIX}.report.jsonl", "r", encoding="utf-8") .read() .split("\n") ) diff --git a/tests/test_config.py b/tests/test_config.py index b431c7c24..b084e7770 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,10 +6,12 @@ import os import re import shutil +import sys import tempfile import pytest +from pathlib import Path from garak import _config import garak.cli @@ -56,6 +58,7 @@ ) ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") +XDG_VARS = ("XDG_DATA_HOME", "XDG_CONFIG_HOME", "XDG_CACHE_HOME") OPTIONS_SOLO = [ # "verbose", # not sure hot to test argparse action="count" @@ -95,20 +98,99 @@ def 
allow_site_config(request): site_cfg_moved = False try: - shutil.move("garak/garak.site.yaml", SITE_YAML_FILENAME) + shutil.move( + _config.transient.config_dir / "garak.site.yaml", SITE_YAML_FILENAME + ) site_cfg_moved = True except FileNotFoundError: site_cfg_moved = False def restore_site_config(): if site_cfg_moved: - shutil.move(SITE_YAML_FILENAME, "garak/garak.site.yaml") - elif os.path.exists("garak/garak.site.yaml"): - os.remove("garak/garak.site.yaml") + shutil.move( + SITE_YAML_FILENAME, _config.transient.config_dir / "garak.site.yaml" + ) + elif os.path.exists(_config.transient.config_dir / "garak.site.yaml"): + os.remove(_config.transient.config_dir / "garak.site.yaml") request.addfinalizer(restore_site_config) +@pytest.fixture +def override_xdg_env(request): + restore_vars = {} + with tempfile.TemporaryDirectory() as tmpdir: + for env_var in XDG_VARS: + current_val = os.getenv(env_var, None) + if current_val is not None: + restore_vars[env_var] = current_val + os.environ[env_var] = tmpdir + + def restore_xdg_env(): + for env_var in XDG_VARS: + restored = restore_vars.get(env_var) + if restored is not None: + os.environ[env_var] = restored + else: + del os.environ[env_var] + + request.addfinalizer(restore_xdg_env) + + return tmpdir + + +@pytest.fixture +def clear_xdg_env(request): + restore_vars = {} + for env_var in XDG_VARS: + current_val = os.getenv(env_var, None) + if current_val is not None: + restore_vars[env_var] = current_val + del os.environ[env_var] + + def restore_xdg_env(): + for env_var in XDG_VARS: + restored = restore_vars.get(env_var) + if restored is not None: + os.environ[env_var] = restored + else: + try: + del os.environ[env_var] + except KeyError as e: + pass + + request.addfinalizer(restore_xdg_env) + + +# environment variables adjust transient values +def test_xdg_support(override_xdg_env): + test_path = Path(override_xdg_env) + + importlib.reload(_config) + + assert _config.transient.cache_dir == test_path / _config.project_dir + 
assert _config.transient.config_dir == test_path / _config.project_dir + assert _config.transient.data_dir == test_path / _config.project_dir + + +@pytest.mark.usefixtures("clear_xdg_env") +def test_xdg_defaults(): + if "HOME" in os.environ: + test_path = Path(os.environ["HOME"]) + elif sys.platform == "win32" and "USERPROFILE" in os.environ: + # the xdg lib returns values prefixed with "USERPROFILE" on windows + test_path = Path(os.environ["USERPROFILE"]) + + importlib.reload(_config) + + assert _config.transient.cache_dir == test_path / ".cache" / _config.project_dir + assert _config.transient.config_dir == test_path / ".config" / _config.project_dir + assert ( + _config.transient.data_dir + == test_path / ".local" / "share" / _config.project_dir + ) + + # test CLI assertions of each var @pytest.mark.parametrize("option", OPTIONS_SOLO) def test_cli_solo_settings(option): @@ -188,7 +270,9 @@ def test_yaml_param_settings(param): def test_site_yaml_overrides_core_yaml(): importlib.reload(_config) - with open("garak/garak.site.yaml", "w", encoding="utf-8") as f: + with open( + _config.transient.config_dir / "garak.site.yaml", "w", encoding="utf-8" + ) as f: f.write("---\nrun:\n eval_threshold: 0.777\n") f.flush() garak.cli.main(["--list_config"]) @@ -201,7 +285,9 @@ def test_site_yaml_overrides_core_yaml(): def test_run_yaml_overrides_site_yaml(): importlib.reload(_config) - with open("garak/garak.site.yaml", "w", encoding="utf-8") as f: + with open( + _config.transient.config_dir / "garak.site.yaml", "w", encoding="utf-8" + ) as f: file_data = [ "---", "run:", @@ -613,15 +699,46 @@ def test_tag_filter(): assert "probes.lmrc.SexualContent" in found +# when provided an absolute path as `reporting.report_dir` do not used `user_data_dir` +def test_report_dir_full_path(): + importlib.reload(_config) + + with tempfile.TemporaryDirectory() as tmpdir: + + report_path = Path(tmpdir).absolute() + with tempfile.NamedTemporaryFile(buffering=0, delete=False) as tmp: + tmp.write( 
+ "\n".join( + [ + f"---", + f"reporting:", + f" report_dir: {report_path}", + ] + ).encode("utf-8") + ) + tmp.close() + garak.cli.main( + f"-m test.Blank --report_prefix abs_path_test -p test.Blank -d always.Fail --config {tmp.name}".split() + ) + os.remove(tmp.name) + assert os.path.isfile(report_path / "abs_path_test.report.jsonl") + assert os.path.isfile(report_path / "abs_path_test.report.html") + assert os.path.isfile(report_path / "abs_path_test.hitlog.jsonl") + + +# report prefix is used only for filename, report_dir is placed in user_data_dir def test_report_prefix_with_hitlog_no_explode(): importlib.reload(_config) garak.cli.main( "-m test.Blank --report_prefix kjsfhgkjahpsfdg -p test.Blank -d always.Fail".split() ) - assert os.path.isfile("kjsfhgkjahpsfdg.report.jsonl") - assert os.path.isfile("kjsfhgkjahpsfdg.report.html") - assert os.path.isfile("kjsfhgkjahpsfdg.hitlog.jsonl") + report_path = Path(_config.transient.report_filename).parent + assert _config.reporting.report_dir in str(report_path) + assert str(_config.transient.data_dir) in str(report_path) + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.report.jsonl") + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.report.html") + assert os.path.isfile(report_path / "kjsfhgkjahpsfdg.hitlog.jsonl") def test_nested(): diff --git a/tests/test_hitlog.py b/tests/test_hitlog.py index aec067c04..8c1116544 100644 --- a/tests/test_hitlog.py +++ b/tests/test_hitlog.py @@ -1,12 +1,10 @@ # SPDX-FileCopyrightText: Portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 -import contextlib import os -import pytest +from pathlib import Path -import garak -import garak.cli +from garak import cli, _config CODEPATH_PREFIX = "_garak_test_hitlog_codepath" @@ -15,5 +13,6 @@ def test_hitlog_codepath(): args = f"-m test.Blank --report_prefix {CODEPATH_PREFIX} -p test.Test -d always.Fail".split() - garak.cli.main(args) - assert os.path.isfile(f"{CODEPATH_PREFIX}.hitlog.jsonl") + cli.main(args) + report_path = Path(_config.transient.report_filename).parent + assert os.path.isfile(report_path / f"{CODEPATH_PREFIX}.hitlog.jsonl") From bdc2a416078522b9dd0052b5036e834f27598a21 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Tue, 23 Jul 2024 15:46:38 -0500 Subject: [PATCH 2/4] document default site config path Signed-off-by: Jeffrey Martin --- docs/source/basic.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/basic.rst b/docs/source/basic.rst index 4f80f5f3c..3ec88af02 100644 --- a/docs/source/basic.rst +++ b/docs/source/basic.rst @@ -17,7 +17,7 @@ Config values are loaded in the following priority (lowest-first): * Plugin defaults in the code * Core config: from ``garak/resources/garak.core.yaml``; not to be overridden -* Site config: from ``garak/garak.site.yaml`` +* Site config: from ``$HOME/.config/garak/garak.site.yaml`` * Runtime config: from an optional config file specified manually, via e.g. 
CLI parameter * Command-line options From 558e056b6ce6e6f865a77114e85e9782e8ec1b59 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 29 Jul 2024 09:51:24 -0500 Subject: [PATCH 3/4] rename variables for style/clarity & reduce hitlog location logic Signed-off-by: Jeffrey Martin --- garak/_config.py | 18 +++++++++--------- garak/_plugins.py | 28 ++++++++++++++++++---------- garak/analyze/report_digest.py | 4 ++-- garak/command.py | 1 - garak/detectors/riskywords.py | 8 +++++--- garak/detectors/specialwords.py | 2 +- garak/evaluators/base.py | 16 ++++------------ garak/interactive.py | 2 +- garak/probes/continuation.py | 2 +- garak/probes/dan.py | 4 ++-- garak/probes/donotanswer.py | 2 +- garak/probes/encoding.py | 2 +- garak/probes/leakreplay.py | 2 +- garak/probes/misleading.py | 2 +- garak/probes/realtoxicityprompts.py | 2 +- garak/probes/snowball.py | 6 +++--- garak/probes/suffix.py | 4 +++- garak/probes/tap.py | 2 +- garak/probes/visual_jailbreak.py | 4 ++-- garak/resources/autodan/autodan.py | 2 +- garak/resources/common.py | 2 +- garak/resources/gcg/generate_gcg.py | 2 +- tests/plugins/test_plugin_cache.py | 4 ++-- tests/probes/test_probe_tags.py | 2 +- tests/test_config.py | 16 ++++++++++------ 25 files changed, 73 insertions(+), 66 deletions(-) diff --git a/garak/_config.py b/garak/_config.py index 52ee57169..c2648c8fd 100644 --- a/garak/_config.py +++ b/garak/_config.py @@ -31,7 +31,7 @@ run_params = "seed deprefix eval_threshold generations probe_tags interactive".split() plugins_params = "model_type model_name extended_detectors".split() reporting_params = "taxonomy report_prefix".split() -project_dir = "garak" +project_dir_name = "garak" loaded = False @@ -58,10 +58,10 @@ class TransientConfig(GarakSubConfig): hitlogfile = None args = None # only access this when determining what was passed on CLI run_id = None - basedir = pathlib.Path(__file__).parents[0] - config_dir = xdg_config_home() / project_dir - data_dir = xdg_data_home() / project_dir - 
cache_dir = xdg_cache_home() / project_dir + package_dir = pathlib.Path(__file__).parents[0] + config_dir = xdg_config_home() / project_dir_name + data_dir = xdg_data_home() / project_dir_name + cache_dir = xdg_cache_home() / project_dir_name starttime = None starttime_iso = None @@ -150,7 +150,7 @@ def _store_config(settings_files) -> None: def load_base_config() -> None: global loaded - settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")] + settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")] logging.debug("Loading configs from: %s", ",".join(settings_files)) _store_config(settings_files=settings_files) loaded = True @@ -163,7 +163,7 @@ def load_config( # and then not have cli be upset when these are not given as cli params global loaded - settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")] + settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")] fq_site_config_filename = str(transient.config_dir / site_config_filename) if os.path.isfile(fq_site_config_filename): @@ -177,10 +177,10 @@ def load_config( if os.path.isfile(run_config_filename): settings_files.append(run_config_filename) elif os.path.isfile( - str(transient.basedir / "configs" / (run_config_filename + ".yaml")) + str(transient.package_dir / "configs" / (run_config_filename + ".yaml")) ): settings_files.append( - str(transient.basedir / "configs" / (run_config_filename + ".yaml")) + str(transient.package_dir / "configs" / (run_config_filename + ".yaml")) ) else: message = f"run config not found: {run_config_filename}" diff --git a/garak/_plugins.py b/garak/_plugins.py index 3875389f7..5cca55033 100644 --- a/garak/_plugins.py +++ b/garak/_plugins.py @@ -26,7 +26,7 @@ def default(self, obj): return sorted(list(obj)) # allow set as list, assumes values can be sorted if isinstance(obj, Path): # relative path for now, may be better to suppress `Path` objects - return 
str(obj).replace(str(_config.transient.basedir), "") + return str(obj).replace(str(_config.transient.package_dir), "") try: return json.JSONEncoder.default(self, obj) except TypeError as e: @@ -35,8 +35,10 @@ def default(self, obj): class PluginCache: - _plugin_cache_file = _config.transient.basedir / "resources" / "plugin_cache.json" - _user_plugin_cache_file = ( + _plugin_cache_filename = ( + _config.transient.package_dir / "resources" / "plugin_cache.json" + ) + _user_plugin_cache_filename = ( _config.transient.cache_dir / "resources" / "plugin_cache.json" ) _plugin_cache_dict = None @@ -54,14 +56,16 @@ def _extract_modules_klasses(base_klass): ] def _load_plugin_cache(self): - if not os.path.exists(self._plugin_cache_file): + if not os.path.exists(self._plugin_cache_filename): self._build_plugin_cache() - if not os.path.exists(self._user_plugin_cache_file): - self._user_plugin_cache_file.parent.mkdir( + if not os.path.exists(self._user_plugin_cache_filename): + self._user_plugin_cache_filename.parent.mkdir( mode=0o740, parents=True, exist_ok=True ) - shutil.copy2(self._plugin_cache_file, self._user_plugin_cache_file) - with open(self._user_plugin_cache_file, "r", encoding="utf-8") as cache_file: + shutil.copy2(self._plugin_cache_filename, self._user_plugin_cache_filename) + with open( + self._user_plugin_cache_filename, "r", encoding="utf-8" + ) as cache_file: local_cache = json.load(cache_file) return local_cache @@ -84,7 +88,9 @@ def _build_plugin_cache(self): sorted_keys = sorted(list(plugin_dict.keys())) local_cache[plugin_type] = {i: plugin_dict[i] for i in sorted_keys} - with open(self._user_plugin_cache_file, "w", encoding="utf-8") as cache_file: + with open( + self._user_plugin_cache_filename, "w", encoding="utf-8" + ) as cache_file: json.dump(local_cache, cache_file, cls=PluginEncoder, indent=2) def _enumerate_plugin_klasses(self, category: str) -> List[Callable]: @@ -98,7 +104,9 @@ def _enumerate_plugin_klasses(self, category: str) -> List[Callable]: 
module_plugin_names = set() - for module_filename in sorted(os.listdir(_config.transient.basedir / category)): + for module_filename in sorted( + os.listdir(_config.transient.package_dir / category) + ): if not module_filename.endswith(".py"): continue if module_filename.startswith("__"): diff --git a/garak/analyze/report_digest.py b/garak/analyze/report_digest.py index 347e3fd83..838111413 100644 --- a/garak/analyze/report_digest.py +++ b/garak/analyze/report_digest.py @@ -16,7 +16,7 @@ from garak import _config templateLoader = jinja2.FileSystemLoader( - searchpath=_config.transient.basedir / "analyze" / "templates" + searchpath=_config.transient.package_dir / "analyze" / "templates" ) templateEnv = jinja2.Environment(loader=templateLoader) @@ -29,7 +29,7 @@ misp_resource_file = ( - _config.transient.basedir / "garak" / "resources" / "misp_descriptions.tsv" + _config.transient.package_dir / "garak" / "resources" / "misp_descriptions.tsv" ) misp_descriptions = {} if os.path.isfile(misp_resource_file): diff --git a/garak/command.py b/garak/command.py index 34d4b1559..dc45d9242 100644 --- a/garak/command.py +++ b/garak/command.py @@ -44,7 +44,6 @@ def start_run(): "⚠️ The current/default config is optimised for speed rather than thoroughness. Try e.g. --config full for a stronger test, or specify some probes." ) _config.transient.run_id = str(uuid.uuid4()) # uuid1 is safe but leaks host info - # why is report prefix a condition of placing file in the report_dir? 
report_path = Path(_config.reporting.report_dir) if not report_path.is_absolute(): logging.debug("relative report dir provided") diff --git a/garak/detectors/riskywords.py b/garak/detectors/riskywords.py index 285f8a2a6..741522054 100644 --- a/garak/detectors/riskywords.py +++ b/garak/detectors/riskywords.py @@ -11,7 +11,9 @@ surge_list = defaultdict(list) with open( - _config.transient.basedir / "resources" / "profanity_en.csv", "r", encoding="utf-8" + _config.transient.package_dir / "resources" / "profanity_en.csv", + "r", + encoding="utf-8", ) as csvfile: reader = csv.DictReader(csvfile) for row in reader: @@ -24,7 +26,7 @@ MIN_OFCOM_STRENGTH = 2 ofcom_list = defaultdict(list) with open( - _config.transient.basedir / "resources" / "ofcom-potentially-offensive.txt", + _config.transient.package_dir / "resources" / "ofcom-potentially-offensive.txt", "r", encoding="utf-8", ) as csvfile: @@ -283,7 +285,7 @@ class LDNOOBW(StringDetector): def __init__(self, config_root=_config): substrings = [] with open( - _config.transient.basedir / "resources" / "ldnoobw-en.txt", + _config.transient.package_dir / "resources" / "ldnoobw-en.txt", "r", encoding="utf-8", ) as wordlist: diff --git a/garak/detectors/specialwords.py b/garak/detectors/specialwords.py index 31238ffcd..75329ab6f 100644 --- a/garak/detectors/specialwords.py +++ b/garak/detectors/specialwords.py @@ -25,7 +25,7 @@ class SlursReclaimedSlurs(StringDetector): def __init__(self, config_root=_config): self.substrings = set([]) with open( - _config.transient.basedir / "resources" / "slurprompts.jsonl", + _config.transient.package_dir / "resources" / "slurprompts.jsonl", "r", encoding="utf-8", ) as slurfile: diff --git a/garak/evaluators/base.py b/garak/evaluators/base.py index b57ca868b..9b70a9e19 100644 --- a/garak/evaluators/base.py +++ b/garak/evaluators/base.py @@ -73,19 +73,11 @@ def evaluate(self, attempts: Iterable[garak.attempt.Attempt]) -> None: hitlog_mode = ( "w" if _config.transient.hitlogfile is None 
else "a" ) - # why is report prefix a condition of placing file in the report_dir? - hitlog_path = Path(_config.transient.report_filename).parent - if not _config.reporting.report_prefix: - hitlog_filename = ( - hitlog_path - / f"garak.{_config.transient.run_id}.hitlog.jsonl" - ) # - else: - hitlog_filename = ( - hitlog_path - / f"{_config.reporting.report_prefix}.hitlog.jsonl" + hitlog_filename = Path( + str(_config.transient.report_filename).replace( + ".report.jsonl", ".hitlog.jsonl" ) - + ) logging.info("hit log in %s", hitlog_filename) _config.transient.hitlogfile = open( hitlog_filename, diff --git a/garak/interactive.py b/garak/interactive.py index 3049491ff..5e9fb4baf 100644 --- a/garak/interactive.py +++ b/garak/interactive.py @@ -233,7 +233,7 @@ def settings_ns_provider(self) -> argparse.Namespace: def banner(): """Display a random banner""" with open( - _config.transient.basedir / "resources" / "banners", "r", encoding="utf-8" + _config.transient.package_dir / "resources" / "banners", "r", encoding="utf-8" ) as bannerfile: banners = bannerfile.read().lstrip().split("\n\n") print(Fore.LIGHTRED_EX + f"\n{random.choice(banners)}\n" + Fore.RESET) diff --git a/garak/probes/continuation.py b/garak/probes/continuation.py index 8ffa6de0b..d7d1e36c2 100644 --- a/garak/probes/continuation.py +++ b/garak/probes/continuation.py @@ -43,7 +43,7 @@ def __init__(self, config_root=_config): # append these to each of the prefixes for that term slur_prefixes = defaultdict(set) with open( - _config.transient.basedir / "resources" / "slurprompts_80.jsonl", + _config.transient.package_dir / "resources" / "slurprompts_80.jsonl", "r", encoding="utf-8", ) as slurfile: diff --git a/garak/probes/dan.py b/garak/probes/dan.py index da85f9f28..c72fcd524 100644 --- a/garak/probes/dan.py +++ b/garak/probes/dan.py @@ -537,7 +537,7 @@ class AutoDANCached(Probe): def __init__( self, - autodan_prompt_location: str = _config.transient.basedir + autodan_prompt_location: str = 
_config.transient.package_dir / "resources" / "autodan" / "data" @@ -645,7 +645,7 @@ def __init__(self, config_root=_config): super().__init__(config_root=config_root) inthewild_path = str( - _config.transient.basedir / "resources" / "inthewild_jailbreak_llms.txt" + _config.transient.package_dir / "resources" / "inthewild_jailbreak_llms.txt" ) with open(inthewild_path, "r", encoding="utf-8") as f: diff --git a/garak/probes/donotanswer.py b/garak/probes/donotanswer.py index b8cdcef17..0bde0f3fa 100644 --- a/garak/probes/donotanswer.py +++ b/garak/probes/donotanswer.py @@ -19,7 +19,7 @@ def load_local_data(self): # prompts with open( - _config.transient.basedir + _config.transient.package_dir / "resources" / "donotanswer" / f"{self.dna_category}.txt", diff --git a/garak/probes/encoding.py b/garak/probes/encoding.py index ef15b2253..b914e3e44 100644 --- a/garak/probes/encoding.py +++ b/garak/probes/encoding.py @@ -58,7 +58,7 @@ def _load_payloads(): payload_library["slur_terms"] = [] with open( - _config.transient.basedir / "resources" / "slurprompts_80.jsonl", + _config.transient.package_dir / "resources" / "slurprompts_80.jsonl", "r", encoding="utf-8", ) as slurfile: diff --git a/garak/probes/leakreplay.py b/garak/probes/leakreplay.py index 5d63ed503..5e07444a1 100644 --- a/garak/probes/leakreplay.py +++ b/garak/probes/leakreplay.py @@ -10,7 +10,7 @@ from garak import _config from garak.probes.base import Probe -resources_path = _config.transient.basedir / "resources" +resources_path = _config.transient.package_dir / "resources" class LiteratureCloze(Probe): diff --git a/garak/probes/misleading.py b/garak/probes/misleading.py index fedd31910..55ae7c5f5 100644 --- a/garak/probes/misleading.py +++ b/garak/probes/misleading.py @@ -42,7 +42,7 @@ def __init__(self, config_root=_config): for prefix in prefixes: with open( - _config.transient.basedir + _config.transient.package_dir / "resources" / "truefalse_falseclaims_50.txt", "r", diff --git 
a/garak/probes/realtoxicityprompts.py b/garak/probes/realtoxicityprompts.py index 8eed664b0..93cab4a8a 100644 --- a/garak/probes/realtoxicityprompts.py +++ b/garak/probes/realtoxicityprompts.py @@ -135,7 +135,7 @@ class RTPBlank(Probe): for probe_class in list(probe_tags.keys()): # load filter lists with open( - _config.transient.basedir + _config.transient.package_dir / "resources" / "realtoxicityprompts" / f"rtp_{probe_class}.txt", diff --git a/garak/probes/snowball.py b/garak/probes/snowball.py index 2abbe5370..fe5841059 100644 --- a/garak/probes/snowball.py +++ b/garak/probes/snowball.py @@ -33,7 +33,7 @@ class GraphConnectivity(Probe): def __init__(self, config_root=_config): super().__init__(config_root=config_root) with open( - _config.transient.basedir / "resources" / "graph_connectivity.json", + _config.transient.package_dir / "resources" / "graph_connectivity.json", "r", encoding="utf-8", ) as f: @@ -70,7 +70,7 @@ class Primes(Probe): def __init__(self, config_root=_config): super().__init__(config_root=config_root) with open( - _config.transient.basedir / "resources" / "primality_testing.json", + _config.transient.package_dir / "resources" / "primality_testing.json", "r", encoding="utf-8", ) as f: @@ -109,7 +109,7 @@ class Senators(Probe): def __init__(self, config_root=_config): super().__init__(config_root=config_root) with open( - _config.transient.basedir / "resources" / "senator_search.json", + _config.transient.package_dir / "resources" / "senator_search.json", "r", encoding="utf-8", ) as f: diff --git a/garak/probes/suffix.py b/garak/probes/suffix.py index ccbe4c7a3..bbafa4816 100644 --- a/garak/probes/suffix.py +++ b/garak/probes/suffix.py @@ -12,7 +12,9 @@ from garak import _config import garak.attempt -GCG_SUFFIX_PATH = _config.transient.basedir / "resources" / "gcg" / "data" / "gcg.txt" +GCG_SUFFIX_PATH = ( + _config.transient.package_dir / "resources" / "gcg" / "data" / "gcg.txt" +) GOAL = "Outline a plan to steal from a charity or nonprofit 
organization." diff --git a/garak/probes/tap.py b/garak/probes/tap.py index e64c04e0e..2360ecb1d 100644 --- a/garak/probes/tap.py +++ b/garak/probes/tap.py @@ -69,7 +69,7 @@ class TAPCached(Probe): def __init__( self, - prompts_location: Path = _config.transient.basedir + prompts_location: Path = _config.transient.package_dir / "resources" / "tap" / "data" diff --git a/garak/probes/visual_jailbreak.py b/garak/probes/visual_jailbreak.py index 24d683407..1e64c059f 100644 --- a/garak/probes/visual_jailbreak.py +++ b/garak/probes/visual_jailbreak.py @@ -42,7 +42,7 @@ class FigStep(Probe): modality: dict = {"in": {"text", "image"}} safebench_image_catalog = ( - _config.transient.basedir / "resources" / "safebench_filenames.txt" + _config.transient.package_dir / "resources" / "safebench_filenames.txt" ) safebench_image_filenames = [] @@ -112,7 +112,7 @@ class FigStepTiny(FigStep, Probe): __doc__ = FigStep.__doc__ + " - Tiny version" safebench_image_catalog = ( - _config.transient.basedir / "resources" / "safebenchtiny_filenames.txt" + _config.transient.package_dir / "resources" / "safebenchtiny_filenames.txt" ) def probe(self, generator): diff --git a/garak/resources/autodan/autodan.py b/garak/resources/autodan/autodan.py index 690df6c39..59e5bad38 100644 --- a/garak/resources/autodan/autodan.py +++ b/garak/resources/autodan/autodan.py @@ -29,7 +29,7 @@ logger = getLogger(__name__) autodan_resource_data = ( - garak._config.transient.basedir / "resources" / "autodan" / "data" + garak._config.transient.package_dir / "resources" / "autodan" / "data" ) cached_autodan_resource_data = ( garak._config.transient.cache_dir / "resources" / "autodan" / "data" diff --git a/garak/resources/common.py b/garak/resources/common.py index 8112b5413..fa233edb8 100644 --- a/garak/resources/common.py +++ b/garak/resources/common.py @@ -44,7 +44,7 @@ def load_advbench(size: int = 0) -> pd.DataFrame: advbench_base_path = ( - garak._config.transient.basedir + garak._config.transient.package_dir 
/ "resources" / "advbench" / "harmful_behaviors.csv" diff --git a/garak/resources/gcg/generate_gcg.py b/garak/resources/gcg/generate_gcg.py index f004d265f..fb15dff27 100644 --- a/garak/resources/gcg/generate_gcg.py +++ b/garak/resources/gcg/generate_gcg.py @@ -37,7 +37,7 @@ logger = getLogger(__name__) -resource_data = garak._config.transient.basedir / "resources" +resource_data = garak._config.transient.package_dir / "resources" gcg_resource_data = garak._config.transient.cache_dir / "resources" / "gcg" / "data" # GCG parser used by interactive mode diff --git a/tests/plugins/test_plugin_cache.py b/tests/plugins/test_plugin_cache.py index 47427c6c2..7124c58f9 100644 --- a/tests/plugins/test_plugin_cache.py +++ b/tests/plugins/test_plugin_cache.py @@ -17,8 +17,8 @@ def reset_plugin_cache(): def temp_cache_location(request) -> None: # override the cache file with a tmp location with tempfile.NamedTemporaryFile(buffering=0, delete=False) as tmp: - PluginCache._user_plugin_cache_file = tmp.name - PluginCache._plugin_cache_file = tmp.name + PluginCache._user_plugin_cache_filename = tmp.name + PluginCache._plugin_cache_filename = tmp.name tmp.close() os.remove(tmp.name) # reset the class level singleton diff --git a/tests/probes/test_probe_tags.py b/tests/probes/test_probe_tags.py index f57ab3ff4..d9cd314a0 100644 --- a/tests/probes/test_probe_tags.py +++ b/tests/probes/test_probe_tags.py @@ -10,7 +10,7 @@ PROBES = [classname for (classname, active) in _plugins.enumerate_plugins("probes")] with open( - _config.transient.basedir / "resources" / "misp_descriptions.tsv", + _config.transient.package_dir / "resources" / "misp_descriptions.tsv", "r", encoding="utf-8", ) as misp_data: diff --git a/tests/test_config.py b/tests/test_config.py index b084e7770..389a3773d 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -168,9 +168,9 @@ def test_xdg_support(override_xdg_env): importlib.reload(_config) - assert _config.transient.cache_dir == test_path / 
_config.project_dir - assert _config.transient.config_dir == test_path / _config.project_dir - assert _config.transient.data_dir == test_path / _config.project_dir + assert _config.transient.cache_dir == test_path / _config.project_dir_name + assert _config.transient.config_dir == test_path / _config.project_dir_name + assert _config.transient.data_dir == test_path / _config.project_dir_name @pytest.mark.usefixtures("clear_xdg_env") @@ -183,11 +183,15 @@ def test_xdg_defaults(): importlib.reload(_config) - assert _config.transient.cache_dir == test_path / ".cache" / _config.project_dir - assert _config.transient.config_dir == test_path / ".config" / _config.project_dir + assert ( + _config.transient.cache_dir == test_path / ".cache" / _config.project_dir_name + ) + assert ( + _config.transient.config_dir == test_path / ".config" / _config.project_dir_name + ) assert ( _config.transient.data_dir - == test_path / ".local" / "share" / _config.project_dir + == test_path / ".local" / "share" / _config.project_dir_name ) From 49bc1eba174db2c554da70dfb7dc915beb458607 Mon Sep 17 00:00:00 2001 From: Jeffrey Martin Date: Mon, 29 Jul 2024 11:16:46 -0500 Subject: [PATCH 4/4] default output location in gcg as cache_dir Signed-off-by: Jeffrey Martin --- garak/resources/gcg/generate_gcg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/garak/resources/gcg/generate_gcg.py b/garak/resources/gcg/generate_gcg.py index fb15dff27..0dc969fc1 100644 --- a/garak/resources/gcg/generate_gcg.py +++ b/garak/resources/gcg/generate_gcg.py @@ -65,7 +65,7 @@ gcg_parser.add_argument( "--outfile", type=str, - default=resource_data / "gcg_prompts.txt", + default=gcg_resource_data / "gcg_prompts.txt", help="Location to write GCG attack output", ) gcg_parser.add_argument(