Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use XDG paths for configuration data and caching #799

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/basic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Config values are loaded in the following priority (lowest-first):

* Plugin defaults in the code
* Core config: from ``garak/resources/garak.core.yaml``; not to be overridden
* Site config: from ``garak/garak.site.yaml``
* Site config: from ``$XDG_CONFIG_HOME/garak/garak.site.yaml`` (by default ``$HOME/.config/garak/garak.site.yaml``)
* Runtime config: from an optional config file specified manually, via e.g. CLI parameter
* Command-line options

Expand Down
26 changes: 20 additions & 6 deletions garak/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
import pathlib
from typing import List
import yaml
from xdg_base_dirs import (
xdg_cache_home,
xdg_config_home,
xdg_data_home,
)

DICT_CONFIG_AFTER_LOAD = False

Expand All @@ -26,6 +31,7 @@
run_params = "seed deprefix eval_threshold generations probe_tags interactive".split()
plugins_params = "model_type model_name extended_detectors".split()
reporting_params = "taxonomy report_prefix".split()
project_dir_name = "garak"


loaded = False
Expand All @@ -52,10 +58,18 @@ class TransientConfig(GarakSubConfig):
hitlogfile = None
args = None # only access this when determining what was passed on CLI
run_id = None
basedir = pathlib.Path(__file__).parents[0]
package_dir = pathlib.Path(__file__).parents[0]
config_dir = xdg_config_home() / project_dir_name
data_dir = xdg_data_home() / project_dir_name
cache_dir = xdg_cache_home() / project_dir_name
starttime = None
starttime_iso = None

# initialize the user home and cache paths if they do not exist
config_dir.mkdir(mode=0o740, parents=True, exist_ok=True)
data_dir.mkdir(mode=0o740, parents=True, exist_ok=True)
cache_dir.mkdir(mode=0o740, parents=True, exist_ok=True)


transient = TransientConfig()

Expand Down Expand Up @@ -136,7 +150,7 @@ def _store_config(settings_files) -> None:

def load_base_config() -> None:
global loaded
settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")]
settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")]
logging.debug("Loading configs from: %s", ",".join(settings_files))
_store_config(settings_files=settings_files)
loaded = True
Expand All @@ -149,9 +163,9 @@ def load_config(
# and then not have cli be upset when these are not given as cli params
global loaded

settings_files = [str(transient.basedir / "resources" / "garak.core.yaml")]
settings_files = [str(transient.package_dir / "resources" / "garak.core.yaml")]

fq_site_config_filename = str(transient.basedir / site_config_filename)
fq_site_config_filename = str(transient.config_dir / site_config_filename)
if os.path.isfile(fq_site_config_filename):
settings_files.append(fq_site_config_filename)
else:
Expand All @@ -163,10 +177,10 @@ def load_config(
if os.path.isfile(run_config_filename):
settings_files.append(run_config_filename)
elif os.path.isfile(
str(transient.basedir / "configs" / (run_config_filename + ".yaml"))
str(transient.package_dir / "configs" / (run_config_filename + ".yaml"))
):
settings_files.append(
str(transient.basedir / "configs" / (run_config_filename + ".yaml"))
str(transient.package_dir / "configs" / (run_config_filename + ".yaml"))
)
else:
message = f"run config not found: {run_config_filename}"
Expand Down
31 changes: 22 additions & 9 deletions garak/_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def default(self, obj):
return sorted(list(obj)) # allow set as list, assumes values can be sorted
if isinstance(obj, Path):
# relative path for now, may be better to suppress `Path` objects
return str(obj).replace(str(_config.transient.basedir), "")
return str(obj).replace(str(_config.transient.package_dir), "")
try:
return json.JSONEncoder.default(self, obj)
except TypeError as e:
Expand All @@ -35,8 +35,12 @@ def default(self, obj):


class PluginCache:
_plugin_cache_file = _config.transient.basedir / "resources" / "plugin_cache.json"
_user_plugin_cache_file = _plugin_cache_file
_plugin_cache_filename = (
_config.transient.package_dir / "resources" / "plugin_cache.json"
)
_user_plugin_cache_filename = (
_config.transient.cache_dir / "resources" / "plugin_cache.json"
)
_plugin_cache_dict = None

def __init__(self) -> None:
Expand All @@ -52,11 +56,16 @@ def _extract_modules_klasses(base_klass):
]

def _load_plugin_cache(self):
if not os.path.exists(self._plugin_cache_file):
if not os.path.exists(self._plugin_cache_filename):
self._build_plugin_cache()
if not os.path.exists(self._user_plugin_cache_file):
shutil.copy2(self._plugin_cache_file, self._user_plugin_cache_file)
with open(self._user_plugin_cache_file, "r", encoding="utf-8") as cache_file:
if not os.path.exists(self._user_plugin_cache_filename):
self._user_plugin_cache_filename.parent.mkdir(
mode=0o740, parents=True, exist_ok=True
)
shutil.copy2(self._plugin_cache_filename, self._user_plugin_cache_filename)
with open(
self._user_plugin_cache_filename, "r", encoding="utf-8"
) as cache_file:
local_cache = json.load(cache_file)
return local_cache

Expand All @@ -79,7 +88,9 @@ def _build_plugin_cache(self):
sorted_keys = sorted(list(plugin_dict.keys()))
local_cache[plugin_type] = {i: plugin_dict[i] for i in sorted_keys}

with open(self._user_plugin_cache_file, "w", encoding="utf-8") as cache_file:
with open(
self._user_plugin_cache_filename, "w", encoding="utf-8"
) as cache_file:
json.dump(local_cache, cache_file, cls=PluginEncoder, indent=2)

def _enumerate_plugin_klasses(self, category: str) -> List[Callable]:
Expand All @@ -93,7 +104,9 @@ def _enumerate_plugin_klasses(self, category: str) -> List[Callable]:

module_plugin_names = set()

for module_filename in sorted(os.listdir(_config.transient.basedir / category)):
for module_filename in sorted(
os.listdir(_config.transient.package_dir / category)
):
if not module_filename.endswith(".py"):
continue
if module_filename.startswith("__"):
Expand Down
4 changes: 2 additions & 2 deletions garak/analyze/report_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from garak import _config

templateLoader = jinja2.FileSystemLoader(
searchpath=_config.transient.basedir / "analyze" / "templates"
searchpath=_config.transient.package_dir / "analyze" / "templates"
)
templateEnv = jinja2.Environment(loader=templateLoader)

Expand All @@ -29,7 +29,7 @@


misp_resource_file = (
_config.transient.basedir / "garak" / "resources" / "misp_descriptions.tsv"
_config.transient.package_dir / "garak" / "resources" / "misp_descriptions.tsv"
)
misp_descriptions = {}
if os.path.isfile(misp_resource_file):
Expand Down
32 changes: 20 additions & 12 deletions garak/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@


def start_logging():
from garak import _config
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from garak import _config
""" initialises logging. assumes garak _config has already been loaded. """
from garak import _config

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we consider enforcing this vs documenting it?

Suggested change
from garak import _config
from garak import _config
if not _config.loaded:
raise RuntimeError("Configuration must be loaded to start logging!")

Copy link
Collaborator Author

@jmartin-tech jmartin-tech Jul 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking closer at this, I don't think we need the warning at all: the _config.transient.*_dir values are all defined as soon as the module is imported, and IMO starting logging should not be restricted to after the runtime config has been finalized.

>>> from garak import _config
>>> _config.transient.data_dir
PosixPath('/Users/jemartin/.local/share/garak')
>>>

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed in 0cad75c. Enforcing the loaded requirement is not really valid, as noted in my comment above: the value consumed in this method is available on import of _config.


logging.basicConfig(
filename="garak.log",
filename=_config.transient.data_dir / "garak.log",
level=logging.DEBUG,
format="%(asctime)s %(levelname)s %(message)s",
)
Expand All @@ -32,6 +34,7 @@ def start_run():
import os
import uuid

from pathlib import Path
from garak import _config

logging.info("started at %s", _config.transient.starttime_iso)
Expand All @@ -41,19 +44,24 @@ def start_run():
"⚠️ The current/default config is optimised for speed rather than thoroughness. Try e.g. --config full for a stronger test, or specify some probes."
)
_config.transient.run_id = str(uuid.uuid4()) # uuid1 is safe but leaks host info
report_path = Path(_config.reporting.report_dir)
if not report_path.is_absolute():
logging.debug("relative report dir provided")
report_path = _config.transient.data_dir / _config.reporting.report_dir
if not os.path.isdir(report_path):
try:
report_path.mkdir(mode=0o740, parents=True, exist_ok=True)
except PermissionError as e:
raise PermissionError(
f"Can't create logging directory {report_path}, quitting"
) from e

filename = f"garak.{_config.transient.run_id}.report.jsonl"
if not _config.reporting.report_prefix:
if not os.path.isdir(_config.reporting.report_dir):
try:
os.mkdir(_config.reporting.report_dir)
except PermissionError as e:
raise PermissionError(
f"Can't create logging directory {_config.reporting.report_dir}, quitting"
) from e
_config.transient.report_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.report.jsonl"
filename = f"garak.{_config.transient.run_id}.report.jsonl"
else:
_config.transient.report_filename = (
_config.reporting.report_prefix + ".report.jsonl"
)
filename = _config.reporting.report_prefix + ".report.jsonl"
_config.transient.report_filename = str(report_path / filename)
_config.transient.reportfile = open(
_config.transient.report_filename, "w", buffering=1, encoding="utf-8"
)
Expand Down
8 changes: 5 additions & 3 deletions garak/detectors/riskywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

surge_list = defaultdict(list)
with open(
_config.transient.basedir / "resources" / "profanity_en.csv", "r", encoding="utf-8"
_config.transient.package_dir / "resources" / "profanity_en.csv",
"r",
encoding="utf-8",
) as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
Expand All @@ -24,7 +26,7 @@
MIN_OFCOM_STRENGTH = 2
ofcom_list = defaultdict(list)
with open(
_config.transient.basedir / "resources" / "ofcom-potentially-offensive.txt",
_config.transient.package_dir / "resources" / "ofcom-potentially-offensive.txt",
"r",
encoding="utf-8",
) as csvfile:
Expand Down Expand Up @@ -283,7 +285,7 @@ class LDNOOBW(StringDetector):
def __init__(self, config_root=_config):
substrings = []
with open(
_config.transient.basedir / "resources" / "ldnoobw-en.txt",
_config.transient.package_dir / "resources" / "ldnoobw-en.txt",
"r",
encoding="utf-8",
) as wordlist:
Expand Down
2 changes: 1 addition & 1 deletion garak/detectors/specialwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class SlursReclaimedSlurs(StringDetector):
def __init__(self, config_root=_config):
self.substrings = set([])
with open(
_config.transient.basedir / "resources" / "slurprompts.jsonl",
_config.transient.package_dir / "resources" / "slurprompts.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
10 changes: 5 additions & 5 deletions garak/evaluators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import json
import logging
from pathlib import Path
from typing import Iterable

from colorama import Fore, Style
Expand Down Expand Up @@ -72,12 +73,11 @@ def evaluate(self, attempts: Iterable[garak.attempt.Attempt]) -> None:
hitlog_mode = (
"w" if _config.transient.hitlogfile is None else "a"
)
if not _config.reporting.report_prefix:
hitlog_filename = f"{_config.reporting.report_dir}/garak.{_config.transient.run_id}.hitlog.jsonl"
else:
hitlog_filename = (
_config.reporting.report_prefix + ".hitlog.jsonl"
hitlog_filename = Path(
str(_config.transient.report_filename).replace(
".report.jsonl", ".hitlog.jsonl"
)
)
logging.info("hit log in %s", hitlog_filename)
_config.transient.hitlogfile = open(
hitlog_filename,
Expand Down
2 changes: 1 addition & 1 deletion garak/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def settings_ns_provider(self) -> argparse.Namespace:
def banner():
"""Display a random banner"""
with open(
_config.transient.basedir / "resources/banners", "r", encoding="utf-8"
_config.transient.package_dir / "resources" / "banners", "r", encoding="utf-8"
) as bannerfile:
banners = bannerfile.read().lstrip().split("\n\n")
print(Fore.LIGHTRED_EX + f"\n{random.choice(banners)}\n" + Fore.RESET)
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/continuation.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def __init__(self, config_root=_config):
# append these to each of the prefixes for that term
slur_prefixes = defaultdict(set)
with open(
_config.transient.basedir / "resources" / "slurprompts_80.jsonl",
_config.transient.package_dir / "resources" / "slurprompts_80.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
4 changes: 2 additions & 2 deletions garak/probes/dan.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ class AutoDANCached(Probe):

def __init__(
self,
autodan_prompt_location: str = _config.transient.basedir
autodan_prompt_location: str = _config.transient.package_dir
/ "resources"
/ "autodan"
/ "data"
Expand Down Expand Up @@ -645,7 +645,7 @@ def __init__(self, config_root=_config):
super().__init__(config_root=config_root)

inthewild_path = str(
_config.transient.basedir / "resources" / "inthewild_jailbreak_llms.txt"
_config.transient.package_dir / "resources" / "inthewild_jailbreak_llms.txt"
)

with open(inthewild_path, "r", encoding="utf-8") as f:
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/donotanswer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def load_local_data(self):
# prompts
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "donotanswer"
/ f"{self.dna_category}.txt",
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _load_payloads():

payload_library["slur_terms"] = []
with open(
_config.transient.basedir / "resources" / "slurprompts_80.jsonl",
_config.transient.package_dir / "resources" / "slurprompts_80.jsonl",
"r",
encoding="utf-8",
) as slurfile:
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/leakreplay.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from garak import _config
from garak.probes.base import Probe

resources_path = _config.transient.basedir / "resources"
resources_path = _config.transient.package_dir / "resources"


class LiteratureCloze(Probe):
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/misleading.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, config_root=_config):

for prefix in prefixes:
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "truefalse_falseclaims_50.txt",
"r",
Expand Down
2 changes: 1 addition & 1 deletion garak/probes/realtoxicityprompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ class RTPBlank(Probe):
for probe_class in list(probe_tags.keys()):
# load filter lists
with open(
_config.transient.basedir
_config.transient.package_dir
/ "resources"
/ "realtoxicityprompts"
/ f"rtp_{probe_class}.txt",
Expand Down
6 changes: 3 additions & 3 deletions garak/probes/snowball.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GraphConnectivity(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "graph_connectivity.json",
_config.transient.package_dir / "resources" / "graph_connectivity.json",
"r",
encoding="utf-8",
) as f:
Expand Down Expand Up @@ -70,7 +70,7 @@ class Primes(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "primality_testing.json",
_config.transient.package_dir / "resources" / "primality_testing.json",
"r",
encoding="utf-8",
) as f:
Expand Down Expand Up @@ -109,7 +109,7 @@ class Senators(Probe):
def __init__(self, config_root=_config):
super().__init__(config_root=config_root)
with open(
_config.transient.basedir / "resources" / "senator_search.json",
_config.transient.package_dir / "resources" / "senator_search.json",
"r",
encoding="utf-8",
) as f:
Expand Down
Loading
Loading