Skip to content

Commit

Permalink
separate out a policy harness, add a hook to let it do its magic
Browse files Browse the repository at this point in the history
  • Loading branch information
leondz committed Feb 20, 2025
1 parent dc39223 commit a23302c
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 18 deletions.
14 changes: 7 additions & 7 deletions garak/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,11 @@ def plugin_info(plugin_name):


# do a run
def probewise_run(generator, probe_names, evaluator, buffs, policy_run=False):
def probewise_run(generator, probe_names, evaluator, buffs):
import garak.harnesses.probewise

probewise_h = garak.harnesses.probewise.ProbewiseHarness()
return list(
probewise_h.run(generator, probe_names, evaluator, buffs, policy_run=policy_run)
)
return list(probewise_h.run(generator, probe_names, evaluator, buffs))


def pxd_run(generator, probe_names, detector_names, evaluator, buffs):
Expand Down Expand Up @@ -321,9 +319,11 @@ def run_policy_scan(generator, _config):

evaluator = garak.evaluators.ThresholdEvaluator(_config.run.eval_threshold)
buffs = []
result = probewise_run(
generator, policy_probe_names, evaluator, buffs, policy_run=True
)

import garak.harnesses.probewise

policy_h = garak.harnesses.probewise.PolicyHarness()
result = list(policy_h.run(generator, policy_probe_names, evaluator, buffs))

policy = garak.policy.Policy()
policy.parse_eval_result(result, threshold=_config.policy.threshold)
Expand Down
30 changes: 19 additions & 11 deletions garak/harnesses/probewise.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@


class ProbewiseHarness(Harness):

def _load_detector(self, detector_name: str) -> Detector:
detector = _plugins.load_plugin(
"detectors." + detector_name, break_on_fail=False
Expand All @@ -27,7 +28,10 @@ def _load_detector(self, detector_name: str) -> Detector:
logging.error(f" detector load failed: {detector_name}, skipping >>")
return False

def run(self, model, probenames, evaluator, buff_names=None, policy_run=False):
def _probe_check(self, probe):
return probe

def run(self, model, probenames, evaluator, buff_names=None):
"""Execute a probe-by-probe scan
Probes are executed in name order. For each probe, the detectors
Expand All @@ -54,16 +58,16 @@ def run(self, model, probenames, evaluator, buff_names=None, policy_run=False):
:type buff_names: List[str]
"""

if buff_names is None:
buff_names = []

if not probenames:
msg = "No probes, nothing to do"
logging.warning(msg)
if hasattr(_config.system, "verbose") and _config.system.verbose >= 2:
print(msg)
raise ValueError(msg)

if buff_names is None:
buff_names = []

self._load_buffs(buff_names)

probenames = sorted(probenames)
Expand All @@ -83,13 +87,7 @@ def run(self, model, probenames, evaluator, buff_names=None, policy_run=False):
continue
detectors = []

if (
policy_run
): # policy run conditions: probe is policy probe; use different generation count (def. 1)
assert (
probe.policy_probe == True
), "only policy probes should be used in policy runs"
setattr(probe, "generations", _config.policy.generations)
probe = self._probe_check(probe)

if probe.primary_detector:
d = self._load_detector(probe.primary_detector)
Expand All @@ -116,3 +114,13 @@ def run(self, model, probenames, evaluator, buff_names=None, policy_run=False):
result = h._execute(model, [probe], detectors, evaluator)
yield list(result) # ensure the generator is executed
logging.debug("harness probewise: complete")


class PolicyHarness(ProbewiseHarness):
    """Probewise harness variant used for policy scans.

    Overrides the ``_probe_check`` hook so that every probe is validated
    as a policy probe and given the policy generation count before use.
    """

    def _probe_check(self, probe):
        """Validate a probe for a policy run and set its generation count.

        :param probe: the probe instance about to be run
        :return: the same probe object, with ``generations`` overwritten
            by ``_config.policy.generations``
        :raises AssertionError: if ``probe.policy_probe`` is not equal to
            ``True``
        """
        # Raise explicitly instead of using an ``assert`` statement: asserts
        # are removed under ``python -O``, which would let non-policy probes
        # through silently. The exception type and message are unchanged.
        if not (probe.policy_probe == True):  # noqa: E712 -- keep equality semantics
            raise AssertionError("only policy probes should be used in policy runs")
        probe.generations = _config.policy.generations
        return probe

0 comments on commit a23302c

Please sign in to comment.