From c5c8eff9c7b83c8d39e288bad5c3d6d18fa65479 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 25 Oct 2023 19:35:23 +0000 Subject: [PATCH 1/4] remove duplicated blacklist mock during setup --- neurons/validators/validator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neurons/validators/validator.py b/neurons/validators/validator.py index 869c918..9180090 100644 --- a/neurons/validators/validator.py +++ b/neurons/validators/validator.py @@ -190,7 +190,6 @@ def __init__(self): MockRewardModel(RewardModelType.nsfw.value), ] bt.logging.debug(str(self.reward_functions)) - self.blacklist = MockRewardModel(RewardModelType.blacklist.value) else: self.reward_weights = torch.tensor( [ From 71098add455929f66de4665c7cc02abed600c9ed Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 25 Oct 2023 23:39:59 +0000 Subject: [PATCH 2/4] consistent use of mock reward model --- neurons/validators/validator.py | 1 - prompting/validators/mock.py | 18 +++++++++++++++++- prompting/validators/reward/__init__.py | 1 - prompting/validators/reward/reward.py | 16 ---------------- prompting/validators/utils.py | 2 +- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/neurons/validators/validator.py b/neurons/validators/validator.py index 9180090..be24e0d 100644 --- a/neurons/validators/validator.py +++ b/neurons/validators/validator.py @@ -52,7 +52,6 @@ OpenAssistantRewardModel, ReciprocateRewardModel, RelevanceRewardModel, - MockRewardModel, DahoasRewardModel, DiversityRewardModel, PromptRewardModel, diff --git a/prompting/validators/mock.py b/prompting/validators/mock.py index 5c1056c..e71b257 100644 --- a/prompting/validators/mock.py +++ b/prompting/validators/mock.py @@ -45,7 +45,23 @@ def resync( pass -class MockRewardModel(torch.nn.Module): +class MockRewardModel(BaseRewardModel): + @property + def name(self) -> str: + return self.mock_name + + def __init__(self, mock_name: str = "MockReward"): + super().__init__() + self.mock_name = mock_name + self.question_blacklist = {} + + def apply(self, prompt: str, completion: List[str], name: str) -> torch.FloatTensor: + mock_reward = torch.tensor([1 for _ in completion], dtype=torch.float32) + return mock_reward, mock_reward + + def reset(self): + return self + def reward( self, completions_with_prompt: List[str], diff --git a/prompting/validators/reward/__init__.py b/prompting/validators/reward/__init__.py index d26773f..68d9757 100644 --- a/prompting/validators/reward/__init__.py +++ b/prompting/validators/reward/__init__.py @@ -6,7 +6,6 @@ from .reciprocate import ReciprocateRewardModel from .relevance import RelevanceRewardModel from .reward import BaseRewardModel -from .reward import MockRewardModel from .dahoas import DahoasRewardModel from .diversity import DiversityRewardModel from .prompt import PromptRewardModel diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 23e7479..489466c 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -138,19 +138,3 @@ def apply( # Return the filled rewards. return filled_rewards, filled_rewards_normalized - -class MockRewardModel(BaseRewardModel): - @property - def name(self) -> str: - return self.mock_name - - def __init__(self, mock_name: str = "MockReward"): - super().__init__() - self.mock_name = mock_name - - def apply(self, prompt: str, completion: List[str], name: str) -> torch.FloatTensor: - mock_reward = torch.tensor([1 for _ in completion], dtype=torch.float32) - return mock_reward, mock_reward - - def reset(self): - return self diff --git a/prompting/validators/utils.py b/prompting/validators/utils.py index f39daed..f9a294c 100644 --- a/prompting/validators/utils.py +++ b/prompting/validators/utils.py @@ -23,7 +23,7 @@ import bittensor as bt import prompting.validators as validators from prompting.validators.misc import ttl_get_block -from prompting.validators.reward import MockRewardModel +from prompting.validators.mock import MockRewardModel def should_reinit_wandb(self): From f6b8882c4a764ccc327efee1abcf5c4462579c59 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Wed, 25 Oct 2023 23:50:09 +0000 Subject: [PATCH 3/4] run black --- prompting/validators/reward/reward.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prompting/validators/reward/reward.py b/prompting/validators/reward/reward.py index 489466c..c20220d 100644 --- a/prompting/validators/reward/reward.py +++ b/prompting/validators/reward/reward.py @@ -137,4 +137,3 @@ def apply( # Return the filled rewards. return filled_rewards, filled_rewards_normalized - From 4d5e9894bb3b2a1e26f48b2ab9306b0792f0f9c8 Mon Sep 17 00:00:00 2001 From: ifrit98 Date: Thu, 26 Oct 2023 13:04:04 +0000 Subject: [PATCH 4/4] import and mock attr fixes --- prompting/validators/mock.py | 6 ++++-- prompting/validators/utils.py | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/prompting/validators/mock.py b/prompting/validators/mock.py index e71b257..ae07e76 100644 --- a/prompting/validators/mock.py +++ b/prompting/validators/mock.py @@ -21,13 +21,14 @@ import bittensor as bt from prompting.validators.prompts import FirewallPrompt, FollowupPrompt, AnswerPrompt from prompting.validators.gating import BaseGatingModel +from prompting.validators.reward import BaseRewardModel from typing import List class MockGatingModel(BaseGatingModel): def __init__(self, num_uids: int): super(MockGatingModel, self).__init__() - # super(MockGatingModel, self).__init__() + self.num_uids = num_uids self.linear = torch.nn.Linear(256, 10) @@ -53,7 +54,8 @@ def name(self) -> str: def __init__(self, mock_name: str = "MockReward"): super().__init__() self.mock_name = mock_name - self.question_blacklist = {} + self.question_blacklist = [] + self.answer_blacklist = [] def apply(self, prompt: str, completion: List[str], name: str) -> torch.FloatTensor: mock_reward = torch.tensor([1 for _ in completion], dtype=torch.float32) diff --git a/prompting/validators/utils.py b/prompting/validators/utils.py index f9a294c..072542e 100644 --- a/prompting/validators/utils.py +++ b/prompting/validators/utils.py @@ -23,7 +23,6 @@ import bittensor as bt import prompting.validators as validators from prompting.validators.misc import ttl_get_block -from prompting.validators.mock import MockRewardModel def should_reinit_wandb(self): @@ -49,7 +48,7 @@ def init_wandb(self, reinit=False): if self.config.neuron.use_custom_gating_model: tags.append("custom_gating_model") for fn in self.reward_functions: - if not isinstance(fn, MockRewardModel): + if not self.config.neuron.mock_reward_models: tags.append(str(fn.name)) if self.config.neuron.disable_set_weights: tags.append("disable_set_weights")