Skip to content

Commit

Permalink
v1
Browse files Browse the repository at this point in the history
  • Loading branch information
SH-Src committed Jul 10, 2024
1 parent c6cc8a3 commit 2a459cc
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 0 deletions.
1 change: 1 addition & 0 deletions rdagent/app/data_mining/benchmark/Results_MobileNet.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions rdagent/app/data_mining/benchmark/Results_ResNet.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions rdagent/app/data_mining/benchmark/nasbench201_cifar10.json

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

104 changes: 104 additions & 0 deletions rdagent/app/data_mining/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import json

import numpy as np
from rdagent.oai.llm_utils import APIBackend

# Load the benchmark table (architecture id -> result record) and attach a
# 'rank' field to every record.
# NOTE: the path is relative, so the script must be run from the repository
# root.
# The file is read inside a `with` block so the handle is closed right away;
# `benchmark_file` stays bound (to the closed handle) for backward
# compatibility with code that references the module-level name.
with open('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json') as benchmark_file:
    data = json.load(benchmark_file)
keys = list(data)

# argsort().argsort() turns the accuracies into 0-based ascending ranks
# (0 = lowest mean_acc).
rank = np.array([data[k]['mean_acc'] for k in keys]).argsort().argsort()
for k, r in zip(keys, rank):
    # Flip the order so that higher accuracy gets a smaller rank number.
    # 3 ** 8 is hard-coded; presumably the table holds 3**8 architectures
    # (3 operations x 8 searchable layers), which would make the best
    # architecture rank 1 -- TODO confirm against the benchmark file.
    data[k]['rank'] = (3 ** 8) - r

# Currently unused placeholders; kept so the module-level names still exist.
max_k = None
max_acc = None

# Debug output: print the accuracy and id of the architecture ranked 300.
# NOTE(review): indentation was reconstructed; both prints are assumed to sit
# inside the `if`.
for k in keys:
    if data[k]['rank'] == 300:
        print(data[k]['mean_acc'])
        print(k)

# System-role message for the LLM; the disabled driver code at the bottom of
# this script passes it as `system_prompt`.
system_content = "You are an expert in the field of neural architecture search."

# Main task description sent as the user message. It lists the three candidate
# operations (IDs 0-2), shows their PyTorch implementations, describes the
# 12-layer macro architecture in which layer2-layer9 are undefined, and asks
# for the answer as a list of operation IDs (one per undefined layer).
# NOTE: everything between the triple quotes is runtime text sent to the model;
# do not reformat it.
user_input = '''Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
We define the 3 available operations as the following:
0: Identity(in_channels, out_channels, stride)
1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3)
2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5)
The implementation of the Identity is as follows:
class Identity(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(Identity, self).__init__()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels),
)
else:
self.downsample = None
def forward(self, x):
if self.downsample is not None:
x = self.downsample(x)
return x
The implementation of the InvertedResidual is as follows:
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
super(InvertedResidual, self).__init__()
hidden_dim = in_channels * expansion
self.conv = nn.Sequential(
nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(out_channels),
)
self.use_shortcut = in_channels == out_channels and stride == 1
def forward(self, x):
if self.use_shortcut:
return self.conv(x) + x
return self.conv(x)
The model architecture will be defined as the following.
{
layer1: {defined: True, operation: nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, bias=False)},
layer2: {defined: False, downsample: True , in_channels: 32, out_channels: 64 , stride: 2},
layer3: {defined: False, downsample: False, in_channels: 64, out_channels: 64 , stride: 1},
layer4: {defined: False, downsample: True , in_channels: 64, out_channels: 128, stride: 2},
layer5: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer6: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer7: {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
layer8: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer9: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer10: {defined: True, operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
layer11: {defined: True, operation: nn.AdaptiveAvgPool2d(output_size=1)},
layer12: {defined: True, operation: nn.Linear(in_features=1280, out_features=10)},
}
The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only.
Your response should be an operation ID list for the undefined layers. For example:
[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
'''

def experiments_prompt(arch_list, acc_list):
    """Build the prompt section that reports earlier experiment results.

    Args:
        arch_list: Architecture descriptions (e.g. operation ID lists), one
            per finished experiment.
        acc_list: Accuracies in percent, paired with ``arch_list`` by
            position. Each value is rendered with two decimals.

    Returns:
        The prompt text: an introductory line, one
        ``"<arch> gives an accuracy of <acc>%"`` line per pair, and a closing
        request for a better operation ID list. If the two lists differ in
        length, the extra items of the longer one are ignored (``zip``).
    """
    results = ''.join(
        '{} gives an accuracy of {:.2f}%\n'.format(arch, acc)
        for arch, acc in zip(arch_list, acc_list)
    )
    template = (
        'Here are some experimental results that you can use as a reference:\n'
        '{}\n'
        "Please suggest a better operation ID list that can improve the model's"
        ' performance on CIFAR10 beyond the experimental results provided above.\n'
    )
    return template.format(results)

# Closing instruction appended to the user prompt so that the reply contains
# only the operation ID list (the disabled code below parses the reply with
# json.loads).
suffix = '''Please do not include anything other than the operation ID list in your response.'''

# History of tried architectures and their accuracies. Nothing in the visible
# part of this script appends to them yet; presumably they are meant to be fed
# to experiments_prompt() -- TODO confirm.
arch_list = []
acc_list = []

# Disabled single-round driver: query the LLM, parse the returned ID list,
# join the IDs into the benchmark key and print that architecture's mean
# accuracy.
# res = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_input+suffix, system_prompt=system_content)
# operation_id_list = json.loads(res)
# operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
# print(data[operation_id_list_str]['mean_acc'])
80 changes: 80 additions & 0 deletions rdagent/scenarios/data_mining/prompts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
system_prompt: |-
You are an expert in the field of neural architecture search.
first_user_prompt: |-
"Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
We define the 3 available operations as the following:
0: Identity(in_channels, out_channels, stride)
1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3)
2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5)
The implementation of the Identity is as follows:
class Identity(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(Identity, self).__init__()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels),
)
else:
self.downsample = None
def forward(self, x):
if self.downsample is not None:
x = self.downsample(x)
return x
The implementation of the InvertedResidual is as follows:
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
super(InvertedResidual, self).__init__()
hidden_dim = in_channels * expansion
self.conv = nn.Sequential(
nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(out_channels),
)
self.use_shortcut = in_channels == out_channels and stride == 1
def forward(self, x):
if self.use_shortcut:
return self.conv(x) + x
return self.conv(x)
The model architecture will be defined as the following.
{
layer1: {defined: True, operation: nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, bias=False)},
layer2: {defined: False, downsample: True , in_channels: 32, out_channels: 64 , stride: 2},
layer3: {defined: False, downsample: False, in_channels: 64, out_channels: 64 , stride: 1},
layer4: {defined: False, downsample: True , in_channels: 64, out_channels: 128, stride: 2},
layer5: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer6: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer7: {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
layer8: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer9: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer10: {defined: True, operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
layer11: {defined: True, operation: nn.AdaptiveAvgPool2d(output_size=1)},
layer12: {defined: True, operation: nn.Linear(in_features=1280, out_features=10)},
}
The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only.
Your response should be an operation ID list for the undefined layers. For example:
[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
"
experiments_prompt: |-
Here are some experimental results that you can use as a reference:
{}
Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above.
suffix: |-
Please do not include anything other than the operation ID list in your response.
90 changes: 90 additions & 0 deletions rdagent/scenarios/data_mining/proposal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import json
from pathlib import Path
from typing import Sequence
from rdagent.core.experiment import Experiment
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import Hypothesis, Hypothesis2Experiment, HypothesisExperiment2Feedback, HypothesisFeedback, HypothesisGen, Trace
from rdagent.core.scenario import Scenario
from rdagent.oai.llm_utils import APIBackend

prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml")

# print(prompt_dict)
class DMScenario(Scenario):
    """Scenario whose prompts come from the ``prompts.yaml`` next to this module.

    All values are exposed as read-only properties, so they are read as
    attributes (``scen.suffix``), not called.
    """

    @property
    def background(self) -> str:
        """System prompt describing the assistant's role."""
        return prompt_dict["system_prompt"]

    @property
    def backgroud(self) -> str:
        """Misspelled alias of :attr:`background`, kept for backward compatibility."""
        return self.background

    @property
    def source_data(self) -> Path:
        """Path of the benchmark JSON file (relative to the working directory)."""
        return Path('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json')

    @property
    def initial_prompt(self) -> str:
        """First user prompt: the task and search-space description."""
        return prompt_dict["first_user_prompt"]

    @property
    def suffix(self) -> str:
        """Closing instruction appended to every user prompt."""
        return prompt_dict["suffix"]

class DMTrace(Trace):
    """Trace that records finished experiments and renders them as prompt text.

    Attributes are set directly here; the base-class initializer is not
    invoked (unchanged from the original behavior).
    """

    def __init__(self, scen) -> None:
        """Store the scenario and start with an empty history."""
        self.scen = scen
        # Each entry is indexed as entry[0] (architecture) and entry[1]
        # (accuracy, formatted as a float) by get_exp_prompts().
        self.hist = []

    def add_exp(self, new_exp):
        """Append one experiment record to the history."""
        self.hist.append(new_exp)

    def get_exp_prompts(self):
        """Return the prompt section describing past results.

        Returns:
            An empty string when no experiment has been recorded; otherwise an
            introductory line, one ``"<arch> gives an accuracy of <acc>%"``
            line per record, and a request for a better operation ID list.
        """
        if not self.hist:
            return ''
        results = ''.join(
            '{} gives an accuracy of {:.2f}%\n'.format(entry[0], entry[1])
            for entry in self.hist
        )
        # A newline follows "reference:" so the first result starts on its own
        # line, matching the experiments_prompt template in prompts.yaml.
        return (
            'Here are some experimental results that you can use as a reference:\n'
            '{}\n'
            "Please suggest a better operation ID list that can improve the model's"
            ' performance on CIFAR10 beyond the experimental results provided above.\n'
        ).format(results)

class DMHypothesis(Hypothesis):
    """Hypothesis for the data-mining scenario; adds nothing to the base class."""

    def __init__(self, hypothesis, reason):
        """Forward ``hypothesis`` and ``reason`` unchanged to the base initializer."""
        super().__init__(hypothesis, reason)


class DMHypothesisGen(HypothesisGen):
    """Asks the LLM for the next architecture to try."""

    def __init__(self, scen: DMScenario):
        """Forward the scenario to the base initializer."""
        super().__init__(scen)

    def gen(self, trace: DMTrace) -> Hypothesis:
        """Query the LLM and wrap the proposed architecture in a hypothesis.

        Args:
            trace: History of earlier experiments; its rendered text is
                inserted between the task description and the suffix.

        Returns:
            A ``DMHypothesis`` whose hypothesis is the operation IDs joined
            into one string (e.g. ``[1, 2, 0]`` -> ``"120"``) and whose reason
            is ``None``.

        Raises:
            json.JSONDecodeError: If the LLM reply is not valid JSON.
        """
        scen = self.scen
        # The scenario exposes its prompts as properties, so they are read as
        # attributes; calling them would raise TypeError on the returned str.
        system_prompt = getattr(scen, "background", None)
        if system_prompt is None:
            # Tolerate the legacy misspelling of the property name.
            system_prompt = scen.backgroud
        user_prompt = scen.initial_prompt + trace.get_exp_prompts() + scen.suffix

        res = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
        )
        operation_id_list = json.loads(res)
        operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
        return DMHypothesis(operation_id_list_str, None)

class DMHypotheis2exp(Hypothesis2Experiment):
    """Turns a hypothesis into an experiment by looking it up in the benchmark."""

    def __init__(self, scen) -> None:
        """Load the benchmark table referenced by the scenario.

        Args:
            scen: Scenario whose ``source_data`` property gives the path of
                the benchmark JSON file.
        """
        super().__init__()
        # `source_data` is a property returning a path, so it is read rather
        # than called; the `with` block closes the file after parsing.
        with open(scen.source_data, encoding="utf-8") as benchmark_file:
            self.data = json.load(benchmark_file)

    def generate(self, hypothesis):
        """Return the experiment holding the benchmark record for ``hypothesis``.

        Raises:
            KeyError: If ``hypothesis.hypothesis`` is not a key of the
                benchmark table.
        """
        return DMExperiments([self.data[hypothesis.hypothesis]])

class DMExperiments(Experiment):
    """Experiment container for the data-mining scenario; adds nothing to the base class."""

    def __init__(self, sub_tasks: Sequence) -> None:
        """Forward ``sub_tasks`` unchanged to the base initializer."""
        super().__init__(sub_tasks)

class DMFeedback(HypothesisFeedback):
    """Feedback object that carries the accuracy of an experiment."""

    def __init__(self, acc) -> None:
        """Run the base initializer without arguments, then store ``acc``."""
        super().__init__()
        self.acc = acc

class DMexp2feedback(HypothesisExperiment2Feedback):
    """Feedback generator for the data-mining scenario (not implemented yet)."""

    def __init__(self) -> None:
        """Run the base initializer without arguments."""
        super().__init__()

    def generateFeedback(self, ti: Experiment, hypothesis: Hypothesis, trace: Trace):
        """Placeholder: ignores all arguments and returns ``None``."""
        return None


0 comments on commit 2a459cc

Please sign in to comment.