-
-
Notifications
You must be signed in to change notification settings - Fork 119
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
280 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import json | ||
|
||
import numpy as np | ||
from rdagent.oai.llm_utils import APIBackend | ||
|
||
# Load the NAS-Bench-Macro CIFAR10 benchmark. The context manager closes the
# file handle as soon as the JSON has been parsed (the original left it open).
with open('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json') as benchmark_file:
    data = json.load(benchmark_file)
keys = list(data.keys())
# argsort().argsort() turns accuracies into 0-based ascending positions:
# the lowest mean_acc gets 0, the highest gets len(keys) - 1.
rank = np.array([data[k]['mean_acc'] for k in keys]).argsort().argsort()
# Flip the positions so a higher accuracy yields a smaller rank. 3 ** 8 is the
# number of combinations of 3 operations over the 8 undefined layers described
# in the prompt; the best entry gets rank 1 only if the file holds all of them.
for k, r in zip(keys, rank):
    data[k]['rank'] = (3 ** 8) - r
|
||
# Placeholders; nothing in this visible script assigns them afterwards.
max_k, max_acc = None, None
# Print the mean accuracy and the key of every entry whose rank is exactly 300.
for k in keys:
    entry = data[k]
    if entry['rank'] != 300:
        continue
    print(entry['mean_acc'])
    print(k)
|
||
# System message for the chat model (used as `system_prompt` in the disabled
# example call near the end of this script).
system_content = "You are an expert in the field of neural architecture search."

# Main task prompt: lists the three candidate operations with their reference
# implementations, describes the 12-layer model skeleton, and asks for an
# operation ID list covering the undefined layers (layer2 - layer9).
user_input = '''Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
We define the 3 available operations as the following:
0: Identity(in_channels, out_channels, stride)
1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3)
2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5)
The implementation of the Identity is as follows:
class Identity(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(Identity, self).__init__()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels),
)
else:
self.downsample = None
def forward(self, x):
if self.downsample is not None:
x = self.downsample(x)
return x
The implementation of the InvertedResidual is as follows:
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
super(InvertedResidual, self).__init__()
hidden_dim = in_channels * expansion
self.conv = nn.Sequential(
nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(out_channels),
)
self.use_shortcut = in_channels == out_channels and stride == 1
def forward(self, x):
if self.use_shortcut:
return self.conv(x) + x
return self.conv(x)
The model architecture will be defined as the following.
{
layer1: {defined: True, operation: nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, bias=False)},
layer2: {defined: False, downsample: True , in_channels: 32, out_channels: 64 , stride: 2},
layer3: {defined: False, downsample: False, in_channels: 64, out_channels: 64 , stride: 1},
layer4: {defined: False, downsample: True , in_channels: 64, out_channels: 128, stride: 2},
layer5: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer6: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer7: {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
layer8: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer9: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer10: {defined: True, operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
layer11: {defined: True, operation: nn.AdaptiveAvgPool2d(output_size=1)},
layer12: {defined: True, operation: nn.Linear(in_features=1280, out_features=10)},
}
The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only.
Your response should be an operation ID list for the undefined layers. For example:
[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
'''
|
||
def experiments_prompt(arch_list, acc_list):
    """Build the prompt section that reports earlier experimental results.

    Args:
        arch_list: Architectures already tried; each item is rendered with
            ``str.format``'s default ``{}`` conversion.
        acc_list: Accuracies paired position-by-position with ``arch_list``;
            each is rendered with two decimals followed by ``%``.

    Returns:
        The header line, one ``"<arch> gives an accuracy of <acc>%"`` line per
        pair, and the closing request for a better operation ID list. Extra
        items in the longer input are ignored because the pairs come from
        ``zip``.
    """
    results = ''.join(
        '{} gives an accuracy of {:.2f}%\n'.format(arch, acc)
        for arch, acc in zip(arch_list, acc_list)
    )
    template = (
        'Here are some experimental results that you can use as a reference:\n'
        '{}\n'
        "Please suggest a better operation ID list that can improve the model's "
        'performance on CIFAR10 beyond the experimental results provided above.\n'
    )
    return template.format(results)
|
||
# Closing instruction; the disabled example below appends it to `user_input`.
suffix = '''Please do not include anything other than the operation ID list in your response.'''

# Empty accumulators. Presumably the architectures tried so far and their
# accuracies, matching the parameters of `experiments_prompt` - TODO confirm.
arch_list = []
acc_list = []

# Disabled single-shot example: query the model once, parse the reply as a JSON
# list of operation IDs, join the IDs into a benchmark key, and print that
# entry's mean accuracy.
# res = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_input+suffix, system_prompt=system_content)
# operation_id_list = json.loads(res)
# operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
# print(data[operation_id_list_str]['mean_acc'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
system_prompt: |- | ||
You are an expert in the field of neural architecture search. | ||
first_user_prompt: |- | ||
"Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10. | ||
We define the 3 available operations as the following: | ||
0: Identity(in_channels, out_channels, stride) | ||
1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3) | ||
2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5) | ||
The implementation of the Identity is as follows: | ||
class Identity(nn.Module): | ||
def __init__(self, in_channels, out_channels, stride): | ||
super(Identity, self).__init__() | ||
if stride != 1 or in_channels != out_channels: | ||
self.downsample = nn.Sequential( | ||
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False), | ||
nn.BatchNorm2d(out_channels), | ||
) | ||
else: | ||
self.downsample = None | ||
def forward(self, x): | ||
if self.downsample is not None: | ||
x = self.downsample(x) | ||
return x | ||
The implementation of the InvertedResidual is as follows: | ||
class InvertedResidual(nn.Module): | ||
def __init__(self, in_channels, out_channels, stride, expansion, kernel_size): | ||
super(InvertedResidual, self).__init__() | ||
hidden_dim = in_channels * expansion | ||
self.conv = nn.Sequential( | ||
nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False), | ||
nn.BatchNorm2d(hidden_dim), | ||
nn.ReLU(inplace=True), | ||
nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False), | ||
nn.BatchNorm2d(hidden_dim), | ||
nn.ReLU(inplace=True), | ||
nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False), | ||
nn.BatchNorm2d(out_channels), | ||
) | ||
self.use_shortcut = in_channels == out_channels and stride == 1 | ||
def forward(self, x): | ||
if self.use_shortcut: | ||
return self.conv(x) + x | ||
return self.conv(x) | ||
The model architecture will be defined as the following. | ||
{ | ||
layer1: {defined: True, operation: nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, bias=False)}, | ||
layer2: {defined: False, downsample: True , in_channels: 32, out_channels: 64 , stride: 2}, | ||
layer3: {defined: False, downsample: False, in_channels: 64, out_channels: 64 , stride: 1}, | ||
layer4: {defined: False, downsample: True , in_channels: 64, out_channels: 128, stride: 2}, | ||
layer5: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1}, | ||
layer6: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1}, | ||
layer7: {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2}, | ||
layer8: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1}, | ||
layer9: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1}, | ||
layer10: {defined: True, operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)}, | ||
layer11: {defined: True, operation: nn.AdaptiveAvgPool2d(output_size=1)}, | ||
layer12: {defined: True, operation: nn.Linear(in_features=1280, out_features=10)}, | ||
} | ||
The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only. | ||
Your response should be an operation ID list for the undefined layers. For example: | ||
[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9. | ||
" | ||
experiments_prompt: |- | ||
Here are some experimental results that you can use as a reference: | ||
{} | ||
Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above. | ||
suffix: |- | ||
Please do not include anything other than the operation ID list in your response. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import json | ||
from pathlib import Path | ||
from typing import Sequence | ||
from rdagent.core.experiment import Experiment | ||
from rdagent.core.prompts import Prompts | ||
from rdagent.core.proposal import Hypothesis, Hypothesis2Experiment, HypothesisExperiment2Feedback, HypothesisFeedback, HypothesisGen, Trace | ||
from rdagent.core.scenario import Scenario | ||
from rdagent.oai.llm_utils import APIBackend | ||
|
||
prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml") | ||
|
||
# print(prompt_dict) | ||
class DMScenario(Scenario):
    """Scenario for the NAS-Bench-Macro CIFAR10 operation-selection task.

    All prompt texts are read from the module-level ``prompt_dict``.
    """

    @property
    def background(self) -> str:
        """System prompt stored under the ``system_prompt`` key."""
        return prompt_dict["system_prompt"]

    @property
    def backgroud(self) -> str:
        """Misspelled alias of :attr:`background`, kept so existing callers still work."""
        return self.background

    @property
    def source_data(self) -> Path:
        """Relative path of the benchmark JSON file."""
        return Path('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json')

    @property
    def initial_prompt(self) -> str:
        """First user prompt stored under the ``first_user_prompt`` key."""
        return prompt_dict["first_user_prompt"]

    @property
    def suffix(self) -> str:
        """Closing instruction stored under the ``suffix`` key."""
        return prompt_dict["suffix"]
|
||
class DMTrace(Trace):
    """Record of the experiments run so far for one scenario.

    Each history entry is read as ``entry[0]`` (the architecture) and
    ``entry[1]`` (its accuracy) when the prompt text is built.
    """

    def __init__(self, scen) -> None:
        """Store the scenario and start with an empty history."""
        self.scen = scen
        self.hist = []

    def add_exp(self, new_exp):
        """Append one experiment record to the history."""
        self.hist.append(new_exp)

    def get_exp_prompts(self):
        """Return the prompt section listing earlier results.

        Returns:
            An empty string when nothing has been recorded yet; otherwise a
            header line, one line per recorded result, and a request for a
            better operation ID list.
        """
        if not self.hist:
            return ''
        results = ''.join(
            '{} gives an accuracy of {:.2f}%\n'.format(x[0], x[1]) for x in self.hist
        )
        # The results start on their own line, matching the other copies of
        # this template; the first result used to be glued to "reference:".
        template = (
            'Here are some experimental results that you can use as a reference:\n'
            '{}\n'
            "Please suggest a better operation ID list that can improve the model's "
            'performance on CIFAR10 beyond the experimental results provided above.\n'
        )
        return template.format(results)
|
||
class DMHypothesis(Hypothesis):
    """Hypothesis type for this task; adds nothing to the base class."""

    def __init__(self, hypothesis, reason):
        """Forward both arguments unchanged to the base constructor."""
        super(DMHypothesis, self).__init__(hypothesis, reason)
|
||
|
||
class DMHypothesisGen(HypothesisGen):
    """Asks the chat model for the next operation ID list to evaluate."""

    def __init__(self, scen: DMScenario):
        super().__init__(scen)

    def gen(self, trace: DMTrace) -> Hypothesis:
        """Query the model and wrap its answer as a hypothesis.

        Args:
            trace: Supplies the text describing earlier results through
                ``get_exp_prompts()``.

        Returns:
            A ``DMHypothesis`` whose hypothesis is the returned operation IDs
            joined into one string, with ``None`` as the reason.

        Raises:
            json.JSONDecodeError: If the model reply is not valid JSON.
        """
        scen = self.scen
        # The scenario exposes its prompts as properties, so they are read as
        # attributes; calling them raised "'str' object is not callable".
        # DMScenario may spell the system-prompt property `backgroud`, so fall
        # back to that spelling when `background` is absent.
        system_prompt = scen.background if hasattr(scen, "background") else scen.backgroud
        user_prompt = scen.initial_prompt + trace.get_exp_prompts() + scen.suffix
        res = APIBackend().build_messages_and_create_chat_completion(
            user_prompt=user_prompt, system_prompt=system_prompt
        )
        operation_id_list = json.loads(res)
        operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
        return DMHypothesis(operation_id_list_str, None)
|
||
class DMHypotheis2exp(Hypothesis2Experiment):
    """Turns a hypothesis into an experiment by looking it up in the benchmark."""

    def __init__(self, scen) -> None:
        """Load the benchmark JSON named by ``scen.source_data``.

        Args:
            scen: Scenario whose ``source_data`` property gives the file path.
        """
        super().__init__()
        # `source_data` is a property that returns a path, so it is read, not
        # called; the context manager closes the file after parsing.
        with open(scen.source_data, encoding="utf-8") as benchmark_file:
            self.data = json.load(benchmark_file)

    def generate(self, hypothesis):
        """Return a one-task experiment holding the benchmark entry for the hypothesis.

        Raises:
            KeyError: If ``hypothesis.hypothesis`` is not a key of the benchmark.
        """
        return DMExperiments([self.data[hypothesis.hypothesis]])
|
||
class DMExperiments(Experiment):
    """Experiment type for this task; adds nothing to the base class."""

    def __init__(self, sub_tasks: Sequence) -> None:
        """Pass ``sub_tasks`` straight to the base constructor."""
        super(DMExperiments, self).__init__(sub_tasks)
|
||
class DMFeedback(HypothesisFeedback):
    """Feedback object that carries a single accuracy value."""

    def __init__(self, acc) -> None:
        """Initialise the base class without arguments, then store ``acc``."""
        super(DMFeedback, self).__init__()
        self.acc = acc
|
||
class DMexp2feedback(HypothesisExperiment2Feedback):
    """Feedback generator stub; it does not produce any feedback yet."""

    def __init__(self) -> None:
        super(DMexp2feedback, self).__init__()

    def generateFeedback(self, ti: Experiment, hypothesis: Hypothesis, trace: Trace):
        """Return ``None``; none of the arguments are used."""
        return None
|
||
|