v1

microsoft · Jul 10, 2024 · 2a459cc · 2a459cc
1 parent c6cc8a3
commit 2a459cc
Show file tree

Hide file tree

Showing 9 changed files with 280 additions and 0 deletions.
diff --git a/rdagent/app/data_mining/benchmark/Results_MobileNet.json b/rdagent/app/data_mining/benchmark/Results_MobileNet.json
diff --git a/rdagent/app/data_mining/benchmark/Results_ResNet.json b/rdagent/app/data_mining/benchmark/Results_ResNet.json
diff --git a/rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json b/rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json
diff --git a/rdagent/app/data_mining/benchmark/nasbench201_cifar10.json b/rdagent/app/data_mining/benchmark/nasbench201_cifar10.json
diff --git a/rdagent/app/data_mining/benchmark/nasbench201_cifar100.json b/rdagent/app/data_mining/benchmark/nasbench201_cifar100.json
diff --git a/rdagent/app/data_mining/benchmark/nasbench201_imagenet.json b/rdagent/app/data_mining/benchmark/nasbench201_imagenet.json
diff --git a/rdagent/app/data_mining/search.py b/rdagent/app/data_mining/search.py
@@ -0,0 +1,104 @@
+import json
+
+import numpy as np
+from rdagent.oai.llm_utils import APIBackend
+
+benchmark_file = open('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json')
+data = json.load(benchmark_file)
+keys = list(data.keys())
+rank = np.array([data[k]['mean_acc'] for k in keys]).argsort().argsort()
+for k, r in zip(keys, rank):
+    data[k]['rank'] = (3 ** 8) - r
+
+max_k = None
+max_acc = None
+for k in keys:
+    if data[k]['rank'] == 300:
+        print(data[k]['mean_acc'])
+        print(k)
+
+system_content = "You are an expert in the field of neural architecture search."
+
+user_input = '''Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
+
+We define the 3 available operations as the following:
+0: Identity(in_channels, out_channels, stride)
+1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3)
+2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5)
+
+The implementation of the Identity is as follows:
+class Identity(nn.Module):
+    def __init__(self, in_channels, out_channels, stride):
+        super(Identity, self).__init__()
+        if stride != 1 or in_channels != out_channels:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
+                nn.BatchNorm2d(out_channels),
+            )
+        else:
+            self.downsample = None
+
+    def forward(self, x):
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return x
+
+The implementation of the InvertedResidual is as follows:
+class InvertedResidual(nn.Module):
+    def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
+        super(InvertedResidual, self).__init__()
+        hidden_dim = in_channels * expansion
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
+            nn.BatchNorm2d(hidden_dim),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
+            nn.BatchNorm2d(hidden_dim),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(out_channels),
+        )
+        self.use_shortcut = in_channels == out_channels and stride == 1
+
+    def forward(self, x):
+        if self.use_shortcut:
+            return self.conv(x) + x
+        return self.conv(x)
+        
+
+The model architecture will be defined as the following.
+{
+    layer1:  {defined: True,  operation: nn.Conv2d(in_channels=3,  out_channels=32, kernel_size=3, padding=1, bias=False)},
+    layer2:  {defined: False, downsample: True , in_channels: 32,  out_channels: 64 , stride: 2},
+    layer3:  {defined: False, downsample: False, in_channels: 64,  out_channels: 64 , stride: 1},
+    layer4:  {defined: False, downsample: True , in_channels: 64,  out_channels: 128, stride: 2},
+    layer5:  {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
+    layer6:  {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
+    layer7:  {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
+    layer8:  {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
+    layer9:  {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
+    layer10: {defined: True,  operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
+    layer11: {defined: True,  operation: nn.AdaptiveAvgPool2d(output_size=1)},
+    layer12: {defined: True,  operation: nn.Linear(in_features=1280, out_features=10)},
+}
+
+The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only. 
+
+Your response should be an operation ID list for the undefined layers. For example:
+[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
+'''
+
+experiments_prompt = lambda arch_list, acc_list : '''Here are some experimental results that you can use as a reference:
+{}
+Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above.
+'''.format(''.join(['{} gives an accuracy of {:.2f}%\n'.format(arch, acc) for arch, acc in zip(arch_list, acc_list)]))
+
+# Closing instruction asking the model to reply with the operation ID list only.
+suffix = '''Please do not include anything other than the operation ID list in your response.'''
+
+# Both lists start empty and are not filled anywhere in this script.
+# NOTE(review): presumably meant to collect tried architectures and their
+# accuracies for experiments_prompt - confirm.
+arch_list = []
+acc_list = []
+
+# res = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_input+suffix, system_prompt=system_content)
+# operation_id_list = json.loads(res)
+# operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
+# print(data[operation_id_list_str]['mean_acc'])
diff --git a/rdagent/scenarios/data_mining/prompts.yaml b/rdagent/scenarios/data_mining/prompts.yaml
@@ -0,0 +1,80 @@
+system_prompt: |-
+    You are an expert in the field of neural architecture search.
+
+first_user_prompt: |-
+    "Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
+
+    We define the 3 available operations as the following:
+    0: Identity(in_channels, out_channels, stride)
+    1: InvertedResidual(in_channels, out_channels, stride, expansion=3, kernel_size=3)
+    2: InvertedResidual(in_channels, out_channels, stride, expansion=6, kernel_size=5)
+
+    The implementation of the Identity is as follows:
+    class Identity(nn.Module):
+        def __init__(self, in_channels, out_channels, stride):
+            super(Identity, self).__init__()
+            if stride != 1 or in_channels != out_channels:
+                self.downsample = nn.Sequential(
+                    nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
+                    nn.BatchNorm2d(out_channels),
+                )
+            else:
+                self.downsample = None
+
+        def forward(self, x):
+            if self.downsample is not None:
+                x = self.downsample(x)
+            return x
+
+    The implementation of the InvertedResidual is as follows:
+    class InvertedResidual(nn.Module):
+        def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
+            super(InvertedResidual, self).__init__()
+            hidden_dim = in_channels * expansion
+            self.conv = nn.Sequential(
+                nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
+                nn.BatchNorm2d(hidden_dim),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
+                nn.BatchNorm2d(hidden_dim),
+                nn.ReLU(inplace=True),
+                nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
+                nn.BatchNorm2d(out_channels),
+            )
+            self.use_shortcut = in_channels == out_channels and stride == 1
+
+        def forward(self, x):
+            if self.use_shortcut:
+                return self.conv(x) + x
+            return self.conv(x)
+            
+
+    The model architecture will be defined as the following.
+    {
+        layer1:  {defined: True,  operation: nn.Conv2d(in_channels=3,  out_channels=32, kernel_size=3, padding=1, bias=False)},
+        layer2:  {defined: False, downsample: True , in_channels: 32,  out_channels: 64 , stride: 2},
+        layer3:  {defined: False, downsample: False, in_channels: 64,  out_channels: 64 , stride: 1},
+        layer4:  {defined: False, downsample: True , in_channels: 64,  out_channels: 128, stride: 2},
+        layer5:  {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
+        layer6:  {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
+        layer7:  {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
+        layer8:  {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
+        layer9:  {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
+        layer10: {defined: True,  operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
+        layer11: {defined: True,  operation: nn.AdaptiveAvgPool2d(output_size=1)},
+        layer12: {defined: True,  operation: nn.Linear(in_features=1280, out_features=10)},
+    }
+
+    The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only. 
+
+    Your response should be an operation ID list for the undefined layers. For example:
+    [1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
+    "
+
+experiments_prompt: |-
+  Here are some experimental results that you can use as a reference:
+  {}
+  Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above.
+
+suffix: |-
+  Please do not include anything other than the operation ID list in your response.
diff --git a/rdagent/scenarios/data_mining/proposal.py b/rdagent/scenarios/data_mining/proposal.py
@@ -0,0 +1,90 @@
+import json
+from pathlib import Path
+from typing import Sequence
+from rdagent.core.experiment import Experiment
+from rdagent.core.prompts import Prompts
+from rdagent.core.proposal import Hypothesis, Hypothesis2Experiment, HypothesisExperiment2Feedback, HypothesisFeedback, HypothesisGen, Trace
+from rdagent.core.scenario import Scenario
+from rdagent.oai.llm_utils import APIBackend
+
+prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
+
+# print(prompt_dict)
+class DMScenario(Scenario):
+    @property
+    def backgroud(self) -> str:
+        return prompt_dict["system_prompt"]
+
+    @property
+    def source_data(self) -> Path:
+        return Path('rdagent/app/data_mining/benchmark/nas-bench-macro_cifar10.json')
+
+    @property
+    def initial_prompt(self) -> str:
+        return prompt_dict["first_user_prompt"]
+
+    @property
+    def suffix(self) -> str:
+        return prompt_dict["suffix"]
+
+class DMTrace(Trace):
+    def __init__(self, scen) -> None:
+        self.scen = scen
+        self.hist = []
+
+    def add_exp(self, new_exp):
+        self.hist.append(new_exp)
+
+    def get_exp_prompts(self):
+        if len(self.hist) == 0:
+            return ''
+        else:
+            return '''Here are some experimental results that you can use as a reference:{}
+            Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above.
+            '''.format(''.join(['{} gives an accuracy of {:.2f}%\n'.format(x[0], x[1]) for x in self.hist]))
+
+class DMHypothesis(Hypothesis):
+    """Hypothesis type used by the data-mining scenario.
+
+    Adds nothing to the base class; both arguments are forwarded unchanged.
+    """
+
+    def __init__(self, hypothesis, reason):
+        # Pure pass-through to the base-class constructor.
+        super().__init__(hypothesis, reason)
+
+
+class DMHypothesisGen(HypothesisGen):
+    def __init__(self, scen: DMScenario):
+        super().__init__(scen)
+
+    def gen(self, trace: DMTrace) -> Hypothesis:
+        system_prompts = self.scen.background()
+        user_prompt = self.scen.initial_prompt()
+        exp_prompts = trace.get_exp_prompts()
+        suffix = self.scen.suffix()
+        res = APIBackend().build_messages_and_create_chat_completion(user_prompt=user_prompt+exp_prompts+suffix, system_prompt=system_prompts)
+        operation_id_list = json.loads(res)
+        operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
+        return DMHypothesis(operation_id_list_str, None)
+
+class DMHypotheis2exp(Hypothesis2Experiment):
+    def __init__(self, scen) -> None:
+        super().__init__()
+        self.data = json.load(open(scen.source_data()))
+
+    def generate(self, hypothesis):
+        return DMExperiments([self.data[hypothesis.hypothesis]])
+
+class DMExperiments(Experiment):
+    """Experiment type used by the data-mining scenario.
+
+    Adds nothing to the base class; ``sub_tasks`` is forwarded unchanged.
+    """
+
+    def __init__(self, sub_tasks: Sequence) -> None:
+        # Pure pass-through to the base-class constructor.
+        super().__init__(sub_tasks)
+
+class DMFeedback(HypothesisFeedback):
+    """Feedback object that stores a single ``acc`` value."""
+
+    def __init__(self, acc) -> None:
+        # NOTE(review): the base constructor is called without arguments;
+        # confirm that HypothesisFeedback accepts that.
+        super().__init__()
+        # presumably the accuracy of the evaluated architecture - confirm
+        self.acc = acc
+
+class DMexp2feedback(HypothesisExperiment2Feedback):
+    """Placeholder feedback generator for the data-mining scenario."""
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def generateFeedback(self, ti: Experiment, hypothesis: Hypothesis, trace: Trace):
+        """Unimplemented stub: ignores all arguments and returns ``None``."""
+        # TODO: build and return a feedback object; nothing is produced yet.
+        return 
+
+