
Commit

Merge branch 'develop' into develop-lx
lx200916 authored Nov 2, 2023
2 parents 53052fd + 503551d commit 88a8462
Showing 13 changed files with 25 additions and 18 deletions.
4 changes: 2 additions & 2 deletions demo/main_test.cpp
@@ -122,8 +122,8 @@ int main() {
     Net net(c->sub_param_, bn);
     net.convert();
     // net.Run();
-    ParamLoader param_loader("../models/llama-2-7b-fp32.mllm");
-    // ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm");
+    // ParamLoader param_loader("../models/llama-2-7b-fp32.mllm");
+    ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm");
     Executor ex(&net, &param_loader);
     // Executor ex(&net);
     shared_ptr<Tensor> input = std::make_shared<Tensor>();
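Note: the demo now loads the 4-bit LinearOnly checkpoint instead of the FP32 one. Presumably that file is produced by the quantizer tool touched at the bottom of this commit; going by its usage string, the invocation would look something like the following (the exact quant_type token is an assumption):

./quantize ../models/llama-2-7b-fp32.mllm ../models/llama-2-7b-q4_0-LinearOnly.mllm Q4_0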
4 changes: 2 additions & 2 deletions src/Executor.cpp
@@ -2,8 +2,8 @@
 namespace mllm {
 void Executor::init() {
     //
-    weights_dtype_ = MLLM_TYPE_F32;
-    activation_dtype_ = MLLM_TYPE_F32;
+    // weights_dtype_ = MLLM_TYPE_F32;
+    // activation_dtype_ = MLLM_TYPE_F32;
 }
 
 void Executor::execute(vector<int> input_size) {
4 changes: 2 additions & 2 deletions src/Executor.hpp
@@ -76,8 +76,8 @@ class Executor {
     vector<shared_ptr<Tensor>> result_;
     ParamLoader *data_loader_;
 
-    DataType weights_dtype_;
-    DataType activation_dtype_;
+    // DataType weights_dtype_;
+    // DataType activation_dtype_;
 };
 
 } // namespace mllm
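Taken together with Executor.cpp above, this retires the executor-level dtype fields entirely: dtype policy now lives in Graph, whose weights_dtype_ default switches to MLLM_TYPE_Q4_0 below, so the executor no longer forces everything back to FP32 at init time.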
7 changes: 6 additions & 1 deletion src/Graph.cpp
@@ -39,7 +39,12 @@ Graph::Graph(const NetParameter &param, Backend *bn, unordered_map<string, share
         my_op.reset(new_op);
         // string lname = net_op->name;
         // my_op->setName(lname);
-        my_op->setDtype(weights_dtype_, activation_dtype_);
+        auto op_type = net_op->type;
+        if(op_type ==LINEAR || op_type == ATTENTION){
+            my_op->setDtype(weights_dtype_, activation_dtype_);
+        } else{
+            my_op->setDtype(MLLM_TYPE_F32, activation_dtype_);
+        }
         ops_[net_op->name] = my_op;
     }
     // shapeInit(external_tensors);
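This hunk is the heart of the commit: only ops whose weights are actually stored quantized in the q4_0-LinearOnly checkpoint keep the graph-level weights dtype; every other op falls back to FP32 weights. A minimal standalone sketch of the selection rule, with a hypothetical helper name (chooseWeightsDtype and the OpType spelling are illustrative, not mllm API):

// Sketch of the dispatch above: only Linear and Attention weights are
// quantized in the LinearOnly checkpoint; all other weights stay FP32.
static DataType chooseWeightsDtype(OpType op_type, DataType graph_weights_dtype) {
    switch (op_type) {
    case LINEAR:
    case ATTENTION:
        return graph_weights_dtype; // e.g. MLLM_TYPE_Q4_0
    default:
        return MLLM_TYPE_F32;       // embeddings, norms, etc.
    }
}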
2 changes: 1 addition & 1 deletion src/Graph.hpp
@@ -84,7 +84,7 @@ class Graph {
     unordered_map<string, shared_ptr<Op>> ops_; // opname: op
     // unordered_map<string, shared_ptr<Tensor>> external_tensors_;
 
-    DataType weights_dtype_ = MLLM_TYPE_F32;
+    DataType weights_dtype_ = MLLM_TYPE_Q4_0;//MLLM_TYPE_F32;
     DataType activation_dtype_ = MLLM_TYPE_F32;
 };
 
4 changes: 2 additions & 2 deletions src/Net.cpp
@@ -50,7 +50,7 @@ Net::Net(const vector<NetParameter> &param, BackendConfig config) :
     tensor_names_[0].push_back(in_tensor->name);
     printf("Net init\n");
 }
-
+/*
 void Net::convert() {
     // auto bn = new CPUBackend(mm); //TODO
     // backends_["cpu"] = bn;
@@ -64,7 +64,7 @@ void Net::convert() {
         subg_1.reset(new Graph(sub_param, backends_[BackendType::MLLM_CPU], tensors_));
         subGraphs_["G" + std::to_string(i)] = subg_1;
     }
-}
+}*/
 
 void Net::convert(BackendType backend_type) {
     for (int i = 0; i < (int)net_param_.size(); ++i) {
4 changes: 2 additions & 2 deletions src/Net.hpp
@@ -14,10 +14,10 @@ class Net {
     explicit Net(const vector<NetParameter> &param, BackendConfig config);
     virtual ~Net() = default;
 
-    void convert();
+    //void convert();
     // TODO: remove
     // convert all subgraph to specified backend, just for develop
-    void convert(BackendType backend_type);
+    void convert(BackendType backend_type = BackendType::MLLM_CPU);
 
     void reshapeInput();
     void reshapeInput(vector<int> shape);
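Dropping the parameterless overload is required rather than cosmetic: once convert(BackendType) gains a default argument, keeping void convert() beside it would make every net.convert() call ambiguous. Existing call sites, such as the demo above, keep compiling unchanged:

net.convert();                      // now resolves to convert(BackendType::MLLM_CPU)
net.convert(BackendType::MLLM_CPU); // explicit equivalent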
6 changes: 3 additions & 3 deletions src/Tensor.cpp
@@ -59,7 +59,7 @@ void Tensor::alloc() {
         // If memory was already allocated, free it first
         backend_->free(host_ptr_);
     }
-    backend_->alloc(&host_ptr_, CntSize());
+    backend_->alloc(&host_ptr_, cntSize());
     allocated_ = true;
 }
 
@@ -94,13 +94,13 @@ void Tensor::copyFrom(const Tensor &source, bool copy_diff, bool reshape) {
     CHECK_EQ(source.dtype(), dtype());
     CHECK_EQ(source.count(), count());
     // copy
-    memcpy(host_ptr_, source.host_ptr_, CntSize());
+    memcpy(host_ptr_, source.host_ptr_, cntSize());
 }
 void Tensor::copyFrom(const shared_ptr<Tensor> &source, bool reshape) {
     CHECK_EQ(source->dtype(), dtype());
     CHECK_EQ(source->count(), count());
     // copy
-    memcpy(host_ptr_, source->host_ptr_, CntSize());
+    memcpy(host_ptr_, source->host_ptr_, cntSize());
 }
 void Tensor::permute(int axis0, int axis1, int axis2, int axis3, bool copy) {
     // Check that the axes are valid
2 changes: 1 addition & 1 deletion src/Tensor.hpp
@@ -285,7 +285,7 @@ class Tensor {
         return dtype_;
     }
 
-    int CntSize() {
+    int cntSize() {
         return DataTypeSize(dtype_, count_);
     }
 
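cntSize() is the buffer size in bytes, which stops being count() * sizeof(float) once quantized tensors exist. A sketch of the distinction, assuming a ggml-style Q4_0 block layout (32 values per 18-byte block: an fp16 scale plus 16 bytes of packed 4-bit values); the actual constants inside mllm's DataTypeSize may differ:

// Illustration only: bytes needed for `count` elements of `dtype`.
static size_t bytesFor(DataType dtype, int count) {
    switch (dtype) {
    case MLLM_TYPE_Q4_0:
        // One 18-byte block holds 32 quantized values.
        return (size_t)count / 32 * 18;
    case MLLM_TYPE_F32:
    default:
        return (size_t)count * sizeof(float);
    }
}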
2 changes: 1 addition & 1 deletion src/backends/cpu/CPUEmbedding.cpp
@@ -17,7 +17,7 @@ ErrorCode mllm::CPUEmbedding::reshape(vector<shared_ptr<Tensor>> inputs, vector<
     outputs[0]->setDtype(activationDtype());
     weight_.reshape(1, 1, vocabSize_, hiddenSize_);
     weight_.setName(name() + ".weight");
-    weight_.setDtype(weightsDtype());
+    weight_.setDtype(MLLM_TYPE_F32);
     return NO_ERROR;
 }
 ErrorCode mllm::CPUEmbedding::setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
2 changes: 1 addition & 1 deletion src/backends/cpu/CPURMSNorm.cpp
@@ -16,7 +16,7 @@ ErrorCode CPURMSNorm::reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_p
     // Like LayerNorm, RMSNorm acts on the channel dimension
     weight_.reshape(1, 1, 1, inputs[0]->dimension()); // (C, 1, 1, 1)
     weight_.setName(name() + ".weight");
-    weight_.setDtype(weightsDtype());
+    weight_.setDtype(MLLM_TYPE_F32);
     outputs[0]->reshape(inputs[0]->batch(), inputs[0]->shape(1), inputs[0]->shape(2), inputs[0]->shape(3));
     outputs[0]->setDtype(activationDtype());
     std::cout << name() << " CPURMSNorm reshape" << std::endl;
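As with CPUEmbedding above, the RMSNorm weight is pinned to MLLM_TYPE_F32 instead of inheriting weightsDtype(): in the q4_0-LinearOnly checkpoint only Linear/Attention parameters are stored quantized, so embedding and norm weights must keep their FP32 layout to load correctly. This mirrors the per-op dispatch added in Graph.cpp.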
1 change: 1 addition & 0 deletions src/quantizer/ParamWriter.hpp
@@ -14,6 +14,7 @@ static void writeInt(FILE *fp, int32_t val) {
 static void writeString(FILE *fp, const std::string &str) {
     writeInt(fp, str.size());
     fwrite(str.c_str(), sizeof(char), str.size(), fp);
+
 }
 static void write_dtype(FILE *fp, DataType dtype) {
     writeInt(fp, dtype);
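For reference, writeString emits a length-prefixed string: a 4-byte length via writeInt (host byte order), followed by the raw characters with no terminator. A hypothetical reader for the same layout (readString is not an existing mllm function, just an illustration of the on-disk format; error handling omitted):

// Inverse of writeString: read a 4-byte length, then that many raw bytes.
static std::string readString(FILE *fp) {
    int32_t len = 0;
    fread(&len, sizeof(int32_t), 1, fp);   // length written by writeInt
    std::string str(len, '\0');
    fread(&str[0], sizeof(char), len, fp); // characters, no trailing '\0'
    return str;
}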
1 change: 1 addition & 0 deletions src/quantizer/main.cpp
@@ -8,6 +8,7 @@
 #include <string>
 #include "QuantWriter.hpp"
 
+
 int main(int argc, char **argv) {
     if (argc != 4) {
         std::cout << "Usage: ./quantize <input_path> <output_path> <quant_type>\n";
