
Commit

fix: weight.DType
yirongjie committed Nov 2, 2023
1 parent 8471867 commit 503551d
Showing 13 changed files with 30 additions and 27 deletions.
4 changes: 2 additions & 2 deletions demo/main_test.cpp
@@ -122,8 +122,8 @@ int main() {
Net net(c->sub_param_, bn);
net.convert();
// net.Run();
-ParamLoader param_loader("../models/llama-2-7b-fp32.mllm");
-// ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm");
+// ParamLoader param_loader("../models/llama-2-7b-fp32.mllm");
+ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm");
Executor ex(&net, &param_loader);
// Executor ex(&net);
shared_ptr<Tensor> input = std::make_shared<Tensor>();
4 changes: 2 additions & 2 deletions src/Executor.cpp
@@ -2,8 +2,8 @@
namespace mllm {
void Executor::init() {
//
-weights_dtype_ = MLLM_TYPE_F32;
-activation_dtype_ = MLLM_TYPE_F32;
+// weights_dtype_ = MLLM_TYPE_F32;
+// activation_dtype_ = MLLM_TYPE_F32;
}

void Executor::execute(vector<int> input_size) {
4 changes: 2 additions & 2 deletions src/Executor.hpp
@@ -76,8 +76,8 @@ class Executor {
vector<shared_ptr<Tensor>> result_;
ParamLoader *data_loader_;

-DataType weights_dtype_;
-DataType activation_dtype_;
+// DataType weights_dtype_;
+// DataType activation_dtype_;
};

} // namespace mllm
7 changes: 6 additions & 1 deletion src/Graph.cpp
@@ -39,7 +39,12 @@ Graph::Graph(const NetParameter &param, Backend *bn, unordered_map<string, share
my_op.reset(new_op);
// string lname = net_op->name;
// my_op->setName(lname);
-my_op->setDtype(weights_dtype_, activation_dtype_);
+auto op_type = net_op->type;
+if (op_type == LINEAR || op_type == ATTENTION) {
+    my_op->setDtype(weights_dtype_, activation_dtype_);
+} else {
+    my_op->setDtype(MLLM_TYPE_F32, activation_dtype_);
+}
ops_[net_op->name] = my_op;
}
// shapeInit(external_tensors);
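
The hunk above is the heart of the commit: quantized weights are applied only to op types that actually have quantized tensors in the checkpoint (Linear and Attention), while every other op keeps FP32 weights. A minimal, self-contained sketch of this dispatch pattern, with simplified stand-ins for mllm's OpType, DataType, and Op (the real enums and setDtype signature live in the repository):

#include <cstdio>

// Simplified stand-ins for mllm's enums; the real definitions are in the repo.
enum OpType { LINEAR, ATTENTION, RMSNORM, EMBEDDING };
enum DataType { MLLM_TYPE_F32, MLLM_TYPE_Q4_0 };

struct Op {
    DataType weights_dtype, activation_dtype;
    void setDtype(DataType w, DataType a) { weights_dtype = w; activation_dtype = a; }
};

// Only Linear and Attention carry quantized weights; norms, embeddings, etc. stay FP32.
void assignDtype(Op &op, OpType type, DataType weights, DataType activations) {
    if (type == LINEAR || type == ATTENTION) {
        op.setDtype(weights, activations);        // e.g. Q4_0 weights
    } else {
        op.setDtype(MLLM_TYPE_F32, activations);  // weights forced to FP32
    }
}

int main() {
    Op linear{}, norm{};
    assignDtype(linear, LINEAR, MLLM_TYPE_Q4_0, MLLM_TYPE_F32);
    assignDtype(norm, RMSNORM, MLLM_TYPE_Q4_0, MLLM_TYPE_F32);
    std::printf("linear quantized: %d, norm quantized: %d\n",
                linear.weights_dtype == MLLM_TYPE_Q4_0,
                norm.weights_dtype == MLLM_TYPE_Q4_0);
}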
2 changes: 1 addition & 1 deletion src/Graph.hpp
@@ -84,7 +84,7 @@ class Graph {
unordered_map<string, shared_ptr<Op>> ops_; // opname: op
// unordered_map<string, shared_ptr<Tensor>> external_tensors_;

-DataType weights_dtype_ = MLLM_TYPE_F32;
+DataType weights_dtype_ = MLLM_TYPE_Q4_0; // MLLM_TYPE_F32;
DataType activation_dtype_ = MLLM_TYPE_F32;
};
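
Flipping the default weights_dtype_ to MLLM_TYPE_Q4_0 is what makes the q4_0-LinearOnly checkpoint in demo/main_test.cpp the expected input. Back-of-the-envelope math on why this matters, assuming a ggml-style Q4_0 layout of 18 bytes per block of 32 weights (one fp16 scale plus 32 packed 4-bit values; the exact layout in mllm is an assumption here) and a rough 6.5e9 linear-layer weights for a 7B LLaMA model:

#include <cstdio>

int main() {
    // Assumed ggml-style Q4_0 block: 32 weights -> 2-byte fp16 scale + 16 bytes of nibbles.
    const double q4_0_bytes_per_weight = 18.0 / 32.0;  // 0.5625 B = 4.5 bits
    const double f32_bytes_per_weight = 4.0;
    const double n_linear_weights = 6.5e9;             // rough figure for a 7B model

    std::printf("fp32 linear weights: %.1f GiB\n",
                n_linear_weights * f32_bytes_per_weight / (1024.0 * 1024.0 * 1024.0));
    std::printf("q4_0 linear weights: %.1f GiB\n",
                n_linear_weights * q4_0_bytes_per_weight / (1024.0 * 1024.0 * 1024.0));
}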

4 changes: 2 additions & 2 deletions src/Net.cpp
@@ -50,7 +50,7 @@ Net::Net(const vector<NetParameter> &param, BackendConfig config) :
tensor_names_[0].push_back(in_tensor->name);
printf("Net init\n");
}
-
+/*
void Net::convert() {
// auto bn = new CPUBackend(mm); //TODO
// backends_["cpu"] = bn;
@@ -64,7 +64,7 @@ void Net::convert() {
subg_1.reset(new Graph(sub_param, backends_[BackendType::MLLM_CPU], tensors_));
subGraphs_["G" + std::to_string(i)] = subg_1;
}
-}
+}*/

void Net::convert(BackendType backend_type) {
for (int i = 0; i < (int)net_param_.size(); ++i) {
4 changes: 2 additions & 2 deletions src/Net.hpp
@@ -14,10 +14,10 @@ class Net {
explicit Net(const vector<NetParameter> &param, BackendConfig config);
virtual ~Net() = default;

-void convert();
+// void convert();
// TODO: remove
// convert all subgraph to specified backend, just for develop
-void convert(BackendType backend_type);
+void convert(BackendType backend_type = BackendType::MLLM_CPU);

void reshapeInput();
void reshapeInput(vector<int> shape);
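
Replacing the separate no-argument convert() with a defaulted parameter keeps existing call sites such as net.convert(); in demo/main_test.cpp compiling while eliminating the duplicate overload. The pattern in miniature (MLLM_GPU is a hypothetical second backend, just for illustration):

#include <iostream>

enum class BackendType { MLLM_CPU, MLLM_GPU };  // MLLM_GPU is hypothetical

struct Net {
    // One signature replaces the old convert()/convert(BackendType) pair.
    void convert(BackendType backend_type = BackendType::MLLM_CPU) {
        std::cout << "convert for "
                  << (backend_type == BackendType::MLLM_CPU ? "CPU" : "GPU") << "\n";
    }
};

int main() {
    Net net;
    net.convert();                       // old no-argument call sites still compile
    net.convert(BackendType::MLLM_GPU);
}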
6 changes: 3 additions & 3 deletions src/Tensor.cpp
@@ -59,7 +59,7 @@ void Tensor::alloc() {
// If the existing memory is already allocated, free it
backend_->free(host_ptr_);
}
-backend_->alloc(&host_ptr_, CntSize());
+backend_->alloc(&host_ptr_, cntSize());
allocated_ = true;
}

@@ -94,13 +94,13 @@ void Tensor::copyFrom(const Tensor &source, bool copy_diff, bool reshape) {
CHECK_EQ(source.dtype(), dtype());
CHECK_EQ(source.count(), count());
// copy
-memcpy(host_ptr_, source.host_ptr_, CntSize());
+memcpy(host_ptr_, source.host_ptr_, cntSize());
}
void Tensor::copyFrom(const shared_ptr<Tensor> &source, bool reshape) {
CHECK_EQ(source->dtype(), dtype());
CHECK_EQ(source->count(), count());
// copy
-memcpy(host_ptr_, source->host_ptr_, CntSize());
+memcpy(host_ptr_, source->host_ptr_, cntSize());
}
void Tensor::permute(int axis0, int axis1, int axis2, int axis3, bool copy) {
// Check that the axes are valid
2 changes: 1 addition & 1 deletion src/Tensor.hpp
@@ -285,7 +285,7 @@ class Tensor {
return dtype_;
}

-int CntSize() {
+int cntSize() {
return DataTypeSize(dtype_, count_);
}
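
Besides matching the camelCase used elsewhere, the renamed cntSize() is the byte count for count_ elements of dtype_, which is no longer a simple multiply once block-quantized types are involved. A sketch of what a DataTypeSize-style helper must handle, assuming Q4_0 packs 32 elements into an 18-byte block (fp16 scale + 16 bytes of 4-bit values; mllm's real implementation may differ):

#include <cassert>
#include <cstdio>

enum DataType { MLLM_TYPE_F32, MLLM_TYPE_Q4_0 };

// Bytes needed for `count` elements of `dtype` (assumed Q4_0 layout).
int DataTypeSize(DataType dtype, int count) {
    switch (dtype) {
    case MLLM_TYPE_F32: return count * 4;
    case MLLM_TYPE_Q4_0:
        assert(count % 32 == 0);   // Q4_0 is only defined on whole blocks
        return (count / 32) * 18;
    }
    return 0;
}

int main() {
    std::printf("4096 x F32  = %d bytes\n", DataTypeSize(MLLM_TYPE_F32, 4096));   // 16384
    std::printf("4096 x Q4_0 = %d bytes\n", DataTypeSize(MLLM_TYPE_Q4_0, 4096));  // 2304
}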

2 changes: 1 addition & 1 deletion src/backends/cpu/CPUEmbedding.cpp
@@ -17,7 +17,7 @@ ErrorCode mllm::CPUEmbedding::reshape(vector<shared_ptr<Tensor>> inputs, vector<
outputs[0]->setDtype(activationDtype());
weight_.reshape(1, 1, vocabSize_, hiddenSize_);
weight_.setName(name() + ".weight");
-weight_.setDtype(weightsDtype());
+weight_.setDtype(MLLM_TYPE_F32);
return NO_ERROR;
}
ErrorCode mllm::CPUEmbedding::setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
2 changes: 1 addition & 1 deletion src/backends/cpu/CPURMSNorm.cpp
@@ -16,7 +16,7 @@ ErrorCode CPURMSNorm::reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_p
// RMSNorm, like LayerNorm, operates on the channel dimension
weight_.reshape(1, 1, 1, inputs[0]->dimension()); // (C, 1, 1, 1)
weight_.setName(name() + ".weight");
-weight_.setDtype(weightsDtype());
+weight_.setDtype(MLLM_TYPE_F32);
outputs[0]->reshape(inputs[0]->batch(), inputs[0]->shape(1), inputs[0]->shape(2), inputs[0]->shape(3));
outputs[0]->setDtype(activationDtype());
std::cout << name() << " CPURMSNorm reshape" << std::endl;
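
As with the embedding above, the RMSNorm weight is pinned to FP32 no matter what weightsDtype() says: the per-channel scale vector is tiny compared to the linear layers and numerically sensitive, so quantizing it saves almost nothing. For reference, a minimal FP32 RMSNorm over the channel dimension (a sketch, not mllm's kernel):

#include <cmath>
#include <cstdio>
#include <vector>

// y[c] = w[c] * x[c] / sqrt(mean(x^2) + eps), over the channel dimension.
void rmsnorm(const std::vector<float> &x, const std::vector<float> &w,
             std::vector<float> &y, float eps = 1e-6f) {
    float ss = 0.0f;
    for (float v : x) ss += v * v;
    const float inv_rms = 1.0f / std::sqrt(ss / x.size() + eps);
    for (size_t c = 0; c < x.size(); ++c) y[c] = w[c] * x[c] * inv_rms;
}

int main() {
    std::vector<float> x = {1.0f, -2.0f, 3.0f, -4.0f}, w(4, 1.0f), y(4);
    rmsnorm(x, w, y);
    for (float v : y) std::printf("%.4f ", v);  // output has unit RMS, scaled by w
    std::printf("\n");
}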
2 changes: 1 addition & 1 deletion src/quantizer/ParamWriter.hpp
@@ -13,7 +13,7 @@ static void writeInt(FILE *fp, int32_t val) {
}
static void writeString(FILE *fp, const std::string &str) {
writeInt(fp, str.size());
-fwrite(str.c_str(), str.size(), 1, fp);
+fwrite(str.c_str(), sizeof(char), str.size(), fp);
}
static void write_dtype(FILE *fp, DataType dtype) {
writeInt(fp, dtype);
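
Both fwrite calls emit the same str.size() bytes; the C signature is fwrite(ptr, size, nmemb, stream), so the new argument order reads as "str.size() elements of one char each" and its return value counts characters written (the old (size, 1) form only ever returned 0 or 1, hiding partial writes). The record this helper produces is a length-prefixed string; a self-contained sketch with error checking (demo.bin is a hypothetical path):

#include <cstdint>
#include <cstdio>
#include <string>

// Length-prefixed string record, mirroring ParamWriter's writeInt + fwrite pair.
static bool writeString(FILE *fp, const std::string &str) {
    const int32_t len = static_cast<int32_t>(str.size());
    if (fwrite(&len, sizeof(len), 1, fp) != 1) return false;
    // size = 1 char, nmemb = str.size(): the return value counts chars written.
    return fwrite(str.c_str(), sizeof(char), str.size(), fp) == str.size();
}

int main() {
    FILE *fp = std::fopen("demo.bin", "wb");  // hypothetical output path
    if (!fp) return 1;
    const bool ok = writeString(fp, "layers.0.attention.wq.weight");
    std::fclose(fp);
    return ok ? 0 : 1;
}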
14 changes: 6 additions & 8 deletions src/quantizer/main.cpp
@@ -118,16 +118,14 @@ void QuantWriter::QuantParams(DataType dataType) {
if (quant_ptr != nullptr) {
std::cout<<name<<std::endl;
if (name.find("norm") != std::string::npos) {
-auto s = param_loader_->offsets_[name].second / sizeof(float);
-auto tsize = alloc_quant_block(s, MLLM_TYPE_F32).second;
-writeParam(name, MLLM_TYPE_F32, param, tsize);
-std::cout<<"-----has norm-----"<<tsize<<std::endl;
+auto s = param_loader_->offsets_[name].second;
+writeParam(name, MLLM_TYPE_F32, param, s);
+std::cout<<"-----has norm-----"<<s<<std::endl;
}
else if (name.find("tok_embeddings") != std::string::npos){
-auto s = param_loader_->offsets_[name].second / sizeof(float);
-auto tsize = alloc_quant_block(s, MLLM_TYPE_F32).second;
-writeParam(name, MLLM_TYPE_F32, param, tsize);
-std::cout<<"-----has ebd-----"<<tsize<<std::endl;
+auto s = param_loader_->offsets_[name].second;
+writeParam(name, MLLM_TYPE_F32, param, s);
+std::cout<<"-----has ebd-----"<<s<<std::endl;
}else {
writeParam(name, quant_type_, quant_ptr, size);
}
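
The actual dtype fix in the quantizer: offsets_[name].second already holds the parameter's size in bytes, so for tensors kept in FP32 (norms, token embeddings) the old detour of dividing by sizeof(float) to get an element count and then asking alloc_quant_block for an F32 block size was redundant, and wrong whenever the two conversions disagreed. The before/after in miniature, assuming the loader's offset table stores (file offset, byte size) pairs:

#include <cstdio>
#include <utility>

int main() {
    // Assumption: the param loader maps each name to (file offset, byte size).
    std::pair<long, long> entry = {0, 4096L * sizeof(float)};  // a 4096-float norm weight

    // Before: bytes -> element count -> bytes again via a second helper.
    long elems = entry.second / (long)sizeof(float);
    long roundtrip = elems * (long)sizeof(float);  // stand-in for alloc_quant_block(s, F32).second

    // After: pass the recorded byte size straight to writeParam.
    long direct = entry.second;

    std::printf("roundtrip = %ld bytes, direct = %ld bytes\n", roundtrip, direct);
}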
