diff --git a/demo/main_test.cpp b/demo/main_test.cpp index 34020db6..807fca98 100644 --- a/demo/main_test.cpp +++ b/demo/main_test.cpp @@ -122,8 +122,8 @@ int main() { Net net(c->sub_param_, bn); net.convert(); // net.Run(); - ParamLoader param_loader("../models/llama-2-7b-fp32.mllm"); -// ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm"); +// ParamLoader param_loader("../models/llama-2-7b-fp32.mllm"); + ParamLoader param_loader("../models/llama-2-7b-q4_0-LinearOnly.mllm"); Executor ex(&net, ¶m_loader); // Executor ex(&net); shared_ptr input = std::make_shared(); diff --git a/src/Executor.cpp b/src/Executor.cpp index 1bd8cec8..c651fc06 100644 --- a/src/Executor.cpp +++ b/src/Executor.cpp @@ -2,8 +2,8 @@ namespace mllm { void Executor::init() { // - weights_dtype_ = MLLM_TYPE_F32; - activation_dtype_ = MLLM_TYPE_F32; +// weights_dtype_ = MLLM_TYPE_F32; +// activation_dtype_ = MLLM_TYPE_F32; } void Executor::execute(vector input_size) { diff --git a/src/Executor.hpp b/src/Executor.hpp index 4a0cc43c..ede7313d 100644 --- a/src/Executor.hpp +++ b/src/Executor.hpp @@ -76,8 +76,8 @@ class Executor { vector> result_; ParamLoader *data_loader_; - DataType weights_dtype_; - DataType activation_dtype_; +// DataType weights_dtype_; +// DataType activation_dtype_; }; } // namespace mllm diff --git a/src/Graph.cpp b/src/Graph.cpp index f1c111b5..32b7e9af 100644 --- a/src/Graph.cpp +++ b/src/Graph.cpp @@ -39,7 +39,12 @@ Graph::Graph(const NetParameter ¶m, Backend *bn, unordered_mapname; // my_op->setName(lname); - my_op->setDtype(weights_dtype_, activation_dtype_); + auto op_type = net_op->type; + if(op_type ==LINEAR || op_type == ATTENTION){ + my_op->setDtype(weights_dtype_, activation_dtype_); + } else{ + my_op->setDtype(MLLM_TYPE_F32, activation_dtype_); + } ops_[net_op->name] = my_op; } // shapeInit(external_tensors); diff --git a/src/Graph.hpp b/src/Graph.hpp index 698209a8..08f374fa 100644 --- a/src/Graph.hpp +++ b/src/Graph.hpp @@ -84,7 +84,7 @@ class Graph { unordered_map> ops_; // opname: op // unordered_map> external_tensors_; - DataType weights_dtype_ = MLLM_TYPE_F32; + DataType weights_dtype_ = MLLM_TYPE_Q4_0;//MLLM_TYPE_F32; DataType activation_dtype_ = MLLM_TYPE_F32; }; diff --git a/src/Net.cpp b/src/Net.cpp index fdc426a4..3534c104 100644 --- a/src/Net.cpp +++ b/src/Net.cpp @@ -50,7 +50,7 @@ Net::Net(const vector ¶m, BackendConfig config) : tensor_names_[0].push_back(in_tensor->name); printf("Net init\n"); } - +/* void Net::convert() { // auto bn = new CPUBackend(mm); //TODO // backends_["cpu"] = bn; @@ -64,7 +64,7 @@ void Net::convert() { subg_1.reset(new Graph(sub_param, backends_[BackendType::MLLM_CPU], tensors_)); subGraphs_["G" + std::to_string(i)] = subg_1; } -} +}*/ void Net::convert(BackendType backend_type) { for (int i = 0; i < (int)net_param_.size(); ++i) { diff --git a/src/Net.hpp b/src/Net.hpp index 0dd6855e..98d29544 100644 --- a/src/Net.hpp +++ b/src/Net.hpp @@ -14,10 +14,10 @@ class Net { explicit Net(const vector ¶m, BackendConfig config); virtual ~Net() = default; - void convert(); + //void convert(); // TODO: remove // convert all subgraph to specified backend, just for develop - void convert(BackendType backend_type); + void convert(BackendType backend_type = BackendType::MLLM_CPU); void reshapeInput(); void reshapeInput(vector shape); diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 2f9c092a..f557cfaf 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -59,7 +59,7 @@ void Tensor::alloc() { // 如果原有内存已经分配,则释放它 backend_->free(host_ptr_); } - backend_->alloc(&host_ptr_, CntSize()); + backend_->alloc(&host_ptr_, cntSize()); allocated_ = true; } @@ -94,13 +94,13 @@ void Tensor::copyFrom(const Tensor &source, bool copy_diff, bool reshape) { CHECK_EQ(source.dtype(), dtype()); CHECK_EQ(source.count(), count()); // copy - memcpy(host_ptr_, source.host_ptr_, CntSize()); + memcpy(host_ptr_, source.host_ptr_, cntSize()); } void Tensor::copyFrom(const shared_ptr &source, bool reshape) { CHECK_EQ(source->dtype(), dtype()); CHECK_EQ(source->count(), count()); // copy - memcpy(host_ptr_, source->host_ptr_, CntSize()); + memcpy(host_ptr_, source->host_ptr_, cntSize()); } void Tensor::permute(int axis0, int axis1, int axis2, int axis3, bool copy) { // 检查轴的合法性 diff --git a/src/Tensor.hpp b/src/Tensor.hpp index f069b133..cec2af22 100644 --- a/src/Tensor.hpp +++ b/src/Tensor.hpp @@ -285,7 +285,7 @@ class Tensor { return dtype_; } - int CntSize() { + int cntSize() { return DataTypeSize(dtype_, count_); } diff --git a/src/backends/cpu/CPUEmbedding.cpp b/src/backends/cpu/CPUEmbedding.cpp index 1874d48d..be14e72f 100644 --- a/src/backends/cpu/CPUEmbedding.cpp +++ b/src/backends/cpu/CPUEmbedding.cpp @@ -17,7 +17,7 @@ ErrorCode mllm::CPUEmbedding::reshape(vector> inputs, vector< outputs[0]->setDtype(activationDtype()); weight_.reshape(1, 1, vocabSize_, hiddenSize_); weight_.setName(name() + ".weight"); - weight_.setDtype(weightsDtype()); + weight_.setDtype(MLLM_TYPE_F32); return NO_ERROR; } ErrorCode mllm::CPUEmbedding::setUp(vector> inputs, vector> outputs) { diff --git a/src/backends/cpu/CPURMSNorm.cpp b/src/backends/cpu/CPURMSNorm.cpp index 7db50677..76a40d11 100644 --- a/src/backends/cpu/CPURMSNorm.cpp +++ b/src/backends/cpu/CPURMSNorm.cpp @@ -16,7 +16,7 @@ ErrorCode CPURMSNorm::reshape(vector> inputs, vectordimension()); // (C, 1, 1, 1) weight_.setName(name() + ".weight"); - weight_.setDtype(weightsDtype()); + weight_.setDtype(MLLM_TYPE_F32); outputs[0]->reshape(inputs[0]->batch(), inputs[0]->shape(1), inputs[0]->shape(2), inputs[0]->shape(3)); outputs[0]->setDtype(activationDtype()); std::cout << name() << " CPURMSNorm reshape" << std::endl; diff --git a/src/quantizer/ParamWriter.hpp b/src/quantizer/ParamWriter.hpp index e0956e31..0282ea7e 100644 --- a/src/quantizer/ParamWriter.hpp +++ b/src/quantizer/ParamWriter.hpp @@ -13,7 +13,7 @@ static void writeInt(FILE *fp, int32_t val) { } static void writeString(FILE *fp, const std::string &str) { writeInt(fp, str.size()); - fwrite(str.c_str(), str.size(), 1, fp); + fwrite(str.c_str(), sizeof(char ), str.size(), fp); } static void write_dtype(FILE *fp, DataType dtype) { writeInt(fp, dtype); diff --git a/src/quantizer/main.cpp b/src/quantizer/main.cpp index 90fbbc9f..406c79e4 100644 --- a/src/quantizer/main.cpp +++ b/src/quantizer/main.cpp @@ -118,16 +118,14 @@ void QuantWriter::QuantParams(DataType dataType) { if (quant_ptr != nullptr) { std::cout<offsets_[name].second / sizeof(float); - auto tsize = alloc_quant_block(s, MLLM_TYPE_F32).second; - writeParam(name, MLLM_TYPE_F32, param, tsize); - std::cout<<"-----has norm-----"<offsets_[name].second; + writeParam(name, MLLM_TYPE_F32, param, s); + std::cout<<"-----has norm-----"<offsets_[name].second / sizeof(float); - auto tsize = alloc_quant_block(s, MLLM_TYPE_F32).second; - writeParam(name, MLLM_TYPE_F32, param, tsize); - std::cout<<"-----has ebd-----"<offsets_[name].second; + writeParam(name, MLLM_TYPE_F32, param, s); + std::cout<<"-----has ebd-----"<