Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Quantizer #22

Merged
merged 4 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# temp executable name
set(TEST_EXE main_test)
option(TEST "test mode" ON)
option(QUANT "quantize tools" ON)
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()
Expand Down Expand Up @@ -45,6 +46,8 @@ aux_source_directory(${PROJECT_SOURCE_DIR}/src/express DIR_SRC_EXP)
#aux_source_directory(${PROJECT_SOURCE_DIR}/src/quantize DIR_SRC_QUANT)
aux_source_directory(${PROJECT_SOURCE_DIR}/examples EMP_SRC)
aux_source_directory(${PROJECT_SOURCE_DIR}/test TEST_SRC)
aux_source_directory(${PROJECT_SOURCE_DIR}/src/quantizer QUANT_SRC)


include_directories(${PROJECT_SOURCE_DIR}/src)
include_directories(${PROJECT_SOURCE_DIR}/include)
Expand Down Expand Up @@ -108,4 +111,25 @@ if(NNAPI)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/src/backends/nnapi)
add_executable(nnapi_test ${PROJECT_SOURCE_DIR}/demo/nnapi_test.cpp ${DIR_SRC_CPU} ${DIR_SRC_EXP} ${DIR_SRC} )#${DIR_SRC_QUANT})
target_link_libraries(nnapi_test MLLM_CPU MLLM_NNAPI)
endif()
endif()

if (QUANT)
    include_directories(${PROJECT_SOURCE_DIR}/src/quantizer)
    # Collect the quantizer tool sources plus the CPU quantize kernels it reuses.
    # NOTE: the glob below already matches src/quantizer/main.cpp, so it must not
    # be listed again in add_executable (the original listed it twice).
    file(GLOB_RECURSE MLLM_QUANT
            ${CMAKE_CURRENT_LIST_DIR}/src/quantizer/*.cpp
            ${CMAKE_CURRENT_LIST_DIR}/src/quantizer/*.hpp
            ${PROJECT_SOURCE_DIR}/src/backends/cpu/quantize/*.hpp
            ${PROJECT_SOURCE_DIR}/src/backends/cpu/quantize/*.cpp
    )
    message(STATUS "MLLM_Quant: ${MLLM_QUANT}")
    add_executable(
            quantize
            ${MLLM_QUANT}
            ${DIR_SRC}
    )
endif ()
3 changes: 3 additions & 0 deletions include/Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ struct BackendConfig {
enum DataType {
FP32 = 0,
FP16,
INT8,
INT4,
DATA_TYPE_COUNT,
};

} // namespace mllm
Expand Down
4 changes: 2 additions & 2 deletions src/Executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ class Executor {
vector<shared_ptr<Tensor>> result_;
ParamLoader *data_loader_;

mllm_dtype weights_dtype_;
mllm_dtype activation_dtype_;
DataType weights_dtype_;
DataType activation_dtype_;
};

} // namespace mllm
Expand Down
5 changes: 2 additions & 3 deletions src/Graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@ class Graph {
unordered_map<string, shared_ptr<Op>> ops_; // opname: op
// unordered_map<string, shared_ptr<Tensor>> external_tensors_;


mllm_dtype weights_dtype_ = MLLM_TYPE_F32;
mllm_dtype activation_dtype_ = MLLM_TYPE_F32;
DataType weights_dtype_ = MLLM_TYPE_F32;
DataType activation_dtype_ = MLLM_TYPE_F32;
};

} // namespace mllm
Expand Down
10 changes: 5 additions & 5 deletions src/Op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,15 +106,15 @@ class Op {
return NO_ERROR;
}

virtual ErrorCode setDtype(mllm_dtype weight_dtype, mllm_dtype activation_dtype) {
virtual ErrorCode setDtype(DataType weight_dtype, DataType activation_dtype) {
weights_dtype_ = weight_dtype;
activation_dtype_ = activation_dtype;
return NO_ERROR;
}
mllm_dtype weightsDtype() const {
DataType weightsDtype() const {
return weights_dtype_;
}
mllm_dtype activationDtype() const {
DataType activationDtype() const {
return activation_dtype_;
}
/**
Expand Down Expand Up @@ -143,8 +143,8 @@ class Op {
// BackendType backend_type_;
// tensor w
// vector<>
mllm_dtype weights_dtype_ = MLLM_TYPE_F32;
mllm_dtype activation_dtype_ = MLLM_TYPE_F32;
DataType weights_dtype_ = MLLM_TYPE_F32;
DataType activation_dtype_ = MLLM_TYPE_F32;
};

// unordered_map<OpType, function<shared_ptr<Op>(Backend*)>> opMap;
Expand Down
14 changes: 14 additions & 0 deletions src/ParamLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,18 @@ ParamLoader::ParamLoader(std::string filename, bool use_mmap) :
// Convenience overload: forwards to load(Tensor*) for shared_ptr callers.
bool ParamLoader::load(std::shared_ptr<mllm::Tensor> tensor) {
    return load(tensor.get());
}
// Return the names of all parameters recorded in the index
// (i.e. the keys of the data_type_ map).
vector<std::string> ParamLoader::getParamNames() {
    vector<std::string> names;
    names.reserve(data_type_.size());
    for (const auto &entry : data_type_) {
        names.push_back(entry.first);
    }
    return names;
}
// Read the raw bytes of parameter `name` from the weight file.
// Returns a heap buffer of `offset.second` bytes; the caller owns it
// and must release it with delete[].
// NOTE(review): a name absent from offsets_ default-inserts a {0,0}
// entry via operator[] and yields an empty read — confirm callers only
// pass names obtained from getParamNames().
uint8_t *ParamLoader::load(string name) {
    std::pair<uint64_t, uint64_t> offset = offsets_[name];
    uint8_t *data = new uint8_t[offset.second];
    fseek(fp_, offset.first, SEEK_SET);
    fread(data, sizeof(uint8_t), offset.second, fp_);
    // BUG FIX: the original had no return statement — falling off the end
    // of a non-void function is undefined behavior.
    return data;
}
} // namespace mllm
6 changes: 5 additions & 1 deletion src/ParamLoader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace mllm {
class Tensor;
static int readInt(FILE *fp_) {
int tmp;
fread(&tmp, sizeof(int), 1, fp_);
fread(&tmp, sizeof(int32_t), 1, fp_);
return tmp;
}
static uint64_t readu64(FILE *fp_) {
Expand Down Expand Up @@ -44,6 +44,8 @@ static std::string readString(FILE *fp_) {
}
#define _MAGIC_NUMBER 20012
class ParamLoader {
friend class QuantWriter;

public:
ParamLoader(std::string filename, bool use_mmap = false);
#ifdef USE_MMAP
Expand All @@ -52,6 +54,7 @@ class ParamLoader {
~ParamLoader();
bool load(mllm::Tensor *tensor);
bool load(std::shared_ptr<mllm::Tensor> tensor);
vector<std::string> getParamNames();

private:
FILE *fp_;
Expand All @@ -60,6 +63,7 @@ class ParamLoader {
std::uint64_t size_;
std::map<std::string, std::pair<uint64_t, uint64_t>> offsets_; // offsets,length
std::map<std::string, int> data_type_;
uint8_t *load(string name);
bool use_mmap_;
};

Expand Down
30 changes: 5 additions & 25 deletions src/Tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class Tensor {
void setBackend(Backend *bn) {
backend_ = bn;
};
void setDtype(mllm_dtype dtype) {
void setDtype(DataType dtype) {
dtype_ = dtype;
}

Expand All @@ -50,7 +50,7 @@ class Tensor {
bool reshape(const vector<int> &shape);

void alloc();
void alloc(mllm_dtype dtype) {
void alloc(DataType dtype) {
dtype_ = dtype;
alloc();
}
Expand Down Expand Up @@ -281,32 +281,12 @@ class Tensor {
}
}

mllm_dtype dtype() const {
DataType dtype() const {
return dtype_;
}

float dtypeSize() {
switch (dtype_) {
case MLLM_TYPE_F32:
return sizeof(float);
case MLLM_TYPE_F16:
return sizeof(short);
case MLLM_TYPE_I32:
return sizeof(int);
case MLLM_TYPE_I16:
return sizeof(short);
case MLLM_TYPE_I8:
return sizeof(char);
// TODO WRONG?
case MLLM_TYPE_Q4_0:
return (sizeof(block_q4_0)) / (QK4_0 / 2);
case MLLM_TYPE_Q4_K:
return (sizeof(block_q4_K)) / (QK_K / 2);
case MLLM_TYPE_Q8_0:
return (sizeof(block_q8_0)) / (QK8_0);
case MLLM_TYPE_Q8_K:
return (sizeof(block_q8_K)) / (QK_K);
}
return DataTypeSize(dtype_);
}
//
// void setByteWidth(int bw) {
Expand Down Expand Up @@ -362,7 +342,7 @@ class Tensor {
string name_;
// shared_ptr<Backend> backend_;
// int byte_width_; // 32/16/8/4 //enum
mllm_dtype dtype_;
DataType dtype_;
Backend *backend_;
void *host_ptr_;
void *device_ptr_;
Expand Down
2 changes: 1 addition & 1 deletion src/backends/cpu/CPUAttention.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ ErrorCode CPUAttention::free(vector<shared_ptr<Tensor>> inputs, vector<shared_pt
O_proj_->free({kqv_state_}, outputs);
return Op::free(inputs, outputs);
}
ErrorCode CPUAttention::setDtype(mllm_dtype weight_dtype, mllm_dtype activation_dtype) {
ErrorCode CPUAttention::setDtype(DataType weight_dtype, DataType activation_dtype) {
Q_proj_->setDtype(weight_dtype, activation_dtype);
K_proj_->setDtype(weight_dtype, activation_dtype);
V_proj_->setDtype(weight_dtype, activation_dtype);
Expand Down
4 changes: 2 additions & 2 deletions src/backends/cpu/CPUAttention.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ class CPUAttention final : public Op {
virtual ErrorCode execute(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
virtual ErrorCode reshapeOutputs(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
virtual ErrorCode free(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
virtual ErrorCode setDtype(mllm_dtype weight_dtype, mllm_dtype activation_dtype) override;
virtual ErrorCode setDtype(DataType weight_dtype, DataType activation_dtype) override;

virtual ErrorCode load(ParamLoader &loader) override;

private:
Expand Down
50 changes: 50 additions & 0 deletions src/quantizer/ParamWriter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//
// Created by lx on 23-10-30.
//

#include "ParamWriter.hpp"

// Open `filename` for binary writing and emit the magic-number header.
// BUG FIX: the original called writeInt on an unchecked fopen() result;
// writing through a null FILE* is undefined behavior. We now skip the
// header write on failure — callers should verify the file was created.
ParamWriter::ParamWriter(std::string filename) :
    path_(std::move(filename)) {
    fp_ = fopen(path_.c_str(), "wb");
    if (fp_ != nullptr) {
        writeInt(fp_, _MAGIC_NUMBER);
    }
}
// Close the underlying file if it was opened successfully.
ParamWriter::~ParamWriter() {
    if (fp_ != nullptr) {
        fclose(fp_);
    }
}
int ParamWriter::calcIndexSize(const vector<string> names) {
int size = 0;
for (const auto &name : names) {
// One Tensor Index Item Contains: Name_Len(Int)+Name(str)+Weights_Len(UInt64)+Offset(UInt64)+DataType(Int)
size += sizeof(int) + name.size() + sizeof(uint64_t) + sizeof(uint64_t) + sizeof(int);
}
return size;
}
// Seek back to just after the file header (magic int32 + index-length u64)
// and serialize every recorded parameter entry into the region that
// paddingIndex() reserved.
void ParamWriter::writeIndex() {
    const long index_start = sizeof(int32_t) + sizeof(uint64_t);
    fseek(fp_, index_start, SEEK_SET);
    for (const auto &entry : param_info_) {
        writeString(fp_, entry.name);
        write_u64(fp_, entry.size);
        write_u64(fp_, entry.offset);
        writeInt(fp_, entry.type);
    }
}

// Append one tensor's raw bytes at the current file position and record
// its index entry (name, dtype, offset, byte length) for writeIndex().
// paddingIndex() must have been called first so param_info_ has a slot
// at index_.
void ParamWriter::writeParam(string name, DataType type, void *data, uint64_t size) {
    // BUG FIX: must be a reference. The original wrote `auto param = ...`,
    // which copied the element, so every field update was lost and the
    // index later written by writeIndex() contained default-constructed
    // (empty) entries.
    auto &param = param_info_[index_];
    param.name = std::move(name);
    param.type = type;
    param.offset = ftell(fp_);
    fwrite(data, sizeof(char), size, fp_);
    param.size = ftell(fp_) - param.offset;
    index_++;
}
// Reserve zero-filled space in the file for the index that writeIndex()
// will later fill in, and size param_info_ to match. Must be called
// before the first writeParam().
void ParamWriter::paddingIndex(const vector<string> names) {
    param_info_.resize(names.size());
    const int index_size = calcIndexSize(names);
    write_u64(fp_, index_size);
    // BUG FIX: the original did `char i; fwrite(&i, 1, index_size, fp_)`,
    // which reads index_size bytes from a one-byte object — an
    // out-of-bounds read (undefined behavior). Write a real zero buffer.
    const std::string zeros(index_size, '\0');
    fwrite(zeros.data(), sizeof(char), zeros.size(), fp_);
}
46 changes: 46 additions & 0 deletions src/quantizer/ParamWriter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//
// Created by lx on 23-10-30.
//

#ifndef MLLM_PARAMWRITER_HPP
#define MLLM_PARAMWRITER_HPP
#include "ParamLoader.hpp"
// Serialize one 64-bit unsigned value in host byte order.
static void write_u64(FILE *fp, uint64_t val) {
    fwrite(&val, sizeof(val), 1, fp);
}
// Serialize one 32-bit signed value in host byte order.
static void writeInt(FILE *fp, int32_t val) {
    fwrite(&val, sizeof(int32_t), 1, fp);
}
// Serialize a length-prefixed (int32) string with no terminator.
// The explicit cast makes the size_t -> int32_t narrowing visible;
// names longer than INT32_MAX are not representable in this format.
static void writeString(FILE *fp, const std::string &str) {
    writeInt(fp, static_cast<int32_t>(str.size()));
    fwrite(str.c_str(), str.size(), 1, fp);
}
// Serialize a DataType tag as its int32 representation (implicit
// enum-to-int conversion; must match what ParamLoader reads back).
static void write_dtype(FILE *fp, DataType dtype) {
    writeInt(fp, dtype);
}

// One entry of the serialized parameter index.
// NOTE(review): the name is spelled "ParmInfo" (not "ParamInfo");
// renaming would touch every user, so it is only flagged here.
struct ParmInfo {
    std::string name; // tensor/parameter name
    DataType type;    // on-disk data type tag
    uint64_t offset;  // byte offset of the payload within the file
    uint64_t size;    // payload length in bytes
};
// Writes an mllm parameter file: magic number, a fixed-size index region
// (reserved up front via paddingIndex and filled in by writeIndex), then
// the raw tensor payloads appended by writeParam.
class ParamWriter {
public:
    ~ParamWriter();
    ParamWriter(std::string filename);
    // Bytes needed to serialize the index for `names`.
    int calcIndexSize(vector<string> names);
    // Seek back to the reserved region and emit all recorded entries.
    void writeIndex();
    // Append one tensor payload and record its index entry.
    void writeParam(string name, DataType type, void *data, uint64_t size);

private:
    uint64_t index_ = 0; // next free slot in param_info_
    FILE *fp_;
    std::string path_;
    std::vector<ParmInfo> param_info_;

protected:
    // Reserve zeroed space for the index; call before the first writeParam.
    void paddingIndex(vector<string> names);
};

#endif // MLLM_PARAMWRITER_HPP
Loading
Loading