Skip to content

Commit

Permalink
Merge pull request #28 from UbiquitousLearning/develop-lx
Browse files Browse the repository at this point in the history
Develop lx
  • Loading branch information
lx200916 authored Nov 17, 2023
2 parents 40dbbcc + b6933ae commit c22ddb6
Show file tree
Hide file tree
Showing 7 changed files with 669 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Checks: >
# Turn all the warnings from the checks above into errors.
WarningsAsErrors: "*"

HeaderFilterRegex: ".*\.hpp$&!gguf\.hpp"
CheckOptions:
- key: readability-identifier-naming.NamespaceCase
value: lower_case
Expand Down
1 change: 1 addition & 0 deletions .clang-tidy.ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/quantizer/gguf.hpp
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,16 @@ if(QUANT)
# ${DIR_SRC}
${PROJECT_SOURCE_DIR}/src/ParamLoader.cpp
)
# Converter tool: reads a GGUF model file and rewrites it as an mllm
# parameter file (entry point in src/quantizer/gguf.cpp). Links the same
# quant/quantizer sources as the sibling quantize target above.
add_executable(
from_gguf
${PROJECT_SOURCE_DIR}/src/quantizer/gguf.cpp
${MLLM_QUANT}
${MLLM_QUANTIZER}

# ${DIR_SRC}
${PROJECT_SOURCE_DIR}/src/ParamLoader.cpp
)

endif()

if(TEST)
Expand Down
11 changes: 10 additions & 1 deletion include/Types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ enum DataType {
// MLLM_TYPE_Q3_K = 11,
MLLM_TYPE_Q4_K = 12,
// MLLM_TYPE_Q5_K = 13,
// MLLM_TYPE_Q6_K = 14,
MLLM_TYPE_Q6_K = 14,
MLLM_TYPE_Q8_K = 15,
MLLM_TYPE_I8,
MLLM_TYPE_I16,
Expand Down Expand Up @@ -100,6 +100,15 @@ typedef struct {
static_assert(sizeof(block_q4_K) == 2 * sizeof(mllm_fp16_t) + K_SCALE_SIZE + QK_K / 2, "wrong q4_K block size/padding");
#endif

#pragma pack(1)
typedef struct {
uint8_t ql[QK_K / 2]; // quants, lower 4 bits
uint8_t qh[QK_K / 4]; // quants, upper 2 bits
int8_t scales[QK_K / 16]; // scales, quantized with 8 bits
mllm_fp16_t d; // super-block scale
} block_q6_K;
#pragma pack()

#define QK8_0 32
#pragma pack(1)
typedef struct {
Expand Down
2 changes: 1 addition & 1 deletion src/backends/cpu/CPUSoftMax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ ErrorCode CPUSoftMax::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_p
uint16_t scvt;
for (int i = 0; i < num_classes; i++) {
if (input->dataAt<float>({n, h, s, i}) == -INFINITY) {
dp[i] = 0.0f;
dp[i] = 0.0F;
} else {
mllm_fp16_t tmp = MLLM_FP32_TO_FP16(input->dataAt<float>({n, h, s, i}) - max);
memcpy(&scvt, &tmp, sizeof(scvt));
Expand Down
15 changes: 15 additions & 0 deletions src/quantizer/gguf.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "gguf.hpp"
#include "ParamWriter.hpp"
#include <iostream>
#include <string>
// gguf input_file outfile
int main(int argc, char **argv) {
if (argc != 3) {
std::cerr << "Usage: " << argv[0] << " input_file outfile" << std::endl;
return 1;
}
std::string input_file(argv[1]);
std::string output_file(argv[2]);
auto *writer = new ParamWriter(output_file);
from_gguf(input_file, writer);
return 0;
}
Loading

0 comments on commit c22ddb6

Please sign in to comment.