-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathslice.cpp
161 lines (138 loc) · 5.22 KB
/
slice.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <format>
#include <fstream>
#include <ios>
#include <iostream>
#include <limits>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
// Verifies that something exists at `path` on the filesystem.
// Returns normally when it does; otherwise throws std::runtime_error whose
// message is "File not found: " followed by the path.
void checkFileExists(const std::string &path) {
  const bool present = std::filesystem::exists(path);
  if (present) {
    return;
  }
  throw std::runtime_error("File not found: " + path);
}
// Reports whether the file at `path` is exactly `expectLen` bytes long.
// The size is queried with std::filesystem::file_size, which throws if the
// size cannot be determined.
bool checkFileSize(const std::string &path, unsigned expectLen) {
  const auto actualLen = std::filesystem::file_size(path);
  const bool matches = (actualLen == expectLen);
  return matches;
}
// Opens `path` as a std::fstream in binary mode.
//
// `mode` is OR-ed with std::ios::binary, so callers pass only the direction
// flags (e.g. std::ios::in or std::ios::out).
//
// Returns the opened stream. If the stream cannot be opened, a diagnostic
// naming the path is printed to stderr and std::runtime_error is thrown.
std::fstream openBinaryFile(const std::string &path,
                            std::ios_base::openmode mode) {
  std::fstream stream(path, std::ios::binary | mode);
  if (!stream.is_open()) {
    // Errors go to stderr, like the other error reports in this file, so
    // they do not get mixed into redirected stdout.
    std::cerr << std::format("ERROR: Failed Open File [{}]", path) << std::endl;
    throw std::runtime_error("Failed to open file");
  }
  // Return the local by name: wrapping it in std::move is redundant and
  // prevents the compiler from eliding the move (NRVO).
  return stream;
}
// Clamps the requested slice size to the number of points available.
//
// Returns `expectSize` when it does not exceed `npts`. Otherwise prints a
// warning to stdout and returns `npts`.
unsigned getCutSize(unsigned expectSize, unsigned npts) {
  const bool fitsInDataset = expectSize <= npts;
  if (fitsInDataset) {
    return expectSize;
  }
  std::cout << std::format("WARNING: Expected cut size: {} greater than npts "
                           "{} of dataset, Using npts as cut size ",
                           expectSize, npts)
            << std::endl;
  return npts;
}
// Reads the vector dimension from the header of the dataset at `path`.
//
// When `isBin` is true the first unsigned in the file is skipped and the
// second one is returned (readBin treats the first as the point count).
// When `isBin` is false the very first unsigned in the file is returned.
//
// Throws std::runtime_error if the file cannot be opened (via
// openBinaryFile) or is too short to contain the requested field.
unsigned readDims(const std::string &path, bool isBin = true) {
  std::fstream reader = openBinaryFile(path, std::ios::in);
  if (isBin) {
    reader.seekg(sizeof(unsigned), std::ios::beg);
  }
  unsigned dims{0};
  // A failed seek leaves the stream in a failed state, so this single check
  // covers both the seek and the read.
  if (!reader.read(reinterpret_cast<char *>(&dims), sizeof(unsigned))) {
    throw std::runtime_error("Failed to read dimension header from: " + path);
  }
  return dims;
}
// Reads the first vectors of a "bin" dataset into memory.
//
// The file is read as: unsigned npts, unsigned dims, then float payload.
// At most `size` vectors are loaded; if `size` exceeds npts, getCutSize
// prints a warning and npts vectors are loaded instead.
//
// Returns a buffer of (loaded vectors * dims) floats. The header itself is
// not part of the returned buffer.
//
// Throws std::runtime_error if the file cannot be opened, the header cannot
// be read, or the payload is shorter than the header promises.
std::unique_ptr<float[]> readBin(const std::string &path, const unsigned size) {
  std::fstream reader = openBinaryFile(path, std::ios::in);

  unsigned npts{0};
  unsigned dims{0};
  reader.read(reinterpret_cast<char *>(&npts), sizeof(unsigned));
  reader.read(reinterpret_cast<char *>(&dims), sizeof(unsigned));
  if (!reader) {
    throw std::runtime_error("Failed to read bin header from: " + path);
  }

  const unsigned ss{getCutSize(size, npts)};
  // Multiply in std::size_t. Doing it in `unsigned` wraps for large datasets,
  // which made the allocation smaller than the byte count passed to read().
  const std::size_t floatCount{static_cast<std::size_t>(ss) * dims};

  std::unique_ptr<float[]> cutData = std::make_unique<float[]>(floatCount);
  reader.read(reinterpret_cast<char *>(cutData.get()),
              static_cast<std::streamsize>(sizeof(float) * floatCount));
  if (!reader) {
    throw std::runtime_error("Unexpected end of data while reading: " + path);
  }
  return cutData;
}
// Reads the first records of a "vecs" dataset into memory as raw bytes.
//
// The dimension is taken from the first unsigned in the file, and each
// record is sized as sizeof(unsigned) + sizeof(float) * dims bytes.
// NOTE(review): this assumes every record in the file has the same dims as
// the first one -- confirm against the datasets in use.
//
// At most `size` records are loaded; if `size` exceeds the number of records
// derived from the file size, getCutSize prints a warning and that number is
// loaded instead.
//
// Returns a buffer holding the loaded records exactly as stored on disk.
//
// Throws std::runtime_error if the file cannot be opened or read.
std::unique_ptr<char[]> readVec(const std::string &path, const unsigned size) {
  std::fstream reader = openBinaryFile(path, std::ios::in);

  unsigned dims{0};
  if (!reader.read(reinterpret_cast<char *>(&dims), sizeof(unsigned))) {
    throw std::runtime_error("Failed to read vecs header from: " + path);
  }

  // NOTE: compute npts using file_size
  const std::uintmax_t recordBytes{
      sizeof(unsigned) + sizeof(float) * static_cast<std::uintmax_t>(dims)};
  const std::uintmax_t available{std::filesystem::file_size(path) /
                                 recordBytes};
  // Saturate instead of letting the record count wrap when it is narrowed to
  // the unsigned that getCutSize expects.
  constexpr unsigned maxCount{std::numeric_limits<unsigned>::max()};
  const unsigned npts{available > maxCount ? maxCount
                                           : static_cast<unsigned>(available)};

  const unsigned ss{getCutSize(size, npts)};
  // Keep the byte count wide: a cast to unsigned truncates slices >= 4 GiB.
  const std::size_t bufSize{static_cast<std::size_t>(recordBytes * ss)};

  // Rewind so the returned bytes start with the first record's own header.
  reader.seekg(0, std::ios::beg);
  std::unique_ptr<char[]> cutData = std::make_unique<char[]>(bufSize);
  if (!reader.read(cutData.get(), static_cast<std::streamsize>(bufSize))) {
    throw std::runtime_error("Unexpected end of data while reading: " + path);
  }
  return cutData;
}
// Writes a "bin" dataset to `path`.
//
// Output layout: unsigned npts, unsigned dims, then npts * dims floats taken
// from `data`. `data` must hold at least npts * dims floats; the function
// takes ownership of the buffer and releases it on return.
//
// Throws std::runtime_error if the file cannot be opened or if any write
// (including the final flush on close) fails.
void writeBin(const std::string &path, const std::unique_ptr<float[]> data,
              const unsigned npts, const unsigned dims) {
  std::fstream writer = openBinaryFile(path, std::ios::out);

  writer.write(reinterpret_cast<const char *>(&npts), sizeof(unsigned));
  writer.write(reinterpret_cast<const char *>(&dims), sizeof(unsigned));

  const std::size_t payloadBytes{sizeof(float) *
                                 static_cast<std::size_t>(npts) * dims};
  writer.write(reinterpret_cast<const char *>(data.get()),
               static_cast<std::streamsize>(payloadBytes));

  // close() flushes buffered data; check the stream afterwards so that a
  // failed write (e.g. a full disk) is reported instead of silently ignored.
  writer.close();
  if (!writer) {
    throw std::runtime_error("Failed to write file: " + path);
  }
}
// Writes `bufSize` raw bytes from `data` to `path`.
//
// `data` must hold at least `bufSize` bytes; the function takes ownership of
// the buffer and releases it on return. The bytes are written verbatim with
// no header added.
//
// Throws std::runtime_error if the file cannot be opened or if the write
// (including the final flush on close) fails.
void writeVec(const std::string &path, const std::unique_ptr<char[]> data,
              const unsigned bufSize) {
  std::fstream writer = openBinaryFile(path, std::ios::out);
  writer.write(data.get(), static_cast<std::streamsize>(bufSize));

  // close() flushes buffered data; check the stream afterwards so that a
  // failed write (e.g. a full disk) is reported instead of silently ignored.
  writer.close();
  if (!writer) {
    throw std::runtime_error("Failed to write file: " + path);
  }
}
// Entry point: copies the first [size] vectors of a dataset into a new file.
//
// Usage: ./slice [source path] [source format] [target path]
//                [target format] [size]
//
// Supported source formats are "bin" and "vecs". If [size] is larger than
// the dataset, the whole dataset is copied and the output header / byte
// count reflects the number of vectors actually written.
//
// Returns EXIT_FAILURE on bad arguments, a missing source file, an
// unsupported format or any I/O error; 0 otherwise.
int main(int argc, char **argv) {
  if (argc != 6) {
    std::cerr << "ERROR: Argument Mismatch, Please Follow Usage" << std::endl;
    std::cout << "Usage: ./slice [source dataset path] [source dataset format] "
                 "[target dataset path] [target dataset format] [size]"
              << std::endl;
    return EXIT_FAILURE;
  }

  const std::string sourcePath{argv[1]};
  const std::string sourceFormat{argv[2]};
  const std::string targetPath{argv[3]};
  // NOTE: the target format is accepted on the command line but is not
  // consulted below; the output is always written in the source format.
  const std::string targetFormat{argv[4]};

  try {
    checkFileExists(sourcePath);
  } catch (const std::runtime_error &e) {
    std::cerr << "Error: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  unsigned counts{0};
  try {
    const std::string sizeArg{argv[5]};
    std::size_t consumed{0};
    // Parse into a wider signed type so negative values and values beyond
    // the unsigned range are rejected instead of silently wrapping.
    const long long parsed{std::stoll(sizeArg, &consumed)};
    if (consumed != sizeArg.size()) {
      throw std::invalid_argument("trailing characters in size argument");
    }
    if (parsed < 0 ||
        parsed > static_cast<long long>(std::numeric_limits<unsigned>::max())) {
      throw std::out_of_range("size argument out of range");
    }
    counts = static_cast<unsigned>(parsed);
  } catch (const std::invalid_argument &) {
    std::cerr << "Error: Size Argument must be an integer." << std::endl;
    return EXIT_FAILURE;
  } catch (const std::out_of_range &) {
    std::cerr << "Error: Size Argument is out of range." << std::endl;
    return EXIT_FAILURE;
  }

  std::cout << std::format(
                   "slice first [{}] vectors of [{}] and save it into [{}]",
                   counts, sourcePath, targetPath)
            << std::endl;

  try {
    if (sourceFormat == "bin") {
      const unsigned dims{readDims(sourcePath)};

      // readBin clamps the request to the dataset's point count but only
      // returns the buffer, so recover the same count here; otherwise the
      // output header and write length would exceed the buffer.
      unsigned npts{0};
      {
        std::fstream header = openBinaryFile(sourcePath, std::ios::in);
        if (!header.read(reinterpret_cast<char *>(&npts), sizeof(unsigned))) {
          throw std::runtime_error("Failed to read bin header from: " +
                                   sourcePath);
        }
      }
      const unsigned written{counts > npts ? npts : counts};

      // Pass the original request to readBin so it emits the clamp warning.
      writeBin(targetPath, readBin(sourcePath, counts), written, dims);
    } else if (sourceFormat == "vecs") {
      const unsigned dims{readDims(sourcePath, false)};

      // Same clamping as readVec: record count is derived from file size.
      const std::uintmax_t recordBytes{
          sizeof(unsigned) + sizeof(float) * static_cast<std::uintmax_t>(dims)};
      const std::uintmax_t available{std::filesystem::file_size(sourcePath) /
                                     recordBytes};
      const std::uintmax_t written{counts > available ? available : counts};
      const std::uintmax_t totalBytes{written * recordBytes};
      // writeVec takes its length as unsigned; refuse rather than truncate.
      if (totalBytes > std::numeric_limits<unsigned>::max()) {
        throw std::runtime_error(
            "Requested slice is too large to write in vecs format");
      }

      writeVec(targetPath, readVec(sourcePath, counts),
               static_cast<unsigned>(totalBytes));
      std::cout << std::format("Write {} bytes into {}", totalBytes,
                               targetPath)
                << std::endl;
    } else {
      std::cerr << "ERROR: Input format does not meet requirements"
                << std::endl;
      return EXIT_FAILURE;
    }
  } catch (const std::exception &e) {
    // The helpers report I/O problems by throwing; turn those into a clean
    // error exit instead of an uncaught-exception abort.
    std::cerr << "Error: " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  std::cout << "Cut Done!" << std::endl;
  return 0;
}