-
Notifications
You must be signed in to change notification settings - Fork 0
/
dsplit.c
106 lines (96 loc) · 3 KB
/
dsplit.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/* Copyright surrim 2015-2016
*
* This file is part of dsplit.
*
* dsplit is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dsplit is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dsplit. If not, see <http://www.gnu.org/licenses/>.
*/
#include "dsplit.h"
#include <gcrypt.h>
#include <inttypes.h>
// Relative path of the scratch file that collects the bytes of the chunk
// currently being written; it is renamed once the chunk is complete.
static const char TMP_FILE_NAME[] = "tmp";
// Multiplier of the 16-bit rolling hash (original author's note follows).
static const uint16_t PRIME = 21613; // "god" in base64
// Factor used to remove the byte that is 256 positions old from the rolling
// hash: after 256 hash steps that byte has been multiplied by PRIME 256 times.
static const uint16_t UNDO_PRIME = 41985; // PRIME ** 256 % 65536
// Digest used to name chunks, and the width of that digest in bits.
// HASH_BITS has to match the output size of HASH_ALGORITHM, because that many
// bits are read from the digest buffer when a chunk name is built.
static const int HASH_ALGORITHM = GCRY_MD_SHA256;
static const size_t HASH_BITS = 256;
/*
 * Finishes the chunk currently stored in the scratch file: derives its name
 * from the lowercase hex digest of the hashed data, moves the scratch file to
 * that name, prints the name followed by a newline on stdout, and prepares
 * `md` and `chunk` for the next chunk.
 *
 * md         digest handle that has received every byte of the chunk;
 *            reset before returning
 * chunk      stream open on TMP_FILE_NAME; reopened (and thereby truncated)
 *            before returning
 * directory  directory that receives the chunk, or NULL to store it under
 *            its bare name
 *
 * The function cannot return an error, so failures of fflush(), rename() and
 * freopen() are reported on stderr. The chunk name is printed in every case.
 */
static void split(gcry_md_hd_t md, FILE *chunk, const char *directory) {
    static const char HEX[] = "0123456789abcdef";
    const size_t HASH_BYTES = (HASH_BITS + 7) / 8;
    const uint8_t *hashed = gcry_md_read(md, HASH_ALGORITHM);

    // Two hex digits per digest byte plus the terminating NUL.
    char name[2 * HASH_BYTES + 1];
    for (size_t i = 0; i != HASH_BYTES; i++) {
        name[2 * i] = HEX[hashed[i] >> 4];
        name[2 * i + 1] = HEX[hashed[i] & 15];
    }
    name[2 * HASH_BYTES] = '\0';

    // Room for "<directory>/" (when given) followed by the name and its NUL.
    const size_t prefixLength = directory ? strlen(directory) + 1 : 0;
    char fileName[prefixLength + 2 * HASH_BYTES + 1];
    if (directory) {
        snprintf(fileName, prefixLength + 2 * HASH_BYTES + 1, "%s/%s", directory, name);
    } else {
        snprintf(fileName, prefixLength + 2 * HASH_BYTES + 1, "%s", name);
    }

    // Write out buffered bytes before the file appears under its final name,
    // so the hash-named file is never visible with only part of its content.
    if (fflush(chunk) != 0) {
        perror(TMP_FILE_NAME);
    }

    // rename() onto an existing file is implementation-defined in ISO C, so
    // the target is removed first; a failure here normally just means that
    // the target did not exist.
    remove(fileName);
    if (rename(TMP_FILE_NAME, fileName) != 0) {
        perror(fileName);
    }

    gcry_md_reset(md);
    if (!freopen(TMP_FILE_NAME, "wb", chunk)) {
        perror(TMP_FILE_NAME);
    }
    printf("%s\n", name);
}
/*
 * Sets up the state needed to write chunks: creates a digest context for
 * HASH_ALGORITHM in `*md` and opens TMP_FILE_NAME for binary writing in
 * `*chunk`. Neither result is checked here; `*chunk` holds whatever fopen()
 * returned.
 */
static void openFile(gcry_md_hd_t *md, FILE **chunk) {
    const unsigned int noFlags = 0;
    gcry_md_open(md, HASH_ALGORITHM, noFlags);

    FILE *scratch = fopen(TMP_FILE_NAME, "wb");
    *chunk = scratch;
}
/*
 * Appends `dataSize` bytes starting at `data` to the current chunk: the bytes
 * are fed to the digest `md` and then written to the stream `chunk`.
 */
static void updateFile(gcry_md_hd_t md, FILE *chunk, const void *data, size_t dataSize) {
    const unsigned char *bytes = data;

    gcry_md_write(md, bytes, dataSize);
    fwrite(bytes, sizeof *bytes, dataSize, chunk);
}
/*
 * Tears down the chunk-writing state: releases the digest context `md`,
 * closes the stream `chunk` and deletes the scratch file TMP_FILE_NAME.
 * Results of the individual calls are deliberately not inspected.
 */
static void closeFile(gcry_md_hd_t md, FILE *chunk) {
    gcry_md_close(md);
    (void)fclose(chunk);
    (void)remove(TMP_FILE_NAME);
}
void dsplit(FILE *file, const char *directory) {
gcry_md_hd_t md;
FILE *chunk;
openFile(&md, &chunk);
uint8_t buffer0[256] = {};
uint8_t buffer1[256] = {};
uint8_t *oldBuffer = buffer0;
uint8_t *newBuffer = buffer1;
uint16_t hash = 0;
while (1) {
int readBytes = fread(newBuffer, 1, 256, file);
size_t begin = 0;
for (int i = 0; i < readBytes; i++) {
uint8_t oldByte = oldBuffer[i];
uint8_t newByte = newBuffer[i];
hash = hash * PRIME - UNDO_PRIME * oldByte + newByte;
if (hash == 0xFFFF) {
updateFile(md, chunk, newBuffer + begin, i - begin + 1);
begin = i + 1;
split(md, chunk, directory);
}
}
updateFile(md, chunk, newBuffer + begin, readBytes - begin);
if (readBytes != 256) {
break;
}
uint8_t *tmpBuffer = newBuffer;
newBuffer = oldBuffer;
oldBuffer = tmpBuffer;
}
if (ftell(chunk)) {
split(md, chunk, directory);
}
closeFile(md, chunk);
}