forked from antirez/smaz
-
Notifications
You must be signed in to change notification settings - Fork 8
/
nonalpha.c
126 lines (101 loc) · 3.59 KB
/
nonalpha.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*
Copyright (C) 2012 Paul Gardner-Stephen
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <stdio.h>
#include <strings.h>
#include <math.h>
#include <stdlib.h>
#include <unistd.h>
#include <ctype.h>
#include "arithmetic.h"
#include "charset.h"
/*
  Copy in[] to out[], dropping the characters below 0x80 that charIdx()
  does not recognise.

  A character is kept when it is 0x80 or above, or when charIdx() of its
  lower-cased form is non-negative.  The boundary is >=0x80 so that it agrees
  with encodeNonAlpha(), which only records characters below 0x80 as
  non-alpha: testing >0x80 here would leave the value 0x80 neither kept by
  this function nor recorded by that one, so it would silently vanish.

  in      - input characters (in_len entries are read)
  out     - receives the kept characters; up to in_len entries are written
  out_len - set to the number of characters written to out[]

  Always returns 0.
*/
int stripNonAlpha(unsigned short *in,int in_len,
                  unsigned short *out,int *out_len)
{
  int kept=0;
  int i;

  for(i=0;i<in_len;i++) {
    /* The short-circuit guarantees tolower() only ever sees values <0x80. */
    if (in[i]>=0x80||charIdx(tolower(in[i]))>=0) out[kept++]=in[i];
  }
  *out_len=kept;
  return 0;
}
/* Single-entry table handed to range_encode_symbol()/range_decode_symbol()
   for the two-way "does this message contain non-alpha characters?" flag.
   The value is 95% of 0xffffff, truncated to an integer.
   NOTE(review): presumably the probability of symbol 0 (no non-alpha
   characters) on a 24-bit scale -- confirm against the range coder. */
unsigned int probNoNonAlpha = (unsigned int)(0.95 * 0xffffff);
/*
  Read back what encodeNonAlpha() wrote: a two-way flag, then (only if the
  flag is non-zero) the number of non-alpha characters, their positions and
  their values.

  c                 - range coder to decode from
  nonAlphaPositions - receives the decoded positions via ic_decode_recursive()
  nonAlphaValues    - receives one decoded byte per non-alpha character
  nonAlphaCount     - set to the number of non-alpha characters (0 if none)
  messageLength     - message length; the count is decoded as one of
                      messageLength+1 equiprobable values

  NOTE(review): the decoded count is not checked against the capacity of the
  two output arrays, which this function cannot see -- callers presumably
  need room for messageLength entries; confirm against the call sites.

  Always returns 0.
*/
int decodeNonAlpha(range_coder *c,int nonAlphaPositions[],
                   unsigned char nonAlphaValues[],int *nonAlphaCount,int messageLength)
{
  int n;
  int k=0;

  /* Flag symbol 0 means there is nothing further to decode. */
  if (!range_decode_symbol(c,&probNoNonAlpha,2)) {
    *nonAlphaCount=0;
    return 0;
  }

  /* How many non-alpha characters follow. */
  n=range_decode_equiprobable(c,messageLength+1);

  /* Where they sit in the message ... */
  ic_decode_recursive(nonAlphaPositions,n,messageLength,c);

  /* ... and what they are, one of 256 equiprobable values each. */
  while(k<n) {
    nonAlphaValues[k]=range_decode_equiprobable(c,256);
    k++;
  }

  *nonAlphaCount=n;
  return 0;
}
/*
  Encode the non-alpha characters of a message into the range coder.

  A character counts as non-alpha when it is below 0x80 and charIdx() of its
  lower-cased form is negative.  Characters at or above 0x80 are skipped here
  (higher codepoints are encoded using the unicode processing mechanisms).

  What is written, in order (decodeNonAlpha() reads it back the same way):
    1. one two-way symbol using the probNoNonAlpha table:
       0 = no non-alpha characters (nothing else is written), 1 = some follow;
    2. the count, as one of messageLength+1 equiprobable values;
    3. the positions, via ic_encode_recursive();
    4. each character value, as one of 256 equiprobable values.

  c             - range coder to encode into
  m             - the message (messageLength entries are read)
  messageLength - number of characters in m[]

  Returns 0 on success.  Returns -1, without writing anything to the coder,
  if the message holds more than MAX_NON_ALPHA non-alpha characters; the
  scratch arrays below are fixed-size and would otherwise be overrun.
*/
int encodeNonAlpha(range_coder *c,unsigned short *m,int messageLength)
{
  enum { MAX_NON_ALPHA=1024 };
  unsigned char v[MAX_NON_ALPHA];
  int pos[MAX_NON_ALPHA];
  int count=0;
  int i;

  /* Collect the value and position of every non-alpha character. */
  for(i=0;i<messageLength;i++) {
    if (m[i]>=0x80) continue;                    /* not handled here */
    if (charIdx(tolower(m[i]))>=0) continue;     /* known to charIdx(), so ignore */
    if (count>=MAX_NON_ALPHA) return -1;         /* v[] and pos[] are full */
    v[count]=m[i];
    pos[count++]=i;
  }

  /* Flag whether any non-alpha characters are present, using the
     probNoNonAlpha table for the two-way symbol. */
  if (!count) {
    range_encode_symbol(c,&probNoNonAlpha,2,0);
    return 0;
  }
  range_encode_symbol(c,&probNoNonAlpha,2,1);

  /* Encode number of non-alpha chars */
  range_encode_equiprobable(c,messageLength+1,count);

  /* Encode the positions of special characters */
  ic_encode_recursive(pos,count,messageLength,c);

  /* Encode the characters */
  for(i=0;i<count;i++) {
    range_encode_equiprobable(c,256,v[i]);
  }

  return 0;
}