-
Notifications
You must be signed in to change notification settings - Fork 1
/
string.ps
686 lines (655 loc) · 15 KB
/
string.ps
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
% PostScript String Procedures
% by Raymond Luckhurst, scriptit.uk
% constants
% WHITESPACE and REALNUMBER are character sets: procedures in this file test
% membership by running `search` with a one-character string against them
% (//NAME references are resolved when the using procedure is scanned, so
% these definitions must come first)
/WHITESPACE ( \n\r\t\b\f\000) def % whitespace chars (from PHP): space, LF, CR, tab, backspace, form feed, NUL
/REALNUMBER (0123456789.Ee+-) def % real (and integer) signed number chars
/REPLACEMENT 16#FFFD def % Unicode Replacement Character code point in the Specials table
% build a one-character string from a character code
% only the low 8 bits of the code are used
% <int> chr <str>
/chr {
    16#FF and               % keep a single byte
    1 string dup            % code str str
    3 -1 roll               % str str code
    0 exch put              % str[0] = code, leaving str
} bind def
% get ASCII value of (first) string character
% only the first byte is read; any further characters are ignored
% NOTE(review): `get` raises rangecheck on an empty string, so callers must pass at least one character
% <str> ord <int>
/ord {
0 get
} bind def
% get string hash (32 bit unsigned)
% djb2 algorithm by Dan Bernstein (xor variant), see http://www.cse.yorku.ca/~oz/hash.html
% starts from 5381 and, for every byte, combines (hash << 5) with hash via iadd32, then xors in the byte
% NOTE(review): iadd32 is not defined in this file; presumably a 32-bit wrapping add supplied elsewhere - confirm
% <str> hash <hash>
/hash {
5381 % hash
exch {
% stack here: hash char
exch
dup 5 bitshift iadd32 xor % hash = ((hash << 5) iadd32 hash) xor char, i.e. (hash * 33) xor char
} forall
} bind def
% convert any object to its string form
% strings pass through untouched; names get a buffer of exactly their own
% length; every other type is rendered by cvs into a 20-byte buffer
% <any> cvas <string>
/cvas {
    dup type dup /stringtype eq {
        pop                                             % already a string
    }{
        /nametype eq { dup length }{ 20 } ifelse        % buffer size
        string cvs
    } ifelse
} bind def
% join two strings into a newly allocated string
% neither operand is modified
% <str1> <str2> cat <str1str2>
/cat {
    1 index length 1 index length add string    % str1 str2 new
    dup 0 4 index putinterval                   % new[0..] = str1
    dup 4 -1 roll length                        % str2 new new len1
    4 -1 roll putinterval                       % new[len1..] = str2, leaving new
} bind def
% concatenate two objects of any type
% both operands are converted with cvas, then joined with cat
% <any> <any> concat2 <string>
/concat2 {
    exch cvas       % left operand as string
    exch cvas       % right operand as string
    cat
} bind def
% concatenate array of objects with separator
% every element is converted with cvas; the separator is written between
% consecutive elements only
% an empty array yields an empty string, whatever the separator
% <[any]> <separator> concatas <string>
/concatas {
    exch [ exch { cvas } forall ] exch      % strs sep: any to string
    2 copy length exch length 1 sub mul     % separator space: len(sep) * (count - 1)
    dup 0 lt { pop 0 } if                   % empty array: count - 1 is negative, clamp to 0
    2 index { length add } forall           % plus strings space
    string                                  % result, sized exactly
    dup /NullEncode filter                  % write into the result as a file
    3 -1 roll
    ()                                      % nothing precedes the first element
    5 -1 roll
    {                                       % result file sep lead str
        3 index 2 index writestring         % lead: empty first, separator afterwards
        3 index exch writestring            % the element itself
        pop dup                             % lead := sep for the next element
    } forall
    pop pop
    closefile                               % close the filter, leaving the result string
} bind def
% concatenate array of objects
% delegates to concatas with an empty separator
% <[any]> concata <string>
/concata {
() concatas
} bind def
% strip whitespace from beginning and end of string
% a character counts as whitespace when it is found in WHITESPACE
% returns substring (getinterval on the operand, no copy is made)
% <string> trim <string>
/trim {
//WHITESPACE exch % ws str
dup length 1 sub exch 0 % ws last str start
% scan forwards: advance start past every leading whitespace char
0 1 4 index {
2 index exch 1 getinterval % one-char string at index i
4 index exch search { pop pop pop 1 add }{ pop exit } ifelse % in ws: start + 1, else stop scanning
} for
exch 3 -1 roll % ws start str end (end begins at the last index)
% scan backwards from the last index down to start: move end before trailing whitespace
dup -1 4 index {
2 index exch 1 getinterval % one-char string at index i
4 index exch search { pop pop pop 1 sub }{ pop exit } ifelse % in ws: end - 1, else stop scanning
} for
exch 3 1 roll % ws str start end
1 add 1 index sub getinterval % str[start .. end], count = end + 1 - start
exch pop % drop ws
} bind def
% split string by given substring
% the pieces are the substrings returned by `search`, so they share storage
% with the operand
% a string that does not contain <seek> yields a one-element array
% an empty <seek> is matched by `search` without consuming any input, which
% would loop until the stack overflows; it is treated as "no separator" and
% also yields a one-element array
% <string> <seek> split <array>
/split {
    dup length 0 eq {
        pop [ exch ]                    % nothing to split on
    }{
        [ 3 1 roll                      % mark str seek
        {
            search {
                3 1 roll                % keep pre; continue with post and match as the next seek
            }{
                exit                    % remainder stays as the final piece
            } ifelse
        } loop
        ]
    } ifelse
} bind def
% split CSV string into array
% plain split on every comma: quoting and escaping are not interpreted
% <string> csv <array>
/csv {
(,) split
} bind def
% replace occurrences of search string
% implemented as split on <seek> followed by concatas with <replace> as the separator,
% so the result is a new string
% <string> <seek> <replace> replace <string>
/replace {
3 1 roll split exch concatas % replace str seek -> replace pieces -> pieces replace
} bind def
% lower-case copy of a string (ASCII A-Z only)
% the operand is left untouched; all other bytes are copied as they are
% <string> tolower <string>
/tolower {
    dup length string copy                      % private copy
    0 1 2 index length 1 sub {                  % copy i
        1 index 1 index get                     % copy i c
        dup 65 lt 1 index 90 gt or {
            pop pop                             % outside A-Z: keep
        }{
            32 add 2 index 3 1 roll put         % copy[i] = c + 32
        } ifelse
    } for
} bind def
% upper-case copy of a string (ASCII a-z only)
% the operand is left untouched; all other bytes are copied as they are
% <string> toupper <string>
/toupper {
    dup length string copy                      % private copy
    0 1 2 index length 1 sub {                  % copy i
        1 index 1 index get                     % copy i c
        dup 97 lt 1 index 122 gt or {
            pop pop                             % outside a-z: keep
        }{
            32 sub 2 index 3 1 roll put         % copy[i] = c - 32
        } ifelse
    } for
} bind def
% read the text that follows in the current file, up to a marker string e.g. (%EOD)
% at most 65535 bytes are read into a fresh string
% <EODString> heredoc <string>
/heredoc {
    currentfile 0 3 -1 roll         % file 0 marker
    /SubFileDecode filter           % stream that ends at the marker
    65535 string readstring         % substring filled-flag
    pop                             % only the text is returned
} bind def
% crunch postscript code string and optionally fold at 80 characters maximum
% works on a copy of <ps>: drops comments, collapses runs of whitespace and
% re-inserts a single space only where the neighbouring characters need one;
% comment lines that start a line with %! or %% (DSC) are kept on their own line
% when <fold> is true, spaces inside ( ) strings are swapped for char 31 before
% folding and swapped back afterwards, so fold never breaks inside a string
% <ps> <fold> crunchps <string>
/crunchps {
exch
dup length string copy
% stack is now: fold; all state lives in a private dictionary
20 dict begin
/$depth 0 def % parenthesis depth
/$escape false def % escape
/$whitespace null def % last whitespace char
/$comment false def % in comment or primed for DSC
/$dsc false def % in DSC comment
/$str exch def % src/dest string
/$r $str /ReusableStreamDecode filter def % reader
/$w 0 def % write index
/$this null def % this read char
/$last null def % last read char
/$save null def % last saved char
/r_more { $r bytesavailable 0 gt } bind def % have more chars
/r_getc { /$last $this def $r read not { null } if dup /$this exch def } bind def % read
/r_peek { $r read { $r dup fileposition 1 sub setfileposition }{ null } ifelse } bind def % peek
/w_putc { /$save exch def $str $w $save put /$w $w 1 add def } bind def % write
% outer loop: one input char per pass; the inner loop runs once and is left via exit
{
r_more not { exit } if % end
r_getc { % case stmt
$depth 0 ne { % in string
dup 92 eq { % escape
/$escape $escape not def
}{
dup dup 40 eq exch 41 eq or $escape not and { % unescaped parenthesis
/$depth $depth 2 index 40 eq { 1 add }{ 1 sub } ifelse def
}{
% 2 index is the fold flag left on the operand stack by the caller
dup 32 eq 2 index and { % space and folding
pop 31 % substitute space -> unit separator
} if
} ifelse
/$escape false def
} ifelse
}{
dup 10 eq { % newline
/$whitespace exch def
/$comment false def
$dsc { % in DSC comment
10 w_putc % save \n
/$dsc false def
} if
exit
} if
$dsc not { % not in DSC comment
$comment { % in comment
pop
exit
} if
dup 37 eq { % percent: start comment
pop
r_peek
dup dup 33 eq exch 37 eq or { % %! or %%
$last 10 eq $last null eq or { % beginning of line: start DSC comment
$save 10 ne $save null ne and { 10 w_putc } if % save \n
37 w_putc % save %
r_getc w_putc % save ! or %
/$dsc true def
} if
} if
pop
/$comment true def
exit
} if
dup 32 le { % whitespace
/$whitespace exch def
exit
} if
dup 40 eq { % open parenthesis: start string
/$depth 1 def
/$escape false def
} if
% NOTE(review): `47 le` is true for every code up to 47, not only the slash (47) - confirm whether `eq` was intended
dup 47 le { % slash
$last 10 eq $last null eq or { % beginning of line: new definition
$save 10 ne $save null ne and { 10 w_putc } if % save \n
} if
} if
$whitespace null ne $save null ne and { % insert space?
$save 123 ne $save 91 ne $save 60 ne and and % last != { or [ or <
$this 125 ne $this 93 ne $this 62 ne and and % this != } or ] or >
and {
$save 32 gt { 32 w_putc } if % save space
} if
} if
} if
} ifelse
w_putc % save
/$whitespace null def
exit
} loop
} loop
$str 0 $w getinterval % the written prefix is the result
end
exch { % fold
80 fold
0 1 2 index length 1 sub {
2 copy get
31 eq { 2 copy 32 put } if % restore unit separator -> space
pop
} for
} if
} bind def
% fold lines at character width without breaking words
% works on a copy: once the text since the last newline runs past <width>,
% the most recent space on that line is replaced by a newline
% a line that contains no space is left unbroken (previously the break was
% written at a stale index, which could overwrite the first character of the
% string or a space on an earlier line)
% <string> <width> fold <string>
/fold {
    exch dup length string copy exch    % s w: private copy
    -1                                  % nl: index of last newline
    -1                                  % sp: index of last space (none seen yet)
    0 1 5 index length 1 sub {          % s w nl sp i
        4 index 1 index get             % s w nl sp i c
        dup 10 eq {
            pop
            exch 3 -1 roll pop          % s w i sp: nl := i
        }{
            32 eq { exch pop dup } if   % space: sp := i
            2 index sub                 % s w nl sp (i - nl)
            3 index gt                  % past the width?
            2 index 2 index lt and      % ...and a space exists on this line (nl < sp)
            {
                exch pop                % s w sp
                2 index 1 index 10 put  % break at the last space
                dup                     % nl := sp
            } if
        } ifelse
    } for
    pop pop pop                         % leave the folded copy
} bind def
% format the low byte of an integer as a PostScript octal escape:
% a backslash followed by three zero-padded octal digits
% <int> octal8 <string>
/octal8 {
    16#FF and 8 3 string cvrs       % digits: 1 to 3 octal digits
    (\\000) 4 string copy           % digits tmpl: fresh zero-filled template
    dup 4 3 index length sub        % digits tmpl tmpl pos (right-align)
    4 -1 roll putinterval           % tmpl[pos..] = digits, leaving tmpl
} bind def
% format a 16-bit value as two octal escapes, high byte first
% each byte is formatted by octal8 and the two results are joined with cat
% <int> octal16 <string>
/octal16 {
    dup octal8                      % n lo
    exch -8 bitshift octal8         % lo hi
    exch cat                        % hi followed by lo
} bind def
% format the low 16 bits of an integer as four zero-padded hexadecimal digits
% <int> hex16 <string>
/hex16 {
    16#FFFF and 16 4 string cvrs    % digits: 1 to 4 hex digits
    (0000) 4 string copy            % digits tmpl: fresh zero-filled template
    dup 4 3 index length sub        % digits tmpl tmpl pos (right-align)
    4 -1 roll putinterval           % tmpl[pos..] = digits, leaving tmpl
} bind def
% convert UTF-8 to UTF-16BE
% naive implementation (4 bytes max, cf. MySQL utf8mb4)
% surrogate pairs produced for code points > 16#FFFF (Supplementary Characters)
% input is not validated: a leading byte below 16#C0 (including a stray
% continuation byte) is simply masked to 7 bits
% see https://www.ghostscript.com/doc/base/gsargs.c
% and https://en.wikipedia.org/wiki/UTF-8#Codepage_layout
% and http://www.i18nguy.com/unicode-plane1-utf8.html
% <utf8mb4 string> utf8mb4toutf16be <utf16 string>
/utf8mb4toutf16be {
% builds an array of 16-bit code units on the stack, then packs it with int2str16
[ exch
0 % # continuation bytes
exch {
% stack here: ... value-in-progress count byte
exch
dup 0 eq { % leading byte in sequence
pop
dup 16#C0 lt {
16#7F and 0 % no continuation
}{
dup 16#E0 lt {
16#1F and 1 % 1 continuation byte
}{
dup 16#F0 lt {
16#0F and 2 % 2 continuation bytes
}{
% the count starts at 16#13 and reaches 16#10 (not 0) after the third continuation byte
16#07 and 16#13 % 3 continuation bytes; surrogates marker
} ifelse
} ifelse
} ifelse
}{ % continuation
1 sub
3 1 roll
8#77 and exch 6 bitshift or % value = (value << 6) | (byte & 16#3F)
exch
dup 16#10 eq { % supplementary characters
pop
16#10000 sub
dup -10 bitshift 16#3FF and 16#D800 or % high surrogate
exch 16#3FF and 16#DC00 or % low surrogate
0
} if
} ifelse
} forall
pop % drop the continuation count
]
int2str16
} bind def
% convert UTF-8 to UTF-16BE for Basic Multilingual Plane only
% naive implementation (3 bytes max, cf. MySQL utf8mb3)
% code point FFFD replacement character produced for code points > 16#FFFF (Supplementary Characters)
% input is not validated: a leading byte below 16#C0 (including a stray
% continuation byte) is simply masked to 7 bits
% <utf8 string> utf8toutf16be <utf16 string>
/utf8toutf16be {
% builds an array of 16-bit code units on the stack, then packs it with int2str16
[ exch
0 % # continuation bytes
exch {
% stack here: ... value-in-progress count byte
exch
dup 0 eq { % leading byte in sequence
pop
dup 16#C0 lt {
16#7F and 0 % no continuation
}{
dup 16#E0 lt {
16#1F and 1 % 1 continuation byte
}{
dup 16#F0 lt {
16#0F and 2 % 2 continuation bytes
}{
% the count starts at 16#13 and reaches 16#10 (not 0) after the third continuation byte
16#07 and 16#13 % 3 continuation bytes; surrogates marker
} ifelse
} ifelse
} ifelse
}{ % continuation
1 sub
3 1 roll
8#77 and exch 6 bitshift or % value = (value << 6) | (byte & 16#3F)
exch
dup 16#10 eq { % supplementary characters
pop pop //REPLACEMENT 0 % discard the decoded value, emit the replacement character
} if
} ifelse
} forall
pop % drop the continuation count
]
int2str16
} bind def
% convert UCS-2 to UTF-8 for Basic Multilingual Plane
% naive implementation
% reads the input two bytes at a time (high byte first) and emits 1, 2 or 3
% UTF-8 bytes per code unit; surrogate pairs are not combined
% NOTE(review): an odd-length input reads one byte past the end on the last pair - confirm callers pass whole code units
% see https://www.ghostscript.com/doc/base/gsargs.c
% <ucs2 string> ucs2toutf8 <utf8 string>
/ucs2toutf8 {
dup length dup add array exch 0 exch % byte buffer (array, twice the input length), bytes written so far, input
0 % ucs2 index
{
% stack here: buffer written ucs2 index
2 copy exch length ge { exit } if
2 copy 2 copy 1 add get 3 1 roll get 8 bitshift or % code = (high << 8) | low
[ exch
dup 16#80 ge {
dup 16#800 lt {
dup -6 bitshift 16#C0 or % 2-byte form: leading byte
}{
dup -12 bitshift 16#E0 or % 3-byte form: leading byte
exch dup -6 bitshift 16#3F and 16#80 or % middle continuation byte
} ifelse
exch 16#3F and 16#80 or % final continuation byte
} if
]
4 -1 roll exch
4 index 2 index 2 index putinterval % buffer[written ..] = encoded bytes
length add 3 1 roll % written += number of encoded bytes
2 add
} loop
pop pop
0 exch getinterval % used part of the buffer
dup length string exch % copy the byte values into a string of the same length
0 exch {
3 copy put pop
1 add
} forall
pop
} bind def
% convert UTF16BE string to array of 2 byte char strings
% every pair of input bytes becomes one new 2-byte string; surrogate pairs are not joined
% NOTE(review): the array holds length / 2 entries, so an odd trailing byte indexes past its end - confirm inputs are even-length
% <string> utf16chars <array>
/utf16chars {
dup length 2 idiv array % UTF16BE char strings
exch 0 % index
exch {
% stack here: array index byte; a null slot means this byte is the first of its pair
2 index 2 index get dup null eq {
pop
2 index 2 index 2 string dup 0 6 -1 roll put put % new 2-byte string with byte 0 set, stored in the slot
}{
exch 1 exch put % second byte completes the string
1 add
} ifelse
} forall
pop
} bind def
% join an array of 2-byte character strings into one UTF16BE string
% each element is copied to offset 2 * its array position
% <array> utf16string <string>
/utf16string {
    dup length 2 mul string         % arr dest
    0 3 -1 roll                     % dest pos arr
    {                               % dest pos chr
        2 index 2 index             % dest pos chr dest pos
        3 -1 roll putinterval       % dest[pos..] = chr
        2 add                       % next slot
    } forall
    pop                             % drop pos, leaving dest
} bind def
% convert UTF16 string to integer array
% each pair of bytes becomes one integer, first byte high: (byte0 << 8) | byte1
% NOTE(review): the array holds length / 2 entries, so an odd trailing byte indexes past its end - confirm inputs are even-length
% <string> str2int16 <array>
/str2int16 {
dup length 2 idiv array
exch 0 % index
exch {
% stack here: array index byte; a null slot means this byte is the first of its pair
2 index 2 index get dup null eq {
pop
8 bitshift 3 copy put pop % store the high byte, already shifted
}{
or 3 copy put pop % merge in the low byte
1 add
} ifelse
} forall
pop
} bind def
% pack an array of integers into a string of 16-bit big-endian units
% each integer contributes two bytes, high byte first; bits above the low 16 are dropped
% <array> int2str16 <string>
/int2str16 {
    dup length 2 mul string                         % arr dest
    0 3 -1 roll                                     % dest pos arr
    {                                               % dest pos val
        2 index 2 index 2 index
        -8 bitshift 255 and put                     % dest[pos] = high byte
        2 index 2 index 1 add
        3 -1 roll 255 and put                       % dest[pos + 1] = low byte
        2 add                                       % next unit
    } forall
    pop                                             % drop pos, leaving dest
} bind def
% encode dictionary or array as a JSON data-interchange string
% (simple implementation, outputs UTF8 JSON)
% - dictionaries become objects; keys are written between double quotes as they are (not escaped)
% - arrays and packed arrays become JSON arrays
% - strings and names are quoted; quote, backslash, slash and the named control
%   characters get short escapes, other codes up to 16#1F and 16#7F become \u00XX
% - integers, reals and booleans are rendered through concata; null becomes null
% - any other type contributes nothing to the output
% <any> jsonencode <string>
/jsonencode {
    [ exch                                      % collect output pieces in an array
    dup type { % case stmt
        dup /dicttype eq {
            pop
            ({) exch {
                2 index ({) ne { (,) 3 1 roll } if          % comma before every pair but the first
                exch (") exch (":) 4 -1 roll jsonencode     % "key": value
            } forall
            (})
            exit
        } if
        dup dup /arraytype eq exch /packedarraytype eq or {
            pop
            ([) exch {
                1 index ([) ne { (,) exch } if              % comma before every element but the first
                jsonencode
            } forall
            (])
            exit
        } if
        dup /nametype eq { pop cvas /stringtype } if % fall through
        dup /stringtype eq {
            pop
            % worst case every character expands to \u00XX (6 bytes), plus the
            % two quotes; the former 4 * length buffer overflowed on empty
            % strings and on control characters
            dup length 6 mul 2 add string 0             % src buf pos
            2 copy (") putinterval 1 add                % opening quote
            3 -1 roll {
                chr { % case stmt: map one character to its escaped form
                    dup (") eq { pop (\\") exit } if
                    dup (\\) eq { pop (\\\\) exit } if
                    dup (/) eq { pop (\\/) exit } if
                    dup (\b) eq { pop (\\b) exit } if
                    dup (\f) eq { pop (\\f) exit } if
                    dup (\n) eq { pop (\\n) exit } if
                    dup (\r) eq { pop (\\r) exit } if
                    dup (\t) eq { pop (\\t) exit } if
                    dup ord dup 16#1F le exch 16#7F eq or { % control char
                        ord
                        (\\u00) 1 index 16 lt { (0) cat } if    % pad to two hex digits
                        exch 16 2 string cvrs cat
                        exit
                    } if
                    exit % assume UTF8/ASCII
                } loop
                3 copy putinterval length add           % append and advance pos
            } forall
            2 copy (") putinterval 1 add                % closing quote
            0 exch getinterval                          % trim to the bytes written
            exit
        } if
        dup /integertype eq { pop exit } if
        dup /realtype eq { pop exit } if
        dup /booleantype eq { pop exit } if
        dup /nulltype eq { pop pop (null) exit } if
        pop pop exit % invalid
    } loop
    ] concata
} bind def
% decode JSON data-interchange string
% (simple implementation, assumes valid UTF8 JSON)
% objects become dictionaries keyed by names, arrays become arrays, strings
% have their escapes resolved (\uXXXX goes through int2str16 and ucs2toutf8),
% numbers are converted by executing their text
% the literals false, true and null are recognised by their first letter only
% and a fixed number of characters is skipped
% an input with no value yields null
% note: every call (re)defines the helper _jsonskip in the current dictionary
% <string> jsondecode <any>
/jsondecode {
% <string> <n> _jsonskip <string without its first n chars>
/_jsonskip { 1 index length 1 index sub getinterval } bind def
mark exch
% main loop: decoded values pile up above the mark, the unparsed remainder stays on top
{ % case stmt
dup length 0 eq { pop exit } if % all parsed
% every branch below leaves: ... remainder count, where count chars are then skipped
dup 0 1 getinterval { % case stmt
//WHITESPACE 1 index search { % whitespace
pop pop pop
pop
1 exit
}{
pop
} ifelse
dup ([) eq { % array
pop
[ exch
1 exit
} if
dup (]) eq { % array end
pop
counttomark 1 add 1 roll ] exch % tuck the remainder below the mark, close the array, bring it back
1 exit
} if
dup ({) eq { % object
pop
<< exch
1 exit
} if
dup (}) eq { % object end
pop
counttomark 1 add 1 roll >> exch % tuck the remainder below the mark, close the dictionary, bring it back
1 exit
} if
dup (:) eq { % name/value separator
pop
exch cvn exch % the string decoded just before the colon becomes the key name
1 exit
} if
dup (,) eq { % separator
pop
1 exit
} if
dup (") eq { % string
pop
1 _jsonskip
% scan for the closing quote; stack: text index
0 {
2 copy 1 getinterval
dup (") eq { pop exit } if
dup (\\) eq {
pop
2 copy 0 exch getinterval % text before the backslash
2 index 2 index 1 add 1 getinterval { % case stmt on the escaped char
dup (b) eq { pop (\b) exit } if
dup (f) eq { pop (\f) exit } if
dup (n) eq { pop (\n) exit } if
dup (r) eq { pop (\r) exit } if
dup (t) eq { pop (\t) exit } if
dup (u) eq { % \uXXXX
pop
[
(16#) 4 index 4 index 2 add 4 getinterval cat % radix number text built from the four hex digits
cvx exec
] int2str16 ucs2toutf8
3 -1 roll 4 add 3 1 roll % skip the four hex digits as well
exit
} if
exit % keep other chars
} loop
cat
dup length
exch 4 2 roll
2 add _jsonskip % \ and next
cat exch % rebuilt text with the escape resolved; index continues after the replacement
}{
pop
1 add % next char
} ifelse
} loop
2 copy 0 exch getinterval % the decoded string value
3 1 roll
1 add exit % count also covers the closing quote
} if
dup (f) eq { % false
pop
false exch
5 exit
} if
dup (t) eq { % true
pop
true exch
4 exit
} if
dup (n) eq { % null
pop
null exch
4 exit
} if
{ % number
pop
% count the leading chars that belong to REALNUMBER
0 {
2 copy exch length eq { exit } if
2 copy 1 getinterval
//REALNUMBER exch search {
pop pop pop
}{
pop exit
} ifelse
1 add
} loop
2 copy 0 exch getinterval
cvx exec % let the interpreter convert the number text
3 1 roll
exit
} exec
} loop
_jsonskip
} loop
counttomark 0 eq { null } if % null if empty
exch pop % drop the mark, leaving the decoded value
} bind def
% translate string (char ints) to BMP PUA if symbol font
% the offset is <mincid> masked with 16#F000; only when that is 16#E000 or
% 16#F000 are codes shifted: every element that is >= 16#20 and below the
% offset has the offset added
% the array is modified in place and returned
% <char array> <mincid> symboloffset <cid array>
/symboloffset {
16#F000 and dup 16#E000 ge { % offset is 16#E000 or 16#F000
exch % offset array
0 1 2 index length 1 sub {
2 copy get % offset array i code
dup dup 5 index lt exch 16#20 ge and { % < PUA, > ctrl codes
3 index add 2 index 3 1 roll put % array[i] = code + offset
}{
pop pop
} ifelse
} for
exch
} if
pop % drop the offset
} bind def