-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathStdioFileEx.cpp
1121 lines (963 loc) · 39.4 KB
/
StdioFileEx.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// StdioFileEx.cpp: implementation of the CStdioFileEx class.
//
// Version 1.1 23 August 2003. Incorporated fixes from Dennis Jeryd.
// Version 1.3 19 February 2005. Incorporated fixes from Howard J Oh and some of my own.
// Version 1.4 26 February 2005. Fixed stupid screw-up in code from 1.3.
// Version 1.5 18 November 2005. - Incorporated fixes from Andy Goodwin.
// - Allows code page to be specified for reading/writing
// - Properly calculates multibyte buffer size instead of
// assuming lstrlen(s).
// - Should handle UTF8 properly.
// Version 1.6 19 July 2007. - ReadString incorrectly removed \r or \n characters
// immediately preceding line breaks.
// Fixed tab problem in these comments! (Perry).
// Made GetMultiByteStringFromUnicodeString input string const
// (Perry).
// Avoided double conversion if code page not set.
// (Konrad Windszus).
// Fixed ASSERT in GetUnicodeStringFromMultiByteString
// (Konrad Windszus).
// Maximum line length restriction removed. Lines of any length
// can now be read thanks to C.B. Falconer's fggets (fgoodgets),
// ably assisted by Ana Sayfa and Dave Kondrad.
// Substantial code reorganisation and tidying.
// Use of strlen/lstrlen eliminated. Conversion functions always used
// to calculate required buffers.
// Serious, systematic tests are now included with the code.
// Options included to switch off BOM writing and alter the Unicode
// filler char.
// BOM is only stripped off if actually there.
// UTF-8 BOM is now read and written. UTF-8 conversion works.
//
// Copyright David Pritchard 2003-2007. [email protected]
//
// You can use this class freely, but please keep my ego happy
// by leaving this comment in place.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "StdioFileEx.h"
#include "ggets.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
// Byte sequence that marks the start of a UTF-8 encoded file (EF BB BF).
// Plain integer constants are used here: a function-style cast with a two-word
// type name ("unsigned char(0xEF)") is an MSVC extension and is rejected by
// other compilers. Every value fits in an unsigned char, so no cast is needed.
const unsigned char UTF8_BOM[] = { 0xEF, 0xBB, 0xBF };
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Open flag that asks WriteString() to write the file as Unicode. Combine it with CFile::modeWrite or
// CFile::modeReadWrite (ProcessFlags() asserts this in debug builds). For the moment the value is out
// of range of all the Visual Studio 2005 flags.
/*static*/ const UINT CStdioFileEx::modeWriteUnicode = 0x200000;
// Default constructor: builds an unopened file object. No code page is selected (-1),
// the file is assumed not to be Unicode, and BOM writing is switched on.
CStdioFileEx::CStdioFileEx()
	: CStdioFile(),
	  m_bCheckFilePos(true),
	  m_bIsUnicodeText(false),
	  m_nFileCodePage(-1),
	  m_cUnicodeFillerChar(sDEFAULT_UNICODE_FILLER_CHAR),
	  m_bWriteBOM(true)	// By default, write the BOM
{
}
// Constructs the object and hands the file name and open flags straight to the
// CStdioFile constructor. All other settings get the same defaults as the default constructor.
// NOTE(review): the flags are not passed through ProcessFlags() on this path, so
// modeWriteUnicode / forced binary mode are not applied here -- confirm whether callers rely on that.
CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName,UINT nOpenFlags)
	: CStdioFile(lpszFileName, nOpenFlags),
	  m_bCheckFilePos(true),
	  m_bIsUnicodeText(false),
	  m_nFileCodePage(-1),
	  m_cUnicodeFillerChar(sDEFAULT_UNICODE_FILLER_CHAR),
	  m_bWriteBOM(true)	// By default, write the BOM
{
}
// Selects the code page used for multibyte <-> Unicode conversion when reading/writing.
// The value is stored as an int; -1 is the "not set" value used by the constructors.
void CStdioFileEx::SetCodePage(IN const UINT nCodePage)
{
	m_nFileCodePage = static_cast<int>(nCodePage);
}
// Sets the Unicode filler char, i.e. the char written in place of a Unicode character
// that cannot be converted to the target multibyte character set.
void CStdioFileEx::SetFillerChar(IN const char cFiller)
{
	this->m_cUnicodeFillerChar = cFiller;
}
// Switches writing of the byte-order mark at the start of the file on or off.
// WriteString() consults this setting before emitting a BOM.
void CStdioFileEx::SetWriteBOM(IN const bool bWrite)
{
	this->m_bWriteBOM = bWrite;
}
// Determines whether we try to interpret this file as Unicode
//void CStdioFileEx::SetUnicode(IN const bool bIsUnicode)
//{
// m_bIsUnicodeText = bIsUnicode;
//}
// Opens the file. The flags are first run through ProcessFlags() (which forces binary mode and
// records them), then the base class does the actual open.
//
// When the file is opened for reading (neither modeCreate nor modeWrite is set), the first
// wchar_t of the file is examined: the file is treated as Unicode text only if a complete
// Unicode byte-order mark was read. The file pointer is put back to the start afterwards.
//
// Returns the result of CStdioFile::Open().
BOOL CStdioFileEx::Open(LPCTSTR lpszFileName,UINT nOpenFlags,CFileException* pError /*=NULL*/)
{
	// Process any Unicode stuff in the flags
	ProcessFlags(nOpenFlags);

	BOOL bOK = CStdioFile::Open(lpszFileName, nOpenFlags, pError);

	if (bOK)
	{
		// If we are reading, see if the file has a BOM
		if (!(nOpenFlags & CFile::modeCreate) && !(nOpenFlags & CFile::modeWrite))
		{
			wchar_t cBOMTest = 0;

			// A file shorter than one wchar_t yields a short read; the byte count must be
			// checked so that a partially filled value is never compared against the BOM.
			const UINT nBytesRead = Read(&cBOMTest, sizeof(wchar_t));

			m_bIsUnicodeText = (nBytesRead == sizeof(wchar_t) && cBOMTest == (wchar_t)nUNICODE_BOM);

			// Reset to start of file; ReadString() discards the BOM when it reads from position 0
			SeekToBegin();
			m_bCheckFilePos = true;
		}
	}

	return bOK;
}
// Reads the next line of the file into rString.
//
// When reading starts at position 0 the byte-order mark is stepped over first: the two-byte
// Unicode BOM for files flagged as Unicode text, or the three-byte UTF-8 BOM when the code page
// is CP_UTF8. The UTF-8 BOM is only skipped if those bytes really are the BOM; otherwise the
// file pointer is put back so that no text is lost from files written without a BOM.
//
// The line itself is read by ReadUnicodeLine() or ReadMultiByteLine(); a single trailing '\r'
// left over from a "\r\n" line break is removed.
//
// Returns the BOOL result of the line reader.
BOOL CStdioFileEx::ReadString(CString& rString)
{
	ASSERT(m_pStream != NULL);

	// To optimise reading, the position is only queried while m_bCheckFilePos is set
	// (this avoids a call to ftell every time we read a line)
	if (m_bCheckFilePos && GetPosition() == 0)
	{
		if (m_bIsUnicodeText)
		{
			// Step over the Unicode BOM
			wchar_t cBOMDummy;
			Read(&cBOMDummy, sizeof(wchar_t));
		}
		else if (CP_UTF8 == m_nFileCodePage)
		{
			// Look for the UTF-8 BOM
			BYTE arrUTF8BOMTest[sizeof(UTF8_BOM)];
			const UINT nBytesRead = Read(arrUTF8BOMTest, sizeof(arrUTF8BOMTest));

			// If the first bytes are NOT a BOM (or the file is too short to hold one),
			// they are real text: go back to the start of the file
			if (nBytesRead != sizeof(UTF8_BOM) || memcmp(arrUTF8BOMTest, UTF8_BOM, sizeof(UTF8_BOM)) != 0)
			{
				SeekToBegin();
			}
		}
	}

	// Read Unicode line or multibyte line (implementations differ depending on the compilation)
	const BOOL bReadData = m_bIsUnicodeText ? ReadUnicodeLine(rString) : ReadMultiByteLine(rString);

	// Then remove the end-of-line character as necessary. fggets strips the \n from the end of
	// lines but leaves the \r. Only one \r is removed, because a line could quite legitimately
	// have a \r or \n of its own just before the actual \r\n line break.
	if (bReadData)
	{
		const int nLen = rString.GetLength();
		LPTSTR lpsz = rString.GetBuffer(0);

		if (nLen != 0 && lpsz[nLen-1] == _T('\r'))
		{
			rString.GetBufferSetLength(nLen-1);
		}

		rString.ReleaseBuffer();

		// Now we've moved on in the file, don't bother to check the position any more
		// unless the file pointer is moved
		m_bCheckFilePos = false;
	}

	return bReadData;
}
// Buffer-based overload. No Unicode handling is provided for it: debug builds assert
// as soon as it is called, and the request is then forwarded unchanged to CStdioFile.
/*virtual*/ LPTSTR CStdioFileEx::ReadString(LPTSTR lpsz,UINT nMax)
{
	// Can't really provide compatible behaviour at the moment
	ASSERT(false);

	LPTSTR pszResult = CStdioFile::ReadString(lpsz, nMax);
	return pszResult;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns: void
// Parameters: LPCTSTR lpsz
//
// Purpose: Writes string to file either in Unicode or multibyte, depending on whether the caller specified the
// CStdioFileEx::modeWriteUnicode flag. Override of base class function.
// Notes: If writing in Unicode we need to:
// a) Write the Byte-order-mark at the beginning of the file
// b) Write all strings in byte-mode
// - If we were compiled in Unicode, we need to convert Unicode to multibyte if
// we want to write in multibyte
// - If we were compiled in multi-byte, we need to convert multibyte to Unicode if
// we want to write in Unicode.
// Exceptions: None.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
	// Unicode output was requested if the open flags carry modeWriteUnicode
	const bool bUnicodeOutput = (m_nFlags & CStdioFileEx::modeWriteUnicode) != 0;

	// A byte-order mark is only written when enabled and nothing has been written yet
	const bool bNeedBOM = m_bWriteBOM && (!m_pStream || GetPosition() == 0);

	if (bNeedBOM)
	{
		if (bUnicodeOutput)
		{
			// Unicode byte-order mark goes first
			wchar_t cBOM = (wchar_t)nUNICODE_BOM;
			CFile::Write(&cBOM, sizeof(wchar_t));
		}
		else if (m_nFileCodePage == CP_UTF8)
		{
			// Fix by philfar
			// http://www.codeproject.com/file/stdiofileex.asp?forumid=15472&select=1805024&df=100#xx1805024xx
			// Writing UTF-8: the UTF-8 byte mark goes first
			CFile::Write(UTF8_BOM, sizeof(UTF8_BOM));
		}
	}

	// Hand the text to the writer that matches the output encoding
	if (bUnicodeOutput)
	{
		WriteUnicodeLine(lpsz);
	}
	else
	{
		// Multibyte/ANSI
		WriteMultiByteLine(lpsz);
	}
}
// Normalises the open flags before the file is opened.
//  - The file is marked as Unicode text exactly when modeWriteUnicode is present
//    (Konrad Windszus 05/04/2006: writing to Unicode is obviously always Unicode).
//  - Text mode is removed and binary mode is forced, for any type of writing.
//  - The resulting flags are stored in m_nFlags, written back through nOpenFlags and returned.
UINT CStdioFileEx::ProcessFlags(UINT& nOpenFlags)
{
	const bool bWriteUnicode = (nOpenFlags & CStdioFileEx::modeWriteUnicode) != 0;

	// If we have writeUnicode we must have write or writeRead as well (checked in debug builds only)
	ASSERT(!bWriteUnicode || (nOpenFlags & CFile::modeWrite) || (nOpenFlags & CFile::modeReadWrite));

	m_bIsUnicodeText = bWriteUnicode;

	// Always use binary mode: drop typeText if present, then add typeBinary
	nOpenFlags &= ~static_cast<UINT>(CFile::typeText);
	nOpenFlags |= CFile::typeBinary;

	m_nFlags = nOpenFlags;

	return nOpenFlags;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::IsFileUnicode()
//
// --------------------------------------------------------------------------------------------
// Returns: bool
// Parameters: const CString& sFilePath
//
// Purpose: Determines whether a file is Unicode by reading the first character and detecting
// whether it's the Unicode byte marker.
// Notes: None.
// Exceptions: None.
//
/*static*/ bool CStdioFileEx::IsFileUnicode(const CString& sFilePath)
{
CFile file;
bool bIsUnicode = false;
wchar_t cFirstChar;
CFileException exFile;
// Open file in binary mode and read first character
if (file.Open(sFilePath, CFile::typeBinary | CFile::modeRead, &exFile))
{
// If byte is Unicode byte-order marker, let's say it's Unicode
if (file.Read(&cFirstChar, sizeof(wchar_t)) > 0 && cFirstChar == (wchar_t)nUNICODE_BOM)
{
bIsUnicode = true;
}
file.Close();
}
else
{
// Handle error here if you like
}
return bIsUnicode;
}
// Rough character count: the file length in bytes divided by the character size
// (wchar_t for Unicode text, char otherwise). For Unicode text one wchar_t is
// subtracted first to leave the byte-order mark out of the count.
// Likely to be way out for multibyte files using non-western code pages.
// Returns 0 if the file is not open.
STDIOEXLONG CStdioFileEx::GetCharCount()
{
	ULONGLONG nCharCount = 0;

	if (m_pStream)
	{
		// Get size of chars in file
		const ULONGLONG nCharSize = m_bIsUnicodeText ? sizeof(wchar_t) : sizeof(char);

		ULONGLONG nByteCount = GetLength();

		// If Unicode, remove byte order mark from count. The length is unsigned, so a file
		// too short to hold a BOM (e.g. a newly created, still empty one) must not be
		// allowed to wrap round to a huge value.
		if (m_bIsUnicodeText)
		{
			nByteCount = (nByteCount >= sizeof(wchar_t)) ? (nByteCount - sizeof(wchar_t)) : 0;
		}

		// Calc chars
		nCharCount = nByteCount / nCharSize;
	}

	return nCharCount;
}
// Moves the file pointer via CStdioFile::Seek(). Before doing so it re-arms the
// position check, so that the next ReadString() looks again at whether it is
// at the start of the file.
/*virtual*/ STDIOEXLONG CStdioFileEx::Seek(LONGLONG lOff,UINT nFrom)
{
	m_bCheckFilePos = true;

	const STDIOEXLONG nNewPosition = CStdioFile::Seek(lOff, nFrom);
	return nNewPosition;
}
// Gets the default ANSI code page of the current user's locale.
// Returns 0 if the locale information could not be obtained, or if the locale reports
// code page 0 (no ANSI code page -- a Unicode-only locale?). Both cases assert in debug builds.
// In the case of Unicode-only locales, what do multibyte apps do? Answers on a postcard.
UINT CStdioFileEx::GetCurrentLocaleCodePage()
{
	_TCHAR szCodePage[10];
	const int nBufferChars = sizeof(szCodePage) / sizeof(szCodePage[0]);

	// The code page comes back as a string of digits
	if (::GetLocaleInfo(LOCALE_USER_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, szCodePage, nBufferChars) <= 0)
	{
		ASSERT(false);
		return 0;
	}

	const UINT nCodePage = (UINT)_ttoi(szCodePage);
	ASSERT(nCodePage > 0);

	return nCodePage;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetUnicodeStringFromMultiByteString()
//
// --------------------------------------------------------------------------------------------
// Returns: int - number of chars written
// Parameters: LPCSTR szMultiByteString (IN) Multi-byte input string
// wchar_t* szUnicodeString (OUT) Unicode outputstring
// size_t nUnicodeBufferSize (IN) Size of Unicode output buffer in chars
// UINT nCodePage (IN) Code page used to perform conversion
// Default = CP_ACP (Get local code page).
//
// Purpose: Gets a Unicode string from a MultiByte string.
// Notes: None.
// Exceptions: None.
//
int CStdioFileEx::GetUnicodeStringFromMultiByteString(IN LPCSTR szMultiByteString, OUT wchar_t* szUnicodeString, IN const size_t nUnicodeBufferSize, IN UINT nCodePage)
{
	int nConverted = 0;

	if (!szUnicodeString || !szMultiByteString)
	{
		// Both the input string and the output buffer are required
		ASSERT(false);
	}
	else
	{
		// If no code page specified, take default for system
		if (nCodePage == (UINT)-1)
		{
			nCodePage = GetACP();
		}

		try
		{
			// Zero out buffer first
			memset((void*)szUnicodeString, '\0', sizeof(wchar_t) * nUnicodeBufferSize);

			// When converting from UTF8, don't set any flags (see Q175392).
			const DWORD dwFlags = (nCodePage == CP_UTF8) ? 0 : MB_PRECOMPOSED;

			nConverted = MultiByteToWideChar(nCodePage, dwFlags, szMultiByteString, -1,
											 szUnicodeString, (int)nUnicodeBufferSize);
		}
		catch(...)
		{
			// Level 4 compile says this is unreachable code in VS2005. I guess MultiByteToWideChar doesn't throw exceptions.
		}
	}

	// Konrad Windszus 29/3/2006: a successful conversion writes at least the terminator
	ASSERT(nConverted > 0);

	// The count reported by MultiByteToWideChar includes the \0 terminator; leave it out
	return (nConverted > 0) ? (nConverted - 1) : nConverted;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetRequiredUnicodeLengthFromMultiByteString()
//
// --------------------------------------------------------------------------------------------
// Returns: int - number of chars needed
// Parameters: LPCSTR szMultiByteString (IN) Multi-byte input string
// UINT nCodePage (IN) Code page of input string
// Default = CP_ACP (local code page).
//
// Purpose: Gets the length required, in wchar_t values (chars) to convert a MultiByte string to a Unicode string.
// Notes: None.
// Exceptions: None.
//
/*static*/ int CStdioFileEx::GetRequiredUnicodeLengthFromMultiByteString(IN LPCSTR szMultiByteString,IN UINT nCodePage /*=CP_ACP OPTIONAL*/)
{
	int nRequiredChars = 0;

	if (!szMultiByteString)
	{
		// An input string is required
		ASSERT(false);
	}
	else
	{
		// If no code page specified, take default for system
		if (nCodePage == (UINT)-1)
		{
			nCodePage = GetACP();
		}

		try
		{
			// When converting from UTF8, don't set any flags (see Q175392).
			const DWORD dwFlags = (nCodePage == CP_UTF8) ? 0 : MB_PRECOMPOSED;

			// Length -1: the function works out the input length itself.
			// No output buffer and size 0: ask to be told how many chars are needed
			// (the answer includes space for the terminator).
			nRequiredChars = MultiByteToWideChar(nCodePage, dwFlags, szMultiByteString, -1, NULL, 0);
		}
		catch(...)
		{
			// Level 4 compile says this is unreachable code in VS2005. I guess MultiByteToWideChar doesn't throw exceptions.
		}
	}

	// Konrad Windszus 29/3/2006: should have at least the terminator, since the value
	// returned by MultiByteToWideChar includes it
	ASSERT(nRequiredChars > 0);

	return nRequiredChars;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetNewUnicodeStringFromMultiByteString()
//
// --------------------------------------------------------------------------------------------
// Returns: int - number of chars written (0 means error)
// Parameters: LPCSTR szMultiByteString (IN) Multi-byte input string
// CTemplateSmartPtrArray<wchar_t>&
// spUnicodeString (IN/OUT) Smart pointer containing default buffer (or NULL)
// on input, and pointing to buffer used for conversion
// on output. A newly allocated buffer will be automatically
// deleted when the smart ptr object is destroyed.
// This allows a default buffer to be declared and used for
// most strings. Dynamic allocation is only performed when
// the default buffer would not be large enough.
// int nDefaultBufferSize (IN) Size of default buffer in smart ptr (may be 0).
// UINT nCodePage (IN) Code page used to perform conversion
// Default = CP_ACP (Get local code page).
//
// Purpose: Gets a Unicode string from a MultiByte string. Calculates the buffer for you and
// allocates it with "new".
// Notes: It's better to ask this function to allocate the buffer for you, because it will
// calculate the correct size. If we just take the number of bytes from the multibyte
// string as the size, we won't be in danger of allocating too little memory, but we
// may well allocate too much.
//
// The use of a smart ptr array combines this flexibility with efficiency. A default buffer can be passed in
// and used wherever is it sufficient to contain the output string. This avoids lots of unnecessary "new"s and
// "delete"s when reading or writing large files.
// Exceptions: None.
//
/*static*/ int CStdioFileEx::GetNewUnicodeStringFromMultiByteString(IN LPCSTR szMultiByteString,IN OUT CTemplateSmartPtrArray<wchar_t>& spUnicodeString, IN const int nDefaultBufferSize/*=0*/,IN UINT nCodePage /*=CP_ACP OPTIONAL*/)
{
	// Work out how many wchar_t values the converted string needs (terminator included)
	const int nRequiredChars = GetRequiredUnicodeLengthFromMultiByteString(szMultiByteString, nCodePage);

	// Start from the caller's default buffer; only allocate when it is too small
	int nBufferChars = nDefaultBufferSize;

	if (nRequiredChars > nDefaultBufferSize)
	{
		// The new buffer is handed to the smart ptr, which by default is marked as its owner
		// and therefore deletes it automatically
		spUnicodeString.Assign(new wchar_t[nRequiredChars]);
		nBufferChars = nRequiredChars;
	}

	// Do the conversion into whichever buffer the smart ptr now holds
	return GetUnicodeStringFromMultiByteString(szMultiByteString, spUnicodeString.GetBuffer(), nBufferChars, nCodePage);
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetMultiByteStringFromUnicodeString()
//
// --------------------------------------------------------------------------------------------
// Returns: int - number of chars written. 0 if error.
// Parameters: wchar_t * szUnicodeString (IN) Unicode input string
// char* szMultiByteString (OUT) Multibyte output string
// int nMultiByteBufferSize (IN) Multibyte buffer size
// UINT nCodePage (IN) Code page used to perform conversion
// Default = CP_ACP (Get local code page).
// char cFillerChar (IN) Unicode-to-multibyte filler char
// Default = #
//
// Purpose: Gets a MultiByte string from a Unicode string.
// Notes: It's better to ask this function to allocate the buffer for you, because it will
// calculate the correct size. Multibyte code pages will require larger buffers than
// the normal Western code pages, so we can't just say new char[numchars]!
// Exceptions: None.
//
int CStdioFileEx::GetMultiByteStringFromUnicodeString(IN const wchar_t * szUnicodeString, OUT char* szMultiByteString,
													  IN const int nMultiByteBufferSize, IN UINT nCodePage/*=CP_ACP OPTIONAL*/,
													  IN char cFillerChar/*=sDEFAULT_UNICODE_FILLER_CHAR OPTIONAL*/)
{
	int nBytesWritten = 0;
	BOOL bUsedDefChar = FALSE;

	// Fix by Andy Goodwin: don't do anything if buffer is 0. Nothing is done either
	// when the input string or the output buffer is missing.
	if (nMultiByteBufferSize > 0 && szUnicodeString && szMultiByteString)
	{
		// Zero out buffer first
		memset((void*)szMultiByteString, '\0', sizeof(char) * nMultiByteBufferSize);

		// If no code page specified, take default for system
		if (nCodePage == (UINT)-1)
		{
			nCodePage = (UINT)GetACP();
		}

		try
		{
			// If writing to UTF8, the flags must be 0 and both the default char
			// and the "used default char" flag must be NULL
			const bool bToUTF8 = (nCodePage == CP_UTF8);

			nBytesWritten = WideCharToMultiByte(nCodePage,
												(bToUTF8 ? 0 : WC_COMPOSITECHECK | WC_SEPCHARS),	// Flags
												szUnicodeString, -1,
												szMultiByteString,
												nMultiByteBufferSize,
												(bToUTF8 ? NULL : &cFillerChar),		// Filler char
												(bToUTF8 ? NULL : &bUsedDefChar));		// Did we use filler char?

			// The buffer is known to be non-empty here, so writing nothing means an error
			if (nBytesWritten == 0)
			{
				TRACE1("Error in WideCharToMultiByte: %d\n", ::GetLastError());
			}
		}
		catch(...)
		{
			TRACE0("Controlled exception in WideCharToMultiByte!\n");
		}
	}

	// The count reported by WideCharToMultiByte includes the \0 terminator; leave it out
	if (nBytesWritten > 0)
	{
		--nBytesWritten;
	}

	return nBytesWritten;
}
//---------------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetRequiredMultiByteLengthForUnicodeString()
//
//---------------------------------------------------------------------------------------------------
// Returns: int - no of bytes required
// Parameters: wchar_t * szUnicodeString (IN) String to convert
// UINT nCodePage=CP_ACP (IN) Code page to which to convert
//
// Purpose: Obtains the multi-byte buffer size (in bytes) needed to accommodate a converted Unicode string.
// Notes: We can't assume that the buffer length is simply equal to the number of characters
// because that wouldn't accommodate multibyte characters!
//
/*static*/ int CStdioFileEx::GetRequiredMultiByteLengthForUnicodeString(IN const wchar_t * szUnicodeString,IN UINT nCodePage /*=CP_ACP OPTIONAL*/)
{
	int nRequiredBytes = 0;

	try
	{
		// If no code page specified, take default for system
		if (nCodePage == (UINT)-1)
		{
			nCodePage = GetACP();
		}

		// If writing to UTF8, the flags must be 0
		const DWORD dwFlags = (nCodePage == CP_UTF8) ? 0 : (WC_COMPOSITECHECK | WC_SEPCHARS);

		// No output buffer and size 0: calculate the required buffer, please!
		// (The answer includes space for the terminator.) The filler char and the
		// "used default char" flag don't matter for a size query, so both are NULL.
		nRequiredBytes = WideCharToMultiByte(nCodePage, dwFlags, szUnicodeString, -1, NULL, 0, NULL, NULL);
	}
	catch(...)
	{
		// A TRACE here gives us an "unreachable code" error compiling on level 4
	}

	return nRequiredBytes;
}
// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetNewMultiByteStringFromUnicodeString()
//
// --------------------------------------------------------------------------------------------
// Returns: int - number of chars written. 0 if error.
// Parameters: wchar_t * szUnicodeString (IN) Unicode input string
// CTemplateSmartPtrArray<char>&
// spMultiByteString (IN/OUT) Smart pointer containing default buffer (or NULL)
// on input, and pointing to buffer used for conversion
// on output. A newly allocated buffer will be automatically
// deleted when the smart ptr object is destroyed.
// This allows a default buffer to be declared and used for
// most strings. Dynamic allocation is only performed when
// the default buffer would not be large enough.
// int nDefaultBufferSize (IN) Size of default buffer in smart ptr (may be 0).
// UINT nCodePage (IN) Code page used to perform conversion
// Default = CP_ACP (Get local code page).
// char cFillerChar (IN) Unicode-to-multibyte filler char
// Default = #
//
// Purpose: Gets a MultiByte string from a Unicode string. Calculates the buffer for you and
// allocates it with new.
// Notes: It's better to ask this function to allocate the buffer for you, because it will
// calculate the correct size. Multibyte code pages will require larger buffers than
// the normal Western code pages, so we can't just say new char[numchars]!
//
// The use of a smart ptr array combines this flexibility with efficiency. A default buffer can be passed in
// and used wherever is it sufficient to contain the output string. This avoids lots of unnecessary "new"s and
// "delete"s when reading or writing large files.
// Exceptions: None.
//
/*static*/ int CStdioFileEx::GetNewMultiByteStringFromUnicodeString(IN const wchar_t * szUnicodeString,IN OUT CTemplateSmartPtrArray<char>& spMultiByteString, IN const int nDefaultBufferSize/*=0*/,
																	 IN UINT nCodePage /*=CP_ACP OPTIONAL*/, IN char cFillerChar/*=sDEFAULT_UNICODE_FILLER_CHAR OPTIONAL*/)
{
	// Work out how many bytes the converted string needs (terminator included)
	const int nRequiredBytes = GetRequiredMultiByteLengthForUnicodeString(szUnicodeString, nCodePage);

	// Start from the caller's default buffer; only allocate when it is too small
	int nBufferBytes = nDefaultBufferSize;

	if (nRequiredBytes > nDefaultBufferSize)
	{
		// The new buffer is handed to the smart ptr, which by default is marked as its owner
		// and therefore deletes it automatically
		spMultiByteString.Assign(new char[nRequiredBytes]);
		nBufferBytes = nRequiredBytes;
	}

	// Do the conversion into whichever buffer the smart ptr now holds
	return GetMultiByteStringFromUnicodeString(szUnicodeString, spMultiByteString.GetBuffer(), nBufferBytes, nCodePage, cFillerChar);
}
//---------------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetNewUTF8StringFromUnicodeString()
//
//---------------------------------------------------------------------------------------------------
// Returns: bool - true if successful, false if it fails.
// Parameters: const wchar_t* szUnicodeString (IN) Input Unicode string
// unsigned char*& pszUTF8String (OUT) Receives a ptr. If the function returns
// successfully.
// the ptr points to the output string
//
// Purpose: Does conversion from Unicode to UTF8. Allocates memory for the output string
// Notes: Culled from http://www.bytemycode.com/snippets/snippet/438/
// Contributed by Dean
//
/*static*/ /*bool CStdioFileEx::GetNewUTF8StringFromUnicodeString( IN const wchar_t* szUnicodeString, OUT unsigned char*& pszUTF8String )
{
bool bConvertedOK = false;
const wchar_t* w;
// Calculate length needed for output string, taking account
// of the variable number of bytes needed to represent each
// Unicode character in UTF8
int len = 0;
for ( w = szUnicodeString; *w; w++ )
{
if ( *w < 0x0080 ) len++;
else if ( *w < 0x0800 ) len += 2;
else len += 3;
}
//unsigned char* szOut = ( unsigned char* )malloc( len+1 );
pszUTF8String = new char[len+1];
if ( pszUTF8String != NULL )
{
int i = 0;
for ( w = szUnicodeString; *w; w++ )
{
// Handle ASCII chars
if ( *w < 0x0080 )
{
pszUTF8String[i++] = ( char ) *w;
}
else if ( *w < 0x0800 )
{
pszUTF8String[i++] = 0xc0 | (( *w ) >> 6 );
pszUTF8String[i++] = 0x80 | (( *w ) & 0x3f );
}
else
{
pszUTF8String[i++] = 0xe0 | (( *w ) >> 12 );
pszUTF8String[i++] = 0x80 | (( ( *w ) >> 6 ) & 0x3f );
pszUTF8String[i++] = 0x80 | (( *w ) & 0x3f );
}
}
pszUTF8String[ i ] = '\0';
bConvertedOK = true;
}
//return ( char* )szOut;
return bConvertedOK;
}*/
/**********************************************************************************/
/* Reading */
/**********************************************************************************/
#ifdef _UNICODE
// Read Unicode in Unicode compilation.
// Reads one line of wide characters with fggets and copies it to sOutputLine without
// any conversion. The output string is emptied when nothing could be read.
// Returns TRUE if fggets reported FGGETS_OK.
BOOL CStdioFileEx::ReadUnicodeLine(OUT CString& sOutputLine)
{
	// Start with the member default buffer; bNeedToDelete tells us afterwards whether
	// the line pointer has to be freed (presumably fggets allocates for long lines -- see ggets.h)
	wchar_t* pszLine = (wchar_t*)&m_arrUnicodeDefaultBuffer;
	bool bNeedToDelete = false;
	BOOL bReadData = FALSE;

	try
	{
		bReadData = (FGGETS_OK == fggets<wchar_t>(&pszLine, m_pStream, bNeedToDelete, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE));

		if (!bReadData)
		{
			sOutputLine.Empty();
		}
		else
		{
			sOutputLine = (CString)pszLine;
		}
	}
	// Ensure we always clean up, no matter what
	FINALLY( if (bNeedToDelete) DELETE_SAFE_ARRAY(pszLine); )

	return bReadData;
}
// Read Multibyte in Unicode compilation.
// Reads one line of chars with fggets, converts it to Unicode using the file's code page
// and stores the result in sOutputLine. The output string is emptied when no characters
// were converted. Returns TRUE if fggets reported FGGETS_OK.
BOOL CStdioFileEx::ReadMultiByteLine(OUT CString& sOutputLine)
{
	char* pszRawLine = (char*)&m_arrMultibyteDefaultBuffer;
	bool bNeedToDelete = false;
	BOOL bReadData = FALSE;
	int nConvertedChars = 0;

	try
	{
		// Read the string -- the function dynamically allocates the necessary memory according
		// to the line length
		bReadData = (FGGETS_OK == fggets<char>(&pszRawLine, m_pStream, bNeedToDelete, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE));

		if (bReadData)
		{
			// The default buffer takes care of 99% of cases. It is not owned by the smart ptr
			// array, so it won't be deleted
			CTemplateSmartPtrArray<wchar_t> spWideLine((wchar_t*)&m_arrUnicodeDefaultBuffer, bTEMPLATESMARTPTR_NOTOWNED);

			// All-in-one allocation and conversion function. _mbslen and such like are avoided
			// since they're unreliable with UTF8
			nConvertedChars = GetNewUnicodeStringFromMultiByteString(pszRawLine, spWideLine, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE, m_nFileCodePage);

			if (nConvertedChars > 0)
			{
				sOutputLine = (CString)spWideLine.GetBuffer();
			}
		}

		// Empty the string if we failed to read anything
		if (nConvertedChars == 0)
		{
			sOutputLine.Empty();
		}
	}
	// Ensure we always clean up, no matter what
	FINALLY( if (bNeedToDelete) DELETE_SAFE_ARRAY(pszRawLine); )

	return bReadData;
}
#else
// Read a Unicode line in a multibyte compilation.
//
// One line is fetched from m_pStream as wide characters through
// fggets<wchar_t>() and then converted to narrow characters with
// GetNewMultiByteStringFromUnicodeString(), using m_nFileCodePage and
// m_cUnicodeFillerChar.
//
// sOutputLine [out]  Receives the converted line when the conversion reports
//                    more than zero characters; emptied when the count is zero
//                    (which is also the case when nothing was read).
// Returns            TRUE if fggets reported FGGETS_OK, FALSE otherwise. The
//                    result of the conversion does not affect the return value.
//
// NOTE(review): a negative conversion count would leave sOutputLine untouched;
// confirm whether the conversion helper can ever return one.
BOOL CStdioFileEx::ReadUnicodeLine(OUT CString& sOutputLine)
{
    // Begin with the member default buffer. The flag is handed to fggets and,
    // if it comes back set, the buffer is released in the FINALLY clause below.
    wchar_t*    pwszLine    = (wchar_t*)&m_arrUnicodeDefaultBuffer;
    bool        bMustFree   = false;
    BOOL        bGotLine    = FALSE;
    int         nConverted  = 0;

    try
    {
        bGotLine = (fggets<wchar_t>(&pwszLine, m_pStream, bMustFree, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE) == FGGETS_OK);

        if (bGotLine)
        {
            // Wrap the member default buffer without taking ownership, so the
            // smart pointer array will not delete it
            CTemplateSmartPtrArray<char> spNarrowLine((char*)&m_arrMultibyteDefaultBuffer, bTEMPLATESMARTPTR_NOTOWNED);

            // Single call that works out the required size, allocates and converts
            nConverted = GetNewMultiByteStringFromUnicodeString(pwszLine, spNarrowLine, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE, m_nFileCodePage, m_cUnicodeFillerChar);

            if (nConverted > 0)
            {
                sOutputLine = CString(spNarrowLine.GetBuffer());
            }
        }

        // Zero characters (nothing read, or nothing converted): clear the output
        if (0 == nConverted)
        {
            sOutputLine.Empty();
        }
    }
    // Release the line buffer on both the normal and the exceptional path
    FINALLY( if (bMustFree) DELETE_SAFE_ARRAY(pwszLine); )

    return bGotLine;
}
// Read Multibyte in Multibyte compilation
BOOL CStdioFileEx::ReadMultiByteLine(OUT CString& sOutputLine)
{
BOOL bReadData = FALSE;
char * pszMultiByteString = (char*)&m_arrMultibyteDefaultBuffer;;
int nChars = 0;
UINT nLocaleCodePage = 0;
bool bNeedToDelete = false;
try
{
// Read multibyte from file
bReadData = (FGGETS_OK == fggets<char>(&pszMultiByteString, m_pStream, bNeedToDelete, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE));
if (bReadData)
{
// Convert to CString
sOutputLine = (CString)pszMultiByteString;
// Now see if we've got to convert to another code page. Get the current code page
nLocaleCodePage = GetCurrentLocaleCodePage();
// If we got it OK...
if (nLocaleCodePage > 0)
{
// If file code page does not match the system code page (and we have a code page!),
// we need to do a double conversion!
// Konrad Windszus 29/3/2006: Do nothing if we haven't set a code page
if (m_nFileCodePage > 0 && nLocaleCodePage !=(UINT)m_nFileCodePage)
{
// Assign default buffer to take care of 99% of cases. Not owned by smart ptr array, so won't be deleted
CTemplateSmartPtrArray<wchar_t> spUnicodeString((wchar_t*)&m_arrUnicodeDefaultBuffer, bTEMPLATESMARTPTR_NOTOWNED);
// Use all-in-one allocation and conversion function. Avoid _mbslen and such like since they're unreliable
// with UTF8
nChars = GetNewUnicodeStringFromMultiByteString(sOutputLine, spUnicodeString, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE, m_nFileCodePage);
// Convert back to multibyte using the system code page
// (This doesn't really confer huge advantages except to avoid "mangling" of non-convertible special
// characters. So, if a file in the E.European code page is displayed on a system using the
// western European code page, special accented characters which the system cannot display will be
// replaced by the default character (a hash or something), rather than being incorrectly mapped to
// other, western European accented characters).
if (nChars > 0)
{
// Assign default buffer to take care of 99% of cases. Not owned by smart ptr array, so won't be deleted
CTemplateSmartPtrArray<char> spMultiByteString((char*)&m_arrMultibyteDefaultBuffer, bTEMPLATESMARTPTR_NOTOWNED);
// Call all-in-one function to calculate required buffer size and allocate
nChars = GetNewMultiByteStringFromUnicodeString(spUnicodeString.GetBuffer(), spMultiByteString, nSTDIOFILEEX_DEFAULT_BUFFER_SIZE, nLocaleCodePage, m_cUnicodeFillerChar);
sOutputLine = (CString)spMultiByteString.GetBuffer();
}
}
}
}
// Empty the string if we failed to read anything
if (!bReadData)
{