Skip to content

Commit

Permalink
bug fix issue pascalabcnet#30
Browse files Browse the repository at this point in the history
  • Loading branch information
ibond84 committed Dec 29, 2015
1 parent 8826c33 commit 2abf92d
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 19 deletions.
154 changes: 154 additions & 0 deletions ParserTools/FileReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// <file>
// <copyright see="prj:///doc/copyright.txt"/>
// <license see="prj:///doc/license.txt"/>
// <owner name="Daniel Grunwald" email="[email protected]"/>
// <version>$Revision: 2682 $</version>
// </file>

using System;
using System.IO;
using System.Text;

namespace PascalABCCompiler
{
/// <summary>
/// Opens text files with auto-detection of the encoding.
/// </summary>
/// <remarks>
/// Streams that start with a byte order mark are left to <see cref="StreamReader"/>'s
/// built-in detection. Streams without one are scanned for well-formed UTF-8 multi-byte
/// sequences; when none are found, or an invalid byte is encountered, the caller-supplied
/// default encoding is used instead.
/// </remarks>
public static class FileReader
{
    // Upper bound on the number of bytes inspected by AutoDetect (about 500 KB).
    const int MaxBytesToScan = 500000;

    /// <summary>
    /// Tells whether <paramref name="encoding"/> is one of the UTF code pages
    /// (UTF-8 = 65001, UTF-7 = 65000, UTF-16 LE = 1200, UTF-16 BE = 1201).
    /// </summary>
    /// <param name="encoding">Encoding to test; must not be null.</param>
    /// <returns>true if the code page is one of the four listed above.</returns>
    public static bool IsUnicode(Encoding encoding)
    {
        int codepage = encoding.CodePage;
        // return true if codepage is any UTF codepage
        return codepage == 65001 || codepage == 65000 || codepage == 1200 || codepage == 1201;
    }

    /// <summary>
    /// Reads the whole stream as text, detecting its encoding.
    /// </summary>
    /// <param name="fs">Stream to read. It must support <c>Length</c> and <c>Position</c>.
    /// The reader created over it is disposed before returning, which closes the stream.</param>
    /// <param name="encoding">On entry: the encoding to fall back to when the content is not
    /// recognized as Unicode. On exit: the encoding the reader actually used.</param>
    /// <returns>The decoded content of the stream.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
    public static string ReadFileContent(Stream fs, ref Encoding encoding)
    {
        using (StreamReader reader = OpenStream(fs, encoding)) {
            // Peek makes the reader look at the stream, so CurrentEncoding reflects
            // any byte order mark before we report it back to the caller.
            reader.Peek();
            encoding = reader.CurrentEncoding;
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Reads the whole file as text, detecting its encoding.
    /// </summary>
    /// <param name="fileName">Path of the file to read.</param>
    /// <param name="encoding">Fallback encoding for content that is not recognized as Unicode;
    /// when null, code page 1251 is used.</param>
    /// <returns>The decoded content of the file.</returns>
    public static string ReadFileContent(string fileName, Encoding encoding)
    {
        if (encoding == null)
            encoding = System.Text.Encoding.GetEncoding(1251);
        using (FileStream fs = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
            return ReadFileContent(fs, ref encoding);
        }
    }

    /// <summary>
    /// Creates a <see cref="StreamReader"/> over <paramref name="fs"/> using the detected encoding.
    /// </summary>
    /// <param name="fs">Stream to read. It must support <c>Length</c> and <c>Position</c>.</param>
    /// <param name="defaultEncoding">Encoding used when the content is not recognized as Unicode.
    /// May be null, in which case the reader's or the system's default is used.</param>
    /// <returns>A reader positioned at the start of the stream.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fs"/> is null.</exception>
    public static StreamReader OpenStream(Stream fs, Encoding defaultEncoding)
    {
        if (fs == null)
            throw new ArgumentNullException("fs");

        if (fs.Length >= 2) {
            // the autodetection of StreamReader is not capable of detecting the difference
            // between ISO-8859-1 and UTF-8 without BOM.
            int firstByte = fs.ReadByte();
            int secondByte = fs.ReadByte();
            switch ((firstByte << 8) | secondByte) {
                case 0x0000: // either UTF-32 Big Endian or a binary file; use StreamReader
                case 0xfffe: // Unicode BOM (UTF-16 LE or UTF-32 LE)
                case 0xfeff: // UTF-16 BE BOM
                case 0xefbb: // start of UTF-8 BOM
                    // StreamReader autodetection works
                    fs.Position = 0;
                    return new StreamReader(fs);
                default:
                    return AutoDetect(fs, (byte)firstByte, (byte)secondByte, defaultEncoding);
            }
        } else {
            // Fewer than two bytes: nothing to detect.
            if (defaultEncoding != null) {
                return new StreamReader(fs, defaultEncoding);
            } else {
                return new StreamReader(fs);
            }
        }
    }

    /// <summary>
    /// Scans the start of a BOM-less stream to decide between UTF-8 and the default encoding.
    /// </summary>
    /// <param name="fs">Stream positioned just after its first two bytes.</param>
    /// <param name="firstByte">First byte of the stream (already consumed by the caller).</param>
    /// <param name="secondByte">Second byte of the stream (already consumed by the caller).</param>
    /// <param name="defaultEncoding">Encoding for ASCII or non-UTF-8 content; may be null.</param>
    /// <returns>A reader positioned at the start of the stream.</returns>
    static StreamReader AutoDetect(Stream fs, byte firstByte, byte secondByte, Encoding defaultEncoding)
    {
        int max = (int)Math.Min(fs.Length, MaxBytesToScan);
        const int ASCII = 0;        // only bytes below 0x80 seen so far
        const int Error = 1;        // a byte that cannot occur at this point in UTF-8
        const int UTF8 = 2;         // at least one complete multi-byte sequence seen
        const int UTF8Sequence = 3; // currently inside a multi-byte sequence
        int state = ASCII;
        int sequenceLength = 0;     // continuation bytes still expected
        byte b;
        for (int i = 0; i < max; i++) {
            if (i == 0) {
                b = firstByte;
            } else if (i == 1) {
                b = secondByte;
            } else {
                b = (byte)fs.ReadByte();
            }
            if (b < 0x80) {
                // normal ASCII character
                if (state == UTF8Sequence) {
                    state = Error;
                    break;
                }
            } else if (b < 0xc0) {
                // 10xxxxxx : continues UTF8 byte sequence
                if (state == UTF8Sequence) {
                    --sequenceLength;
                    if (sequenceLength < 0) {
                        state = Error;
                        break;
                    } else if (sequenceLength == 0) {
                        state = UTF8;
                    }
                } else {
                    state = Error;
                    break;
                }
            } else if (b >= 0xc2 && b < 0xf5) {
                // beginning of byte sequence
                if (state == UTF8 || state == ASCII) {
                    state = UTF8Sequence;
                    if (b < 0xe0) {
                        sequenceLength = 1; // one more byte following
                    } else if (b < 0xf0) {
                        sequenceLength = 2; // two more bytes following
                    } else {
                        sequenceLength = 3; // three more bytes following
                    }
                } else {
                    state = Error;
                    break;
                }
            } else {
                // 0xc0, 0xc1, 0xf5 to 0xff are invalid in UTF-8 (see RFC 3629)
                state = Error;
                break;
            }
        }

        // If the entire stream was scanned and it ends in the middle of a multi-byte
        // sequence, it is not valid UTF-8 (typically a single-byte-encoded file whose
        // last character is a high byte). When the scan stopped at the size limit the
        // open sequence may simply continue past it, so that case is left alone.
        if (state == UTF8Sequence && max == fs.Length) {
            state = Error;
        }

        fs.Position = 0;
        switch (state) {
            case ASCII:
            case Error:
                // when the file seems to be ASCII or non-UTF8,
                // we read it using the user-specified encoding so it is saved again
                // using that encoding.
                if (defaultEncoding == null || IsUnicode(defaultEncoding)) {
                    // No usable fallback was supplied (null), or the file is not Unicode,
                    // so don't read it using Unicode even if the user has chosen Unicode
                    // as the default encoding.

                    // If we don't do this, SD will end up always adding a Byte Order Mark
                    // to ASCII files.
                    defaultEncoding = Encoding.Default; // use system encoding instead
                }
                return new StreamReader(fs, defaultEncoding);
            default:
                return new StreamReader(fs);
        }
    }
}
}
1 change: 1 addition & 0 deletions ParserTools/ParserTools.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
</Compile>
<Compile Include="Config\AssemblyInfo.cs" />
<Compile Include="DocumentationConstructor.cs" />
<Compile Include="FileReader.cs" />
<Compile Include="ParsersController.cs" />
<Compile Include="ParserTools\BaseParser.cs" />
<Compile Include="ParserTools\IParser.cs" />
Expand Down
5 changes: 3 additions & 2 deletions ParserTools/Tools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ public static object DefaultSourceFilesProvider(string FileName, SourceFileOpera
{
case SourceFileOperation.GetText:
if (!File.Exists(FileName)) return null;
TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
/*TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
//TextReader tr = new StreamReader(FileName, System.Text.Encoding.);
string Text = tr.ReadToEnd();
tr.Close();
tr.Close();*/
string Text = FileReader.ReadFileContent(FileName, null);
return Text;
case SourceFileOperation.Exists:
return File.Exists(FileName);
Expand Down
19 changes: 9 additions & 10 deletions TestSuite/CompilationSamples/PABCSystem.pas
Original file line number Diff line number Diff line change
Expand Up @@ -2971,16 +2971,6 @@ procedure string.operator*=(var left: string; n: integer);
left := sb.ToString;
end;

/// Возвращает инверсию строки
function string.Inverse(): string;
begin
var sb := new System.Text.StringBuilder(Self.Length);
for var i:= Self.Length downto 1 do
sb.Append(Self[i]);
Result := sb.ToString;
end;



//------------------------------------------------------------------------------
// Extension methods for BigInteger
Expand Down Expand Up @@ -7270,6 +7260,15 @@ function string.ToReals(): array of real;
Result := Self.ToWords().Select(s -> StrToFloat(s)).ToArray();
end;

/// Возвращает инверсию строки
function string.Inverse(): string;
begin
var sb := new System.Text.StringBuilder(Self.Length);
for var i:= Self.Length downto 1 do
sb.Append(Self[i]);
Result := sb.ToString;
end;

//------------------------------------------------------------------------------
function GetEXEFileName: string;
begin
Expand Down
12 changes: 12 additions & 0 deletions TestSuite/ansitest.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Test program: uses character literals outside the ASCII range as array
// index bounds, as array indices and as case labels, and asserts the results.
// NOTE(review): the literals below look like Windows-1251 Cyrillic letters
// shown through a Latin-1 rendering; presumably this file is meant to be
// stored in a single-byte ANSI code page -- confirm before re-saving it in
// another encoding.
begin
// array whose index type is a range of non-ASCII characters
var arr: array['à'..'ÿ'] of integer;
arr['ä'] := 2;
assert(arr['ä'] = 2);
var c := 'ä';
var i := 0;
// the second label matches c, so i is expected to become 5
case c of
'ë': i := 3;
'ä': i := 5;
end;
assert(i = 5);
end.
13 changes: 13 additions & 0 deletions TestSuite/unicodetest.pas
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//üя
// Test program: uses Cyrillic character literals as array index bounds, as
// array indices and as case labels, and asserts the results.
// NOTE(review): judging by the file name this source is presumably stored as
// UTF-8; the purpose of the first comment line is not visible here -- confirm
// before changing or removing it.
begin
// array whose index type is a range of Cyrillic characters
var arr: array['а'..'я'] of integer;
arr['д'] := 2;
assert(arr['д'] = 2);
var c := 'д';
var i := 0;
// the second label matches c, so i is expected to become 5
case c of
'л': i := 3;
'д': i := 5;
end;
assert(i = 5);
end.
5 changes: 3 additions & 2 deletions VisualPascalABCNET/DS/VisualEnvironmentCompiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,10 @@ public object SourceFilesProvider(string FileName, PascalABCCompiler.SourceFileO
return ed.Document.TextContent;
if (!File.Exists(FileName))
return null;
TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
/*TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
string Text = tr.ReadToEnd();
tr.Close();
tr.Close();*/
string Text = PascalABCCompiler.FileReader.ReadFileContent(FileName, null);
return Text;
case PascalABCCompiler.SourceFileOperation.Exists:
if (tp != null)
Expand Down
5 changes: 3 additions & 2 deletions pabcnetc/CommandConsoleCompiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ public object SourceFilesProvider(string FileName, SourceFileOperation FileOpera
return text;
if (!File.Exists(FileName))
return null;
TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
/*TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
text = tr.ReadToEnd();
tr.Close();
tr.Close();*/
text = PascalABCCompiler.FileReader.ReadFileContent(FileName, null);
return text;
case SourceFileOperation.Exists:
if (SourceFiles.ContainsKey(fn))
Expand Down
2 changes: 1 addition & 1 deletion pabcnetc/ConsoleCompiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ public static void Reset()
DateTime ldt = DateTime.Now;
Compiler = new PascalABCCompiler.Compiler(null,ChangeCompilerState);
//GC.Collect();
WriteColorText(Compiler.Banner + "\nCopyright (c) 2005,2014 PascalABCCompiler Team\n", ConsoleColor.Black, ConsoleColor.Green);
WriteColorText(Compiler.Banner + "\nCopyright (c) 2005,2015 PascalABCCompiler Team\n", ConsoleColor.Black, ConsoleColor.Green);
Console.WriteLine("OK {0}ms", (DateTime.Now - ldt).TotalMilliseconds);
if (Compiler.SupportedSourceFiles.Length == 0)
WriteColorText(StringResourcesGet("ERROR_PARSERS_NOT_FOUND")+Environment.NewLine,ConsoleColor.Black,ConsoleColor.Red);
Expand Down
5 changes: 3 additions & 2 deletions pabcnetc_clear/CommandConsoleCompiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ public object SourceFilesProvider(string FileName, SourceFileOperation FileOpera
return text;
if (!File.Exists(FileName))
return null;
TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
/*TextReader tr = new StreamReader(FileName, System.Text.Encoding.GetEncoding(1251));
text = tr.ReadToEnd();
tr.Close();
tr.Close();*/
text = PascalABCCompiler.FileReader.ReadFileContent(FileName, null);
return text;
case SourceFileOperation.Exists:
if (SourceFiles.ContainsKey(fn))
Expand Down

0 comments on commit 2abf92d

Please sign in to comment.