From 53af842d038a52358647600abc91a824050435a8 Mon Sep 17 00:00:00 2001 From: Adrian Mos Date: Wed, 27 Jul 2022 00:14:51 +0300 Subject: [PATCH] Automatic code arrangement (#157) * Some automatic code arrangement, removed a ton of useless whitespace. * More whitespace removed. * Added editorconfig. * Unused usings and whitespace removal --- .editorconfig | 160 ++++++++++++++++++ UTF-unknown.sln | 18 +- src/CharsetDetector.cs | 27 ++- .../Analyzers/CharDistributionAnalyser.cs | 10 +- .../Chinese/BIG5DistributionAnalyser.cs | 16 +- .../Chinese/EUCTWDistributionAnalyser.cs | 10 +- .../Chinese/GB18030DistributionAnalyser.cs | 8 +- .../Japanese/EUCJPContextAnalyser.cs | 10 +- .../Japanese/EUCJPDistributionAnalyser.cs | 6 +- .../Japanese/JapaneseContextAnalyser.cs | 51 +++--- .../MultiByte/Japanese/SJISContextAnalyser.cs | 9 +- .../Japanese/SJISDistributionAnalyser.cs | 22 +-- .../Korean/EUCKRDistributionAnalyser.cs | 24 ++- src/Core/BitPackage.cs | 24 +-- src/Core/CodepageName.cs | 40 ++--- .../Models/MultiByte/Chinese/BIG5SMModel.cs | 81 +++++---- .../Models/MultiByte/Chinese/EUCTWSMModel.cs | 87 +++++----- .../MultiByte/Chinese/GB18030_SMModel.cs | 99 ++++++----- .../MultiByte/Chinese/HZ_GB_2312_SMModel.cs | 91 +++++----- .../MultiByte/Chinese/Iso_2022_CN_SMModel.cs | 91 +++++----- .../Models/MultiByte/Japanese/EUCJPSMModel.cs | 88 +++++----- .../MultiByte/Japanese/Iso_2022_JP_SMModel.cs | 94 +++++----- .../Models/MultiByte/Japanese/SJIS_SMModel.cs | 86 +++++----- .../Models/MultiByte/Korean/CP949SMModel.cs | 74 ++++---- .../Models/MultiByte/Korean/EUCKRSMModel.cs | 80 ++++----- .../MultiByte/Korean/Iso_2022_KR_SMModel.cs | 84 ++++----- src/Core/Models/MultiByte/UCS2BE_SMModel.cs | 90 +++++----- src/Core/Models/MultiByte/UCS2LE_SMModel.cs | 90 +++++----- src/Core/Models/MultiByte/UTF8_SMModel.cs | 128 +++++++------- src/Core/Models/SequenceModel.cs | 26 +-- .../Models/SingleByte/Arabic/ArabicModel.cs | 2 - .../SingleByte/Bulgarian/BulgarianModel.cs | 12 +- .../Bulgarian/Iso_8859_5_BulgarianModel.cs | 2 +- .../Bulgarian/Windows_1251_BulgarianModel.cs | 2 +- .../SingleByte/Croatian/CroatianModel.cs | 2 - .../Models/SingleByte/Czech/CzechModel.cs | 2 - .../Models/SingleByte/Danish/DanishModel.cs | 2 - .../SingleByte/Esperanto/EsperantoModel.cs | 2 - .../SingleByte/Estonian/EstonianModel.cs | 2 - .../Models/SingleByte/Finnish/FinnishModel.cs | 2 - .../Models/SingleByte/French/FrenchModel.cs | 2 - .../Models/SingleByte/German/GermanModel.cs | 2 - .../Models/SingleByte/Greek/GreekModel.cs | 10 +- .../Models/SingleByte/Hebrew/HebrewModel.cs | 8 +- .../Hebrew/Windows_1255_HebrewModel.cs | 2 +- .../SingleByte/Hungarian/HungarianModel.cs | 10 +- .../Models/SingleByte/Irish/IrishModel.cs | 2 - .../Models/SingleByte/Italian/ItalianModel.cs | 2 - .../Models/SingleByte/Latvian/LatvianModel.cs | 2 - .../SingleByte/Lithuanian/LithuanianModel.cs | 2 - .../Models/SingleByte/Maltese/MalteseModel.cs | 2 - .../Models/SingleByte/Polish/PolishModel.cs | 2 - .../SingleByte/Portuguese/PortugueseModel.cs | 2 - .../SingleByte/Romanian/RomanianModel.cs | 2 - .../SingleByte/Russian/Ibm855_RussianModel.cs | 2 +- .../SingleByte/Russian/Ibm866_RussianModel.cs | 2 +- .../Models/SingleByte/Russian/RussianModel.cs | 10 +- .../Models/SingleByte/Slovak/SlovakModel.cs | 2 - .../Models/SingleByte/Slovene/SloveneModel.cs | 2 - .../Models/SingleByte/Spanish/SpanishModel.cs | 2 - .../Models/SingleByte/Swedish/SwedishModel.cs | 2 - src/Core/Models/SingleByte/Thai/ThaiModel.cs | 2 - .../Models/SingleByte/Turkish/TurkishModel.cs | 2 - .../SingleByte/Vietnamese/VietnameseModel.cs | 2 - src/Core/Models/StateMachineModel.cs | 10 +- src/Core/Probers/CharsetProber.cs | 13 +- src/Core/Probers/CodingStateMachine.cs | 42 ++--- src/Core/Probers/EscCharsetProber.cs | 16 +- src/Core/Probers/HebrewProber.cs | 77 +++++---- src/Core/Probers/Latin1Prober.cs | 11 +- src/Core/Probers/MBCSGroupProber.cs | 5 +- .../MultiByte/Chinese/GB18030Prober.cs | 2 +- src/Core/Probers/ProbingState.cs | 6 +- src/Core/Probers/SBCSGroupProber.cs | 11 +- src/Core/Probers/SingleByteCharSetProber.cs | 48 +++--- src/DetectionDetail.cs | 2 +- src/DetectionResult.cs | 1 - src/UTF-unknown.csproj | 4 +- tests/BitPackageTest.cs | 6 +- tests/CharsetDetectorTest.cs | 2 +- tests/CharsetDetectorTestBatch.cs | 12 +- tests/DetectionDetailTests.cs | 2 - tests/EncodingJsonConverter.cs | 3 - 83 files changed, 1086 insertions(+), 1019 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b8473b0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,160 @@ +# To learn more about .editorconfig see https://aka.ms/editorconfigdocs +############################### +# Core EditorConfig Options # +############################### +# All files +[*] +indent_style = space +insert_final_newline = false + +# XML project files +[*.{csproj,proj,projitems,shproj}] +indent_size = 2 + +# XML config files +[*.{props,targets,ruleset,config,nuspec,resx,vsixmanifest,vsct}] +indent_size = 2 + +# Code files +[*.{cs,csx,vb,vbx}] +indent_size = 4 +insert_final_newline = true +charset = utf-8-bom +############################### +# .NET Coding Conventions # +############################### +[*.{cs,vb}] +# Organize usings +dotnet_sort_system_directives_first = true +# this. preferences +dotnet_style_qualification_for_field = false:silent +dotnet_style_qualification_for_property = false:silent +dotnet_style_qualification_for_method = false:silent +dotnet_style_qualification_for_event = false:silent +# Language keywords vs BCL types preferences +dotnet_style_predefined_type_for_locals_parameters_members = true:silent +dotnet_style_predefined_type_for_member_access = true:silent +# Parentheses preferences +dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent +dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent +dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent +dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent +# Modifier preferences +dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent +dotnet_style_readonly_field = true:suggestion +# Expression-level preferences +dotnet_style_object_initializer = true:suggestion +dotnet_style_collection_initializer = true:suggestion +dotnet_style_explicit_tuple_names = true:suggestion +dotnet_style_null_propagation = true:suggestion +dotnet_style_coalesce_expression = true:suggestion +dotnet_style_prefer_is_null_check_over_reference_equality_method = true:silent +dotnet_style_prefer_inferred_tuple_names = true:suggestion +dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion +dotnet_style_prefer_auto_properties = true:silent +dotnet_style_prefer_conditional_expression_over_assignment = true:silent +dotnet_style_prefer_conditional_expression_over_return = true:silent +############################### +# Naming Conventions # +############################### +# Style Definitions +dotnet_naming_style.pascal_case_style.capitalization = pascal_case +# Use PascalCase for constant fields +dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields +dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style +dotnet_naming_symbols.constant_fields.applicable_kinds = field +dotnet_naming_symbols.constant_fields.applicable_accessibilities = * +dotnet_naming_symbols.constant_fields.required_modifiers = const +dotnet_style_prefer_simplified_boolean_expressions = true:suggestion +dotnet_style_prefer_compound_assignment = true:suggestion +dotnet_style_prefer_simplified_interpolation = true:suggestion +dotnet_style_namespace_match_folder = true:suggestion +dotnet_style_operator_placement_when_wrapping = beginning_of_line +tab_width = 4 +end_of_line = crlf +dotnet_style_allow_multiple_blank_lines_experimental = true:silent +dotnet_style_allow_statement_immediately_after_block_experimental = true:silent +dotnet_code_quality_unused_parameters = all:suggestion +############################### +# C# Coding Conventions # +############################### +[*.cs] +# var preferences +csharp_style_var_for_built_in_types = true:silent +csharp_style_var_when_type_is_apparent = true:silent +csharp_style_var_elsewhere = true:silent +# Expression-bodied members +csharp_style_expression_bodied_methods = false:silent +csharp_style_expression_bodied_constructors = false:silent +csharp_style_expression_bodied_operators = false:silent +csharp_style_expression_bodied_properties = true:silent +csharp_style_expression_bodied_indexers = true:silent +csharp_style_expression_bodied_accessors = true:silent +# Pattern matching preferences +csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion +csharp_style_pattern_matching_over_as_with_null_check = true:suggestion +# Null-checking preferences +csharp_style_throw_expression = true:suggestion +csharp_style_conditional_delegate_call = true:suggestion +# Modifier preferences +csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion +# Expression-level preferences +csharp_prefer_braces = true:silent +csharp_style_deconstructed_variable_declaration = true:suggestion +csharp_prefer_simple_default_expression = true:suggestion +csharp_style_pattern_local_over_anonymous_function = true:suggestion +csharp_style_inlined_variable_declaration = true:suggestion +############################### +# C# Formatting Rules # +############################### +# New line preferences +csharp_new_line_before_open_brace = all +csharp_new_line_before_else = true +csharp_new_line_before_catch = true +csharp_new_line_before_finally = true +csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_members_in_anonymous_types = true +csharp_new_line_between_query_expression_clauses = true +# Indentation preferences +csharp_indent_case_contents = true +csharp_indent_switch_labels = true +csharp_indent_labels = flush_left +# Space preferences +csharp_space_after_cast = false +csharp_space_after_keywords_in_control_flow_statements = true +csharp_space_between_method_call_parameter_list_parentheses = false +csharp_space_between_method_declaration_parameter_list_parentheses = false +csharp_space_between_parentheses = false +csharp_space_before_colon_in_inheritance_clause = true +csharp_space_after_colon_in_inheritance_clause = true +csharp_space_around_binary_operators = before_and_after +csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +csharp_space_between_method_call_name_and_opening_parenthesis = false +csharp_space_between_method_call_empty_parameter_list_parentheses = false +# Wrapping preferences +csharp_preserve_single_line_statements = true +csharp_preserve_single_line_blocks = true +csharp_using_directive_placement = outside_namespace:silent +csharp_prefer_simple_using_statement = true:suggestion +csharp_style_namespace_declarations = block_scoped:silent +csharp_style_prefer_method_group_conversion = true:silent +csharp_style_expression_bodied_lambdas = true:silent +csharp_style_expression_bodied_local_functions = false:silent +csharp_style_prefer_extended_property_pattern = true:suggestion +csharp_style_prefer_null_check_over_type_check = true:suggestion +csharp_style_prefer_local_over_anonymous_function = true:suggestion +csharp_style_prefer_range_operator = true:suggestion +csharp_style_prefer_index_operator = true:suggestion +csharp_style_prefer_tuple_swap = true:suggestion +csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion +csharp_style_unused_value_expression_statement_preference = discard_variable:silent +csharp_style_unused_value_assignment_preference = discard_variable:suggestion +csharp_prefer_static_local_function = true:suggestion +csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent +csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent +csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent +csharp_style_prefer_parameter_null_checking = true:suggestion +csharp_style_prefer_switch_expression = true:suggestion +csharp_style_prefer_pattern_matching = true:silent +csharp_style_prefer_not_pattern = true:suggestion \ No newline at end of file diff --git a/UTF-unknown.sln b/UTF-unknown.sln index 03eb114..f9be16e 100644 --- a/UTF-unknown.sln +++ b/UTF-unknown.sln @@ -1,13 +1,18 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.26228.9 +# Visual Studio Version 17 +VisualStudioVersion = 17.2.32616.157 MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UTF-unknown", "src\UTF-unknown.csproj", "{64CA7BA7-EFD9-4475-BB66-40B187622A73}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UTF-unknown", "src\UTF-unknown.csproj", "{64CA7BA7-EFD9-4475-BB66-40B187622A73}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleExample", "example\ConsoleExample.csproj", "{386C6ABF-44EA-4418-B90E-E8D21E4C2475}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ConsoleExample", "example\ConsoleExample.csproj", "{386C6ABF-44EA-4418-B90E-E8D21E4C2475}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UTF-unknown.Tests", "Tests\UTF-unknown.Tests.csproj", "{1922DCC9-A45F-4627-9087-CD492BBF7F38}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UTF-unknown.Tests", "Tests\UTF-unknown.Tests.csproj", "{1922DCC9-A45F-4627-9087-CD492BBF7F38}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{052846B2-CA56-482F-B477-6E33523C091E}" + ProjectSection(SolutionItems) = preProject + .editorconfig = .editorconfig + EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -31,4 +36,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0C7AF656-EF20-4880-8EB9-9BF101340A03} + EndGlobalSection EndGlobal diff --git a/src/CharsetDetector.cs b/src/CharsetDetector.cs index a9f9e6e..47ddc5e 100644 --- a/src/CharsetDetector.cs +++ b/src/CharsetDetector.cs @@ -48,9 +48,9 @@ namespace UtfUnknown { /// - /// Default implementation of charset detection interface. + /// Default implementation of charset detection interface. /// The detector can be fed by a System.IO.Stream: - /// + /// public class CharsetDetector { internal InputState InputState; @@ -135,7 +135,7 @@ public static DetectionResult DetectFromBytes(byte[] bytes) } /// - /// Detect the character encoding form this byte array. + /// Detect the character encoding form this byte array. /// It searchs for BOM from bytes[offset]. /// /// The byte array containing the text @@ -170,7 +170,7 @@ public static DetectionResult DetectFromBytes(byte[] bytes, int offset, int len) /// /// Detect the character encoding by reading the stream. - /// + /// /// Note: stream position is not reset before and after. /// /// The steam. @@ -186,7 +186,7 @@ public static DetectionResult DetectFromStream(Stream stream) /// /// Detect the character encoding by reading the stream. - /// + /// /// Note: stream position is not reset before and after. /// /// The steam. @@ -403,10 +403,10 @@ private static string FindCharSetByBom(byte[] buf, int offset, int len) if (buf0 == 0xEF && buf1 == 0xBB && buf[offset + 2] == 0xBF) return CodepageName.UTF8; - + if (len < 4) return null; - + //Here, because anyway further more than 3 positions are checked. if (buf0 == 0x00 && buf1 == 0x00) { @@ -422,24 +422,24 @@ private static string FindCharSetByBom(byte[] buf, int offset, int len) if (buf0 == 0x2B && buf1 == 0x2F && buf[offset + 2] == 0x76) if (buf[offset + 3] == 0x38 || buf[offset + 3] == 0x39 || buf[offset + 3] == 0x2B || buf[offset + 3] == 0x2F) return CodepageName.UTF7; - + // Detect GB18030 with bom (see table in https://en.wikipedia.org/wiki/Byte_order_mark) // TODO: If you remove this check, GB18030Prober will still be defined as GB18030 -- It's feature or bug? if (buf0 == 0x84 && buf1 == 0x31 && buf[offset + 2] == 0x95 && buf[offset + 3] == 0x33) return CodepageName.GB18030; - + return null; } /// - /// Notify detector that no further data is available. + /// Notify detector that no further data is available. /// private DetectionResult DataEnd() { if (!_gotData) { - // we haven't got any data yet, return immediately - // caller program sometimes call DataEnd before anything has + // we haven't got any data yet, return immediately + // caller program sometimes call DataEnd before anything has // been sent to detector return new DetectionResult(); } @@ -499,5 +499,4 @@ internal IList GetNewProbers() } } } -} - +} \ No newline at end of file diff --git a/src/Core/Analyzers/CharDistributionAnalyser.cs b/src/Core/Analyzers/CharDistributionAnalyser.cs index a1b632a..94698b5 100644 --- a/src/Core/Analyzers/CharDistributionAnalyser.cs +++ b/src/Core/Analyzers/CharDistributionAnalyser.cs @@ -1,4 +1,4 @@ -/* ***** BEGIN LICENSE BLOCK ***** +/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version @@ -38,7 +38,7 @@ namespace UtfUnknown.Core.Analyzers { /// - /// Base class for the Character Distribution Method, used for + /// Base class for the Character Distribution Method, used for /// the CJK encodings /// public abstract class CharDistributionAnalyser @@ -60,7 +60,7 @@ public abstract class CharDistributionAnalyser // Mapping table to get frequency order from char order (get from GetOrder()) protected int[] charToFreqOrder; - // This constant value varies from language to language. It is used in calculating confidence. + // This constant value varies from language to language. It is used in calculating confidence. protected float typicalDistributionRatio; public CharDistributionAnalyser() @@ -72,7 +72,7 @@ public CharDistributionAnalyser() /// Feed a block of data and do distribution analysis /// /// - /// we do not handle character base on its original encoding string, but + /// we do not handle character base on its original encoding string, but /// convert this encoding string to a number, here called order. /// This allow multiple encoding of a language to share one frequency table /// @@ -82,7 +82,7 @@ public CharDistributionAnalyser() public abstract int GetOrder(byte[] buf, int offset); /// - /// Feed a character with known length + /// Feed a character with known length /// /// A /// buf offset diff --git a/src/Core/Analyzers/MultiByte/Chinese/BIG5DistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Chinese/BIG5DistributionAnalyser.cs index e8933dc..4507938 100644 --- a/src/Core/Analyzers/MultiByte/Chinese/BIG5DistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Chinese/BIG5DistributionAnalyser.cs @@ -3,7 +3,7 @@ namespace UtfUnknown.Core.Analyzers.Chinese public class BIG5DistributionAnalyser : CharDistributionAnalyser { // Big5 frequency table - // by Taiwan's Mandarin Promotion Council + // by Taiwan's Mandarin Promotion Council // /****************************************************************************** * 128 --> 0.42261 @@ -902,28 +902,28 @@ public class BIG5DistributionAnalyser : CharDistributionAnalyser 13968,13969,13970,13971,13972, //13973 ****************************************************************************************/ }; - + public BIG5DistributionAnalyser() { charToFreqOrder = BIG5_CHAR2FREQ_ORDER; - typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO; } - + /// /// first byte range: 0xa4 -- 0xfe /// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// - public override int GetOrder(byte[] buf, int offset) - { + public override int GetOrder(byte[] buf, int offset) + { if (buf[offset] >= 0xA4) { if (buf[offset+1] >= 0xA1) return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0xA1 + 63; else return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0x40; } else { - return -1; - } + return -1; + } } } } \ No newline at end of file diff --git a/src/Core/Analyzers/MultiByte/Chinese/EUCTWDistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Chinese/EUCTWDistributionAnalyser.cs index 4fc4db0..262d5cc 100644 --- a/src/Core/Analyzers/MultiByte/Chinese/EUCTWDistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Chinese/EUCTWDistributionAnalyser.cs @@ -3,8 +3,8 @@ namespace UtfUnknown.Core.Analyzers.Chinese public class EUCTWDistributionAnalyser : CharDistributionAnalyser { // EUCTW frequency table - // Converted from big5 work - // by Taiwan's Mandarin Promotion Council + // Converted from big5 work + // by Taiwan's Mandarin Promotion Council // /****************************************************************************** * 128 --> 0.42261 @@ -417,9 +417,9 @@ public EUCTWDistributionAnalyser() /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xC4) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xC4) return 94 * (buf[offset] - 0xC4) + buf[offset+1] - 0xA1; else return -1; diff --git a/src/Core/Analyzers/MultiByte/Chinese/GB18030DistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Chinese/GB18030DistributionAnalyser.cs index b2f8564..1ccb134 100644 --- a/src/Core/Analyzers/MultiByte/Chinese/GB18030DistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Chinese/GB18030DistributionAnalyser.cs @@ -455,7 +455,7 @@ public GB18030DistributionAnalyser() : base() charToFreqOrder = GB2312_CHAR2FREQ_ORDER; typicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO; } - + /// /// for GB2312 encoding, we are interested /// first byte range: 0xb0 -- 0xfe @@ -463,9 +463,9 @@ public GB18030DistributionAnalyser() : base() /// no validation needed here. State machine has done that /// /// - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1) return 94 * (buf[offset] - 0xb0) + buf[offset+1] - 0xA1; else return -1; diff --git a/src/Core/Analyzers/MultiByte/Japanese/EUCJPContextAnalyser.cs b/src/Core/Analyzers/MultiByte/Japanese/EUCJPContextAnalyser.cs index d95f06a..bf55de7 100644 --- a/src/Core/Analyzers/MultiByte/Japanese/EUCJPContextAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Japanese/EUCJPContextAnalyser.cs @@ -3,11 +3,11 @@ namespace UtfUnknown.Core.Analyzers.Japanese public class EUCJPContextAnalyser : JapaneseContextAnalyser { private const byte HIRAGANA_FIRST_BYTE = 0xA4; - + protected override int GetOrder(byte[] buf, int offset, out int charLen) { byte high = buf[offset]; - + //find out current char's byte length if (high == 0x8E || high >= 0xA1 && high <= 0xFE) charLen = 2; @@ -22,9 +22,9 @@ protected override int GetOrder(byte[] buf, int offset, out int charLen) if (low >= 0xA1 && low <= 0xF3) return low - 0xA1; } - return -1; + return -1; } - + protected override int GetOrder(byte[] buf, int offset) { // We are only interested in Hiragana @@ -34,6 +34,6 @@ protected override int GetOrder(byte[] buf, int offset) return low - 0xA1; } return -1; - } + } } } \ No newline at end of file diff --git a/src/Core/Analyzers/MultiByte/Japanese/EUCJPDistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Japanese/EUCJPDistributionAnalyser.cs index 8ead24f..bc9e1e0 100644 --- a/src/Core/Analyzers/MultiByte/Japanese/EUCJPDistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Japanese/EUCJPDistributionAnalyser.cs @@ -7,9 +7,9 @@ public class EUCJPDistributionAnalyser : SJISDistributionAnalyser /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xA0) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xA0) return 94 * (buf[offset] - 0xA1) + buf[offset+1] - 0xA1; else return -1; diff --git a/src/Core/Analyzers/MultiByte/Japanese/JapaneseContextAnalyser.cs b/src/Core/Analyzers/MultiByte/Japanese/JapaneseContextAnalyser.cs index 9992020..c6c8d2b 100644 --- a/src/Core/Analyzers/MultiByte/Japanese/JapaneseContextAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Japanese/JapaneseContextAnalyser.cs @@ -1,4 +1,4 @@ -/* ***** BEGIN LICENSE BLOCK ***** +/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version @@ -21,7 +21,7 @@ * Contributor(s): * Shy Shalom * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -48,7 +48,7 @@ public abstract class JapaneseContextAnalyser // hiragana frequency category table // This is hiragana 2-char sequence table, the number in each cell represents its frequency category - protected static byte[,] jp2CharContext = { + protected static byte[,] jp2CharContext = { { 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,}, { 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,}, { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,}, @@ -133,50 +133,50 @@ public abstract class JapaneseContextAnalyser { 0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3,}, { 0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1,}, }; - + // category counters, each integer counts sequence in its category int[] relSample = new int[CATEGORIES_NUM]; // total sequence received int totalRel; - + // The order of previous char int lastCharOrder; - // if last byte in current buffer is not the last byte of a character, + // if last byte in current buffer is not the last byte of a character, // we need to know how many byte to skip in next buffer. int needToSkipCharNum; - // If this flag is set to true, detection is done and conclusion has + // If this flag is set to true, detection is done and conclusion has // been made bool done; - + public JapaneseContextAnalyser() { - Reset(); + Reset(); } - + public float GetConfidence() { // This is just one way to calculate confidence. It works well for me. if (totalRel > MINIMUM_DATA_THRESHOLD) return ((float)(totalRel - relSample[0]))/totalRel; - else + else return DONT_KNOW; } public void HandleData(byte[] buf, int offset, int len) { int max = offset + len; - + if (done) return; - // The buffer we got is byte oriented, and a character may span + // The buffer we got is byte oriented, and a character may span // more than one buffer. In case the last one or two byte in last - // buffer is not complete, we record how many byte needed to + // buffer is not complete, we record how many byte needed to // complete that character and skip these bytes here. We can choose - // to record those bytes as well and analyse the character once it + // to record those bytes as well and analyse the character once it // is complete, but since a character will not make much difference, // skipping it will simplify our logic and improve performance. for (int i = needToSkipCharNum+offset; i < max; ) { @@ -198,14 +198,14 @@ public void HandleData(byte[] buf, int offset, int len) } } } - + public void HandleOneChar(byte[] buf, int offset, int charLen) { - if (totalRel > MAX_REL_THRESHOLD) + if (totalRel > MAX_REL_THRESHOLD) done = true; - if (done) + if (done) return; - + // Only 2-bytes characters are of our interest int order = (charLen == 2) ? GetOrder(buf, offset) : -1; if (order != -1 && lastCharOrder != -1) { @@ -215,7 +215,7 @@ public void HandleOneChar(byte[] buf, int offset, int charLen) } lastCharOrder = order; } - + public void Reset() { totalRel = 0; @@ -226,16 +226,15 @@ public void Reset() done = false; } } - + protected abstract int GetOrder(byte[] buf, int offset, out int charLen); - + protected abstract int GetOrder(byte[] buf, int offset); - - public bool GotEnoughData() + + public bool GotEnoughData() { return totalRel > ENOUGH_REL_THRESHOLD; } - + } } - diff --git a/src/Core/Analyzers/MultiByte/Japanese/SJISContextAnalyser.cs b/src/Core/Analyzers/MultiByte/Japanese/SJISContextAnalyser.cs index d299c80..2c973b3 100644 --- a/src/Core/Analyzers/MultiByte/Japanese/SJISContextAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Japanese/SJISContextAnalyser.cs @@ -1,4 +1,4 @@ -namespace UtfUnknown.Core.Analyzers.Japanese +namespace UtfUnknown.Core.Analyzers.Japanese { public class SJISContextAnalyser : JapaneseContextAnalyser { @@ -7,10 +7,10 @@ public class SJISContextAnalyser : JapaneseContextAnalyser protected override int GetOrder(byte[] buf, int offset, out int charLen) { //find out current char's byte length - if (buf[offset] >= 0x81 && buf[offset] <= 0x9F + if (buf[offset] >= 0x81 && buf[offset] <= 0x9F || buf[offset] >= 0xe0 && buf[offset] <= 0xFC) charLen = 2; - else + else charLen = 1; // return its order if it is hiragana @@ -19,7 +19,7 @@ protected override int GetOrder(byte[] buf, int offset, out int charLen) if (low >= 0x9F && low <= 0xF1) return low - 0x9F; } - return -1; + return -1; } protected override int GetOrder(byte[] buf, int offset) @@ -32,6 +32,5 @@ protected override int GetOrder(byte[] buf, int offset) } return -1; } - } } \ No newline at end of file diff --git a/src/Core/Analyzers/MultiByte/Japanese/SJISDistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Japanese/SJISDistributionAnalyser.cs index 9963b43..e4eb617 100644 --- a/src/Core/Analyzers/MultiByte/Japanese/SJISDistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Japanese/SJISDistributionAnalyser.cs @@ -4,7 +4,7 @@ public class SJISDistributionAnalyser : CharDistributionAnalyser { // Sampling from about 20M text materials include literature and computer technology // Japanese frequency table, applied to both S-JIS and EUC-JP - // They are sorted in order. + // They are sorted in order. /****************************************************************************** * 128 --> 0.77094 @@ -545,31 +545,31 @@ public class SJISDistributionAnalyser : CharDistributionAnalyser 8240,8241,8242,8243,8244,8245,8246,8247,8248,8249,8250,8251,8252,8253,8254,8255, // 8256 8256,8257,8258,8259,8260,8261,8262,8263,8264,8265,8266,8267,8268,8269,8270,8271, // 8272 ****************************************************************************************/ - }; - + }; + public SJISDistributionAnalyser() { charToFreqOrder = SJIS_CHAR2FREQ_ORDER; - typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = SJIS_TYPICAL_DISTRIBUTION_RATIO; } - + /// /// first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe /// second byte range: 0x40 -- 0x7e, 0x81 -- oxfe /// no validation needed here. State machine has done that /// - public override int GetOrder(byte[] buf, int offset) - { + public override int GetOrder(byte[] buf, int offset) + { int order; - - if (buf[offset] >= 0x81 && buf[offset] <= 0x9F) + + if (buf[offset] >= 0x81 && buf[offset] <= 0x9F) order = 188 * (buf[offset] - 0x81); - else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF) + else if (buf[offset] >= 0xE0 && buf[offset] <= 0xEF) order = 188 * (buf[offset] - 0xE0 + 31); else return -1; order += buf[offset+1] - 0x40; - + if (buf[offset+1] > 0x7F) order--; return order; diff --git a/src/Core/Analyzers/MultiByte/Korean/EUCKRDistributionAnalyser.cs b/src/Core/Analyzers/MultiByte/Korean/EUCKRDistributionAnalyser.cs index 8ca17e5..b4374a5 100644 --- a/src/Core/Analyzers/MultiByte/Korean/EUCKRDistributionAnalyser.cs +++ b/src/Core/Analyzers/MultiByte/Korean/EUCKRDistributionAnalyser.cs @@ -1,11 +1,9 @@ -using UtfUnknown.Core.Analyzers; - -namespace UtfUnknown.Core.Analyzers.Korean +namespace UtfUnknown.Core.Analyzers.Korean { public class EUCKRDistributionAnalyser : CharDistributionAnalyser { // Sampling from about 20M text materials include literature and computer technology - + /* * 128 --> 0.79 * 256 --> 0.92 @@ -169,8 +167,8 @@ public class EUCKRDistributionAnalyser : CharDistributionAnalyser 2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, //512, 256 - /*************************************************************************************** - * Everything below is of no interest for detection purpose + /*************************************************************************************** + * Everything below is of no interest for detection purpose *************************************************************************************** 2643,2644,2645,2646,2647,2648,2649,2650,2651,2652,2653,2654,2655,2656,2657,2658, @@ -573,24 +571,24 @@ public class EUCKRDistributionAnalyser : CharDistributionAnalyser 8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719, 8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735, 8736,8737,8738,8739,8740,8741 */ }; - + public EUCKRDistributionAnalyser() { charToFreqOrder = EUCKR_CHAR2FREQ_ORDER; - typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; + typicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO; } - + /// /// first byte range: 0xb0 -- 0xfe /// second byte range: 0xa1 -- 0xfe /// no validation needed here. State machine has done that /// - public override int GetOrder(byte[] buf, int offset) - { - if (buf[offset] >= 0xB0) + public override int GetOrder(byte[] buf, int offset) + { + if (buf[offset] >= 0xB0) return 94 * (buf[offset] - 0xB0) + buf[offset+1] - 0xA1; else return -1; } } -} \ No newline at end of file +} diff --git a/src/Core/BitPackage.cs b/src/Core/BitPackage.cs index 3774579..c58724f 100644 --- a/src/Core/BitPackage.cs +++ b/src/Core/BitPackage.cs @@ -43,17 +43,17 @@ public class BitPackage public static int INDEX_SHIFT_4BITS = 3; public static int INDEX_SHIFT_8BITS = 2; public static int INDEX_SHIFT_16BITS = 1; - public static int SHIFT_MASK_4BITS = 7; public static int SHIFT_MASK_8BITS = 3; + public static int SHIFT_MASK_16BITS = 1; - public static int BIT_SHIFT_4BITS = 2; public static int BIT_SHIFT_8BITS = 3; + public static int BIT_SHIFT_16BITS = 4; - public static int UNIT_MASK_4BITS = 0x0000000F; public static int UNIT_MASK_8BITS = 0x000000FF; + public static int UNIT_MASK_16BITS = 0x0000FFFF; private int indexShift; @@ -61,7 +61,7 @@ public class BitPackage private int bitShift; private int unitMask; private int[] data; - + public BitPackage(int indexShift, int shiftMask, int bitShift, int unitMask, int[] data) { @@ -71,28 +71,28 @@ public BitPackage(int indexShift, int shiftMask, this.unitMask = unitMask; this.data = data; } - + public static int Pack16bits(int a, int b) { return ((b << 16) | a); } - + public static int Pack8bits(int a, int b, int c, int d) { return Pack16bits((b << 8) | a, (d << 8) | c); } - - public static int Pack4bits(int a, int b, int c, int d, + + public static int Pack4bits(int a, int b, int c, int d, int e, int f, int g, int h) { return Pack8bits((b << 4) | a, (d << 4) | c, (f << 4) | e, (h << 4) | g); } - + public int Unpack(int i) { - return (data[i >> indexShift] >> + return (data[i >> indexShift] >> ((i & shiftMask) << bitShift)) & unitMask; } - } -} + } +} \ No newline at end of file diff --git a/src/Core/CodepageName.cs b/src/Core/CodepageName.cs index 4d4aadd..391f473 100644 --- a/src/Core/CodepageName.cs +++ b/src/Core/CodepageName.cs @@ -13,7 +13,7 @@ * * Contributor(s): * UTF-Unknown Contributors (2019) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -45,7 +45,7 @@ internal static class CodepageName /// UTF-7 codepage name. /// internal const string UTF7 = "utf-7"; - + /// /// UTF-8 codepage name. /// @@ -55,22 +55,22 @@ internal static class CodepageName /// UTF-16LE codepage name. /// internal const string UTF16_LE = "utf-16le"; - + /// /// UTF-16BE codepage name. /// internal const string UTF16_BE = "utf-16be"; - + /// /// UTF-32LE codepage name. /// internal const string UTF32_LE = "utf-32le"; - + /// /// UTF-32BE codepage name. /// internal const string UTF32_BE = "utf-32be"; - + /// /// EUC Japanese codepage name. /// @@ -94,7 +94,7 @@ internal static class CodepageName /// Not supported. /// internal const string EUC_TW = "euc-tw"; - + /// /// ISO 2022 Chinese codepage name. /// @@ -102,7 +102,7 @@ internal static class CodepageName /// Supported by alias is x-cp50227 (Codepage 50227) in. NET. Codepage identifier 50229 is currently unsupported (see for example https://github.com/microsoft/referencesource/blob/17b97365645da62cf8a49444d979f94a59bbb155/mscorlib/system/text/iso2022encoding.cs#L92). /// internal const string ISO_2022_CN = "iso-2022-cn"; - + /// /// ISO 2022 Korean codepage name. /// @@ -123,7 +123,7 @@ internal static class CodepageName /// Other alias is cp50227. /// internal const string X_CP50227 = "x-cp50227"; - + /// /// Big5 codepage name. /// @@ -149,7 +149,7 @@ internal static class CodepageName /// Are other aliases shift_jis, sjis, csshiftjis, cswindows31j, ms_kanji, x-sjis in .NET /// internal const string SHIFT_JIS = "shift-jis"; - + /// /// ANSI/OEM Korean codepage name. /// @@ -165,7 +165,7 @@ internal static class CodepageName /// Not supported in .NET. A nearly identical version of cp949 is ks_c_5601-1987 (see https://lists.w3.org/Archives/Public/ietf-charsets/2002JulSep/0020.html) /// internal const string CP949 = "cp949"; - + /// /// OEM Latin-2 codepage name. /// @@ -181,7 +181,7 @@ internal static class CodepageName /// Is other alias cp855 in .NET /// internal const string IBM855 = "ibm855"; - + /// /// OEM Cyrillic (primarily Russian) codepage name. /// @@ -242,7 +242,7 @@ internal static class CodepageName /// Are other aliases iso_8859-7, iso_8859-7:1987, iso-ir-126, greek, greek8, csisolatingreek, ecma-118, elot_928 in .NET /// internal const string ISO_8859_7 = "iso-8859-7"; - + /// /// ISO 8859-8 Hebrew codepage name. /// @@ -266,7 +266,7 @@ internal static class CodepageName /// Not supported. /// internal const string ISO_8859_10 = "iso-8859-10"; - + /// /// ANSI/OEM Thai codepage name. /// @@ -287,7 +287,7 @@ internal static class CodepageName /// Are other aliases iso_8859-15, l9, latin9, csisolatin9 in .NET /// internal const string ISO_8859_15 = "iso-8859-15"; - + /// /// ISO 8859-16 codepage name. /// @@ -319,12 +319,12 @@ internal static class CodepageName /// Is other alias x-ansi in .NET /// internal const string WINDOWS_1252 = "windows-1252"; - + /// /// ANSI Greek codepage name. /// internal const string WINDOWS_1253 = "windows-1253"; - + /// /// ANSI Hebrew codepage name. /// @@ -342,7 +342,7 @@ internal static class CodepageName /// ANSI Baltic codepage name. /// internal const string WINDOWS_1257 = "windows-1257"; - + /// /// ANSI/OEM Vietnamese codepage name. /// @@ -352,7 +352,7 @@ internal static class CodepageName /// MAC Latin-2 codepage name. /// internal const string X_MAC_CE = "x-mac-ce"; - + /// /// Cyrillic (Mac) codepage name. /// @@ -401,4 +401,4 @@ internal static class CodepageName /// internal const string X_ISO_10646_UCS_4_2143 = "X-ISO-10646-UCS-4-2143"; } -} +} \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Chinese/BIG5SMModel.cs b/src/Core/Models/MultiByte/Chinese/BIG5SMModel.cs index 995d464..5995f6e 100644 --- a/src/Core/Models/MultiByte/Chinese/BIG5SMModel.cs +++ b/src/Core/Models/MultiByte/Chinese/BIG5SMModel.cs @@ -1,65 +1,62 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Chinese { public class BIG5SMModel : StateMachineModel { private readonly static int[] BIG5_cls = { BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 - BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 80 - 87 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f - BitPackage.Pack4bits(4,3,3,3,3,3,3,3), // a0 - a7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // a8 - af - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b0 - b7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b8 - bf - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c0 - c7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 + BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 80 - 87 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f + BitPackage.Pack4bits(4,3,3,3,3,3,3,3), // a0 - a7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // a8 - af + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b0 - b7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b8 - bf + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c0 - c7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff }; private readonly static int[] BIG5_st = { - BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ERROR),//08-0f - BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17 + BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ERROR),//08-0f + BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17 }; private readonly static int[] BIG5CharLenTable = {0, 1, 1, 2, 0}; - + public BIG5SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, BIG5_cls), 5, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, BIG5_st), BIG5CharLenTable, CodepageName.BIG5) { - } } } \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Chinese/EUCTWSMModel.cs b/src/Core/Models/MultiByte/Chinese/EUCTWSMModel.cs index 7ee8836..b302137 100644 --- a/src/Core/Models/MultiByte/Chinese/EUCTWSMModel.cs +++ b/src/Core/Models/MultiByte/Chinese/EUCTWSMModel.cs @@ -3,64 +3,63 @@ namespace UtfUnknown.Core.Models.MultiByte.Chinese public class EUCTWSMModel : StateMachineModel { private readonly static int[] EUCTW_cls = { - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 00 - 07 - BitPackage.Pack4bits(2,2,2,2,2,2,0,0), // 08 - 0f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 10 - 17 - BitPackage.Pack4bits(2,2,2,0,2,2,2,2), // 18 - 1f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 20 - 27 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 28 - 2f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 30 - 37 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 38 - 3f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 78 - 7f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 - BitPackage.Pack4bits(0,0,0,0,0,0,6,0), // 88 - 8f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f - BitPackage.Pack4bits(0,3,4,4,4,4,4,4), // a0 - a7 - BitPackage.Pack4bits(5,5,1,1,1,1,1,1), // a8 - af - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf - BitPackage.Pack4bits(1,1,3,1,3,3,3,3), // c0 - c7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7 - BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 00 - 07 + BitPackage.Pack4bits(2,2,2,2,2,2,0,0), // 08 - 0f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 10 - 17 + BitPackage.Pack4bits(2,2,2,0,2,2,2,2), // 18 - 1f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 20 - 27 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 28 - 2f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 30 - 37 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 38 - 3f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 78 - 7f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 + BitPackage.Pack4bits(0,0,0,0,0,0,6,0), // 88 - 8f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f + BitPackage.Pack4bits(0,3,4,4,4,4,4,4), // a0 - a7 + BitPackage.Pack4bits(5,5,1,1,1,1,1,1), // a8 - af + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf + BitPackage.Pack4bits(1,1,3,1,3,3,3,3), // c0 - c7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7 + BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff }; private readonly static int[] EUCTW_st = { - BitPackage.Pack4bits(ERROR,ERROR,START, 3, 3, 3, 4,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,START,ERROR),//10-17 - BitPackage.Pack4bits(START,START,START,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f - BitPackage.Pack4bits( 5,ERROR,ERROR,ERROR,START,ERROR,START,START),//20-27 - BitPackage.Pack4bits(START,ERROR,START,START,START,START,START,START) //28-2f + BitPackage.Pack4bits(ERROR,ERROR,START, 3, 3, 3, 4,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,START,ERROR),//10-17 + BitPackage.Pack4bits(START,START,START,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f + BitPackage.Pack4bits( 5,ERROR,ERROR,ERROR,START,ERROR,START,START),//20-27 + BitPackage.Pack4bits(START,ERROR,START,START,START,START,START,START) //28-2f }; private readonly static int[] EUCTWCharLenTable = { 0, 0, 1, 2, 2, 2, 3 }; - + public EUCTWSMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCTW_cls), 7, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCTW_st), EUCTWCharLenTable, CodepageName.EUC_TW) { - } } } \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Chinese/GB18030_SMModel.cs b/src/Core/Models/MultiByte/Chinese/GB18030_SMModel.cs index 20bf9c8..99ea1f9 100644 --- a/src/Core/Models/MultiByte/Chinese/GB18030_SMModel.cs +++ b/src/Core/Models/MultiByte/Chinese/GB18030_SMModel.cs @@ -1,73 +1,70 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Chinese { public class GB18030_SMModel : StateMachineModel { private readonly static int[] GB18030_cls = { - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 30 - 37 - BitPackage.Pack4bits(3,3,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 - BitPackage.Pack4bits(2,2,2,2,2,2,2,4), // 78 - 7f - BitPackage.Pack4bits(5,6,6,6,6,6,6,6), // 80 - 87 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a0 - a7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a8 - af - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b0 - b7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b8 - bf - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c0 - c7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e0 - e7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e8 - ef - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // f0 - f7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,0) // f8 - ff + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 30 - 37 + BitPackage.Pack4bits(3,3,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 + BitPackage.Pack4bits(2,2,2,2,2,2,2,4), // 78 - 7f + BitPackage.Pack4bits(5,6,6,6,6,6,6,6), // 80 - 87 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a0 - a7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a8 - af + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b0 - b7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b8 - bf + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c0 - c7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e0 - e7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e8 - ef + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // f0 - f7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,0) // f8 - ff }; private readonly static int[] GB18030_st = { - BitPackage.Pack4bits(ERROR,START,START,START,START,START, 3,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START),//10-17 - BitPackage.Pack4bits( 4,ERROR,START,START,ERROR,ERROR,ERROR,ERROR),//18-1f - BitPackage.Pack4bits(ERROR,ERROR, 5,ERROR,ERROR,ERROR,ITSME,ERROR),//20-27 - BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,START,START) //28-2f + BitPackage.Pack4bits(ERROR,START,START,START,START,START, 3,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START),//10-17 + BitPackage.Pack4bits( 4,ERROR,START,START,ERROR,ERROR,ERROR,ERROR),//18-1f + BitPackage.Pack4bits(ERROR,ERROR, 5,ERROR,ERROR,ERROR,ITSME,ERROR),//20-27 + BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,START,START) //28-2f }; - // To be accurate, the length of class 6 can be either 2 or 4. - // But it is not necessary to discriminate between the two since - // it is used for frequency analysis only, and we are validating - // each code range there as well. So it is safe to set it to be - // 2 here. + // To be accurate, the length of class 6 can be either 2 or 4. + // But it is not necessary to discriminate between the two since + // it is used for frequency analysis only, and we are validating + // each code range there as well. So it is safe to set it to be + // 2 here. private readonly static int[] GB18030CharLenTable = {0, 1, 1, 1, 1, 1, 2}; - + public GB18030_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, GB18030_cls), 7, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, GB18030_st), GB18030CharLenTable, CodepageName.GB18030) { - } } } \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Chinese/HZ_GB_2312_SMModel.cs b/src/Core/Models/MultiByte/Chinese/HZ_GB_2312_SMModel.cs index ffb3540..d62c6c6 100644 --- a/src/Core/Models/MultiByte/Chinese/HZ_GB_2312_SMModel.cs +++ b/src/Core/Models/MultiByte/Chinese/HZ_GB_2312_SMModel.cs @@ -37,71 +37,68 @@ * ***** END LICENSE BLOCK ***** */ //Escaped charsets state machines -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Chinese { public class HZ_GB_2312_SMModel : StateMachineModel { private readonly static int[] HZ_cls = { - BitPackage.Pack4bits(1,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,4,0,5,2,0), // 78 - 7f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 80 - 87 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 88 - 8f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 90 - 97 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 98 - 9f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a0 - a7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a8 - af - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c0 - c7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c8 - cf - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d0 - d7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d8 - df - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e0 - e7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e8 - ef - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // f0 - f7 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1) // f8 - ff + BitPackage.Pack4bits(1,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,4,0,5,2,0), // 78 - 7f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 80 - 87 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 88 - 8f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 90 - 97 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 98 - 9f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a0 - a7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a8 - af + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c0 - c7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c8 - cf + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d0 - d7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d8 - df + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e0 - e7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e8 - ef + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // f0 - f7 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1) // f8 - ff }; private readonly static int[] HZ_st = { - BitPackage.Pack4bits(START, ERROR, 3, START, START, START, ERROR, ERROR),//00-07 - BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME),//08-0f - BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START, 4, ERROR),//10-17 - BitPackage.Pack4bits( 5, ERROR, 6, ERROR, 5, 5, 4, ERROR),//18-1f - BitPackage.Pack4bits( 4, ERROR, 4, 4, 4, ERROR, 4, ERROR),//20-27 - BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f + BitPackage.Pack4bits(START, ERROR, 3, START, START, START, ERROR, ERROR),//00-07 + BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME),//08-0f + BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START, 4, ERROR),//10-17 + BitPackage.Pack4bits( 5, ERROR, 6, ERROR, 5, 5, 4, ERROR),//18-1f + BitPackage.Pack4bits( 4, ERROR, 4, 4, 4, ERROR, 4, ERROR),//20-27 + BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f }; private readonly static int[] HZCharLenTable = {0, 0, 0, 0, 0, 0}; - + public HZ_GB_2312_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, HZ_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, HZ_st), HZCharLenTable, CodepageName.HZ_GB_2312) { - } } -} +} \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Chinese/Iso_2022_CN_SMModel.cs b/src/Core/Models/MultiByte/Chinese/Iso_2022_CN_SMModel.cs index 45ca196..0e200f4 100644 --- a/src/Core/Models/MultiByte/Chinese/Iso_2022_CN_SMModel.cs +++ b/src/Core/Models/MultiByte/Chinese/Iso_2022_CN_SMModel.cs @@ -1,70 +1,67 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Chinese { public class Iso_2022_CN_SMModel : StateMachineModel { private readonly static int[] ISO2022CN_cls = { - BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 - BitPackage.Pack4bits(0,3,0,0,0,0,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(0,0,0,4,0,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff + BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 + BitPackage.Pack4bits(0,3,0,0,0,0,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(0,0,0,4,0,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff }; private readonly static int[] ISO2022CN_st = { - BitPackage.Pack4bits(START, 3,ERROR,START,START,START,START,START), //00-07 - BitPackage.Pack4bits(START,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //08-0f - BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME), //10-17 - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR), //18-1f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //20-27 - BitPackage.Pack4bits( 5, 6,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //28-2f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //30-37 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f + BitPackage.Pack4bits(START, 3,ERROR,START,START,START,START,START), //00-07 + BitPackage.Pack4bits(START,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //08-0f + BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME), //10-17 + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR), //18-1f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //20-27 + BitPackage.Pack4bits( 5, 6,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //28-2f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //30-37 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f }; private readonly static int[] ISO2022CNCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0}; public Iso_2022_CN_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022CN_cls), 9, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022CN_st), ISO2022CNCharLenTable, CodepageName.ISO_2022_CN) { - } } } \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Japanese/EUCJPSMModel.cs b/src/Core/Models/MultiByte/Japanese/EUCJPSMModel.cs index 2b4227c..b6f4c39 100644 --- a/src/Core/Models/MultiByte/Japanese/EUCJPSMModel.cs +++ b/src/Core/Models/MultiByte/Japanese/EUCJPSMModel.cs @@ -1,63 +1,61 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Japanese { public class EUCJPSMModel : StateMachineModel { private readonly static int[] EUCJP_cls = { - //BitPacket.Pack4bits(5,4,4,4,4,4,4,4), // 00 - 07 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 00 - 07 - BitPackage.Pack4bits(4,4,4,4,4,4,5,5), // 08 - 0f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 10 - 17 - BitPackage.Pack4bits(4,4,4,5,4,4,4,4), // 18 - 1f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 20 - 27 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 28 - 2f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 30 - 37 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 38 - 3f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 40 - 47 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 50 - 57 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 58 - 5f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 60 - 67 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 68 - 6f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 70 - 77 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 78 - 7f - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 80 - 87 - BitPackage.Pack4bits(5,5,5,5,5,5,1,3), // 88 - 8f - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 90 - 97 - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 98 - 9f - BitPackage.Pack4bits(5,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,5) // f8 - ff + //BitPacket.Pack4bits(5,4,4,4,4,4,4,4), // 00 - 07 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 00 - 07 + BitPackage.Pack4bits(4,4,4,4,4,4,5,5), // 08 - 0f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 10 - 17 + BitPackage.Pack4bits(4,4,4,5,4,4,4,4), // 18 - 1f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 20 - 27 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 28 - 2f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 30 - 37 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 38 - 3f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 40 - 47 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 50 - 57 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 58 - 5f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 60 - 67 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 68 - 6f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 70 - 77 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 78 - 7f + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 80 - 87 + BitPackage.Pack4bits(5,5,5,5,5,5,1,3), // 88 - 8f + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 90 - 97 + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 98 - 9f + BitPackage.Pack4bits(5,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,5) // f8 - ff }; private readonly static int[] EUCJP_st = { - BitPackage.Pack4bits( 3, 4, 3, 5,START,ERROR,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME,START,ERROR,START,ERROR,ERROR,ERROR),//10-17 - BitPackage.Pack4bits(ERROR,ERROR,START,ERROR,ERROR,ERROR, 3,ERROR),//18-1f - BitPackage.Pack4bits( 3,ERROR,ERROR,ERROR,START,START,START,START) //20-27 + BitPackage.Pack4bits( 3, 4, 3, 5,START,ERROR,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME,START,ERROR,START,ERROR,ERROR,ERROR),//10-17 + BitPackage.Pack4bits(ERROR,ERROR,START,ERROR,ERROR,ERROR, 3,ERROR),//18-1f + BitPackage.Pack4bits( 3,ERROR,ERROR,ERROR,START,START,START,START) //20-27 }; private readonly static int[] EUCJPCharLenTable = { 2, 2, 2, 3, 1, 0 }; - + public EUCJPSMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCJP_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCJP_st), EUCJPCharLenTable, CodepageName.EUC_JP) diff --git a/src/Core/Models/MultiByte/Japanese/Iso_2022_JP_SMModel.cs b/src/Core/Models/MultiByte/Japanese/Iso_2022_JP_SMModel.cs index c1865df..c47dbc6 100644 --- a/src/Core/Models/MultiByte/Japanese/Iso_2022_JP_SMModel.cs +++ b/src/Core/Models/MultiByte/Japanese/Iso_2022_JP_SMModel.cs @@ -1,72 +1,70 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Japanese { public class Iso_2022_JP_SMModel : StateMachineModel { private readonly static int[] ISO2022JP_cls = { - BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,0,0,0,0,2,2), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,7,0,0,0), // 20 - 27 - BitPackage.Pack4bits(3,0,0,0,0,0,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(6,0,4,0,8,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,9,5,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff + BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,0,0,0,0,2,2), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,7,0,0,0), // 20 - 27 + BitPackage.Pack4bits(3,0,0,0,0,0,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(6,0,4,0,8,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,9,5,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff }; private readonly static int[] ISO2022JP_st = { - BitPackage.Pack4bits(START, 3, ERROR,START,START,START,START,START), //00-07 - BitPackage.Pack4bits(START, START, ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //08-0f - BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ITSME,ITSME,ITSME), //10-17 - BitPackage.Pack4bits(ITSME, ITSME, ITSME,ITSME,ITSME,ITSME,ERROR,ERROR), //18-1f - BitPackage.Pack4bits(ERROR, 5, ERROR,ERROR,ERROR, 4,ERROR,ERROR), //20-27 - BitPackage.Pack4bits(ERROR, ERROR, ERROR, 6,ITSME,ERROR,ITSME,ERROR), //28-2f - BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ERROR,ERROR,ITSME,ITSME), //30-37 - BitPackage.Pack4bits(ERROR, ERROR, ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //38-3f - BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47 + BitPackage.Pack4bits(START, 3, ERROR,START,START,START,START,START), //00-07 + BitPackage.Pack4bits(START, START, ERROR,ERROR,ERROR,ERROR,ERROR,ERROR), //08-0f + BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ITSME,ITSME,ITSME), //10-17 + BitPackage.Pack4bits(ITSME, ITSME, ITSME,ITSME,ITSME,ITSME,ERROR,ERROR), //18-1f + BitPackage.Pack4bits(ERROR, 5, ERROR,ERROR,ERROR, 4,ERROR,ERROR), //20-27 + BitPackage.Pack4bits(ERROR, ERROR, ERROR, 6,ITSME,ERROR,ITSME,ERROR), //28-2f + BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ERROR,ERROR,ITSME,ITSME), //30-37 + BitPackage.Pack4bits(ERROR, ERROR, ERROR,ITSME,ERROR,ERROR,ERROR,ERROR), //38-3f + BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47 }; private readonly static int[] ISO2022JPCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; public Iso_2022_JP_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022JP_cls), 10, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022JP_st), ISO2022JPCharLenTable, CodepageName.ISO_2022_JP) { } - + } } \ No newline at end of file diff --git a/src/Core/Models/MultiByte/Japanese/SJIS_SMModel.cs b/src/Core/Models/MultiByte/Japanese/SJIS_SMModel.cs index 814bad4..5450e7a 100644 --- a/src/Core/Models/MultiByte/Japanese/SJIS_SMModel.cs +++ b/src/Core/Models/MultiByte/Japanese/SJIS_SMModel.cs @@ -1,63 +1,61 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte.Japanese { public class SJIS_SMModel : StateMachineModel { private readonly static int[] SJIS_cls = { - //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 - BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 80 - 87 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 88 - 8f - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 90 - 97 - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 98 - 9f - //0xa0 is illegal in sjis encoding, but some pages does + //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77 + BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 80 - 87 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 88 - 8f + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 90 - 97 + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 98 - 9f + //0xa0 is illegal in sjis encoding, but some pages does //contain such byte. We need to be more error forgiven. - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 - BitPackage.Pack4bits(3,3,3,3,3,4,4,4), // e8 - ef - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // f0 - f7 - BitPackage.Pack4bits(4,4,4,4,4,0,0,0) // f8 - ff + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7 + BitPackage.Pack4bits(3,3,3,3,3,4,4,4), // e8 - ef + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // f0 - f7 + BitPackage.Pack4bits(4,4,4,4,4,0,0,0) // f8 - ff }; private readonly static int[] SJIS_st = { - BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,START,START,START,START) //10-17 + BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,START,START,START,START) //10-17 }; private readonly static int[] SJISCharLenTable = { 0, 1, 1, 2, 0, 0 }; - + public SJIS_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, SJIS_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, SJIS_st), SJISCharLenTable, CodepageName.SHIFT_JIS) diff --git a/src/Core/Models/MultiByte/Korean/CP949SMModel.cs b/src/Core/Models/MultiByte/Korean/CP949SMModel.cs index f9272e1..fde1be0 100644 --- a/src/Core/Models/MultiByte/Korean/CP949SMModel.cs +++ b/src/Core/Models/MultiByte/Korean/CP949SMModel.cs @@ -31,38 +31,38 @@ public class CP949SMModel : StateMachineModel */ private readonly static int[] CP949_cls = { - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(1,4,4,4,4,4,4,4), // 40 - 47 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f - BitPackage.Pack4bits(4,4,5,5,5,5,5,5), // 50 - 57 - BitPackage.Pack4bits(5,5,5,1,1,1,1,1), // 58 - 5f - BitPackage.Pack4bits(1,5,5,5,5,5,5,5), // 60 - 67 - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 68 - 6f - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 70 - 77 - BitPackage.Pack4bits(5,5,5,1,1,1,1,1), // 78 - 7f - BitPackage.Pack4bits(0,6,6,6,6,6,6,6), // 80 - 87 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f - BitPackage.Pack4bits(6,7,7,7,7,7,7,7), // a0 - a7 - BitPackage.Pack4bits(7,7,7,7,7,8,8,8), // a8 - af - BitPackage.Pack4bits(7,7,7,7,7,7,7,7), // b0 - b7 - BitPackage.Pack4bits(7,7,7,7,7,7,7,7), // b8 - bf - BitPackage.Pack4bits(7,7,7,7,7,7,9,2), // c0 - c7 - BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 - BitPackage.Pack4bits(2,2,2,2,2,2,3,0) // f8 - ff + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(1,4,4,4,4,4,4,4), // 40 - 47 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f + BitPackage.Pack4bits(4,4,5,5,5,5,5,5), // 50 - 57 + BitPackage.Pack4bits(5,5,5,1,1,1,1,1), // 58 - 5f + BitPackage.Pack4bits(1,5,5,5,5,5,5,5), // 60 - 67 + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 68 - 6f + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 70 - 77 + BitPackage.Pack4bits(5,5,5,1,1,1,1,1), // 78 - 7f + BitPackage.Pack4bits(0,6,6,6,6,6,6,6), // 80 - 87 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f + BitPackage.Pack4bits(6,7,7,7,7,7,7,7), // a0 - a7 + BitPackage.Pack4bits(7,7,7,7,7,8,8,8), // a8 - af + BitPackage.Pack4bits(7,7,7,7,7,7,7,7), // b0 - b7 + BitPackage.Pack4bits(7,7,7,7,7,7,7,7), // b8 - bf + BitPackage.Pack4bits(7,7,7,7,7,7,9,2), // c0 - c7 + BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 + BitPackage.Pack4bits(2,2,2,2,2,2,3,0) // f8 - ff }; /* @@ -90,15 +90,15 @@ public class CP949SMModel : StateMachineModel }; private readonly static int[] CP949CharLenTable = { 0, 1, 2, 0, 1, 1, 2, 2, 0, 2 }; - + public CP949SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, CP949_cls), 10, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, CP949_st), CP949CharLenTable, CodepageName.CP949) diff --git a/src/Core/Models/MultiByte/Korean/EUCKRSMModel.cs b/src/Core/Models/MultiByte/Korean/EUCKRSMModel.cs index 0ffedd3..c7e4de1 100644 --- a/src/Core/Models/MultiByte/Korean/EUCKRSMModel.cs +++ b/src/Core/Models/MultiByte/Korean/EUCKRSMModel.cs @@ -3,56 +3,56 @@ namespace UtfUnknown.Core.Models.MultiByte.Korean public class EUCKRSMModel : StateMachineModel { private readonly static int[] EUCKR_cls = { - //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f - BitPackage.Pack4bits(0,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,3,3,3), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,0) // f8 - ff + //BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f + BitPackage.Pack4bits(0,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,3,3,3), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,0) // f8 - ff }; private readonly static int[] EUCKR_st = { - BitPackage.Pack4bits(ERROR,START, 3,ERROR,ERROR,ERROR,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START,START) //08-0f + BitPackage.Pack4bits(ERROR,START, 3,ERROR,ERROR,ERROR,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START,START) //08-0f }; private readonly static int[] EUCKRCharLenTable = { 0, 1, 2, 0 }; - + public EUCKRSMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCKR_cls), 4, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, EUCKR_st), EUCKRCharLenTable, CodepageName.EUC_KR) diff --git a/src/Core/Models/MultiByte/Korean/Iso_2022_KR_SMModel.cs b/src/Core/Models/MultiByte/Korean/Iso_2022_KR_SMModel.cs index 8c49a00..3504f85 100644 --- a/src/Core/Models/MultiByte/Korean/Iso_2022_KR_SMModel.cs +++ b/src/Core/Models/MultiByte/Korean/Iso_2022_KR_SMModel.cs @@ -1,60 +1,60 @@ namespace UtfUnknown.Core.Models.MultiByte.Korean { public class Iso_2022_KR_SMModel : StateMachineModel - { + { private readonly static int[] ISO2022KR_cls = { - BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,3,0,0,0), // 20 - 27 - BitPackage.Pack4bits(0,4,0,0,0,0,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(0,0,0,5,0,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef - BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 - BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff + BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,3,0,0,0), // 20 - 27 + BitPackage.Pack4bits(0,4,0,0,0,0,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(0,0,0,5,0,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef + BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7 + BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff }; private readonly static int[] ISO2022KR_st = { - BitPackage.Pack4bits(START, 3,ERROR,START,START,START,ERROR,ERROR), //00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME), //08-0f - BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR,ERROR), //10-17 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 5,ERROR,ERROR,ERROR), //18-1f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27 + BitPackage.Pack4bits(START, 3,ERROR,START,START,START,ERROR,ERROR), //00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME), //08-0f + BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR,ERROR), //10-17 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 5,ERROR,ERROR,ERROR), //18-1f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27 }; private readonly static int[] ISO2022KRCharLenTable = {0, 0, 0, 0, 0, 0}; public Iso_2022_KR_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022KR_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, ISO2022KR_st), ISO2022KRCharLenTable, CodepageName.ISO_2022_KR) diff --git a/src/Core/Models/MultiByte/UCS2BE_SMModel.cs b/src/Core/Models/MultiByte/UCS2BE_SMModel.cs index a299fbb..4a028ca 100644 --- a/src/Core/Models/MultiByte/UCS2BE_SMModel.cs +++ b/src/Core/Models/MultiByte/UCS2BE_SMModel.cs @@ -1,64 +1,62 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte { public class UCS2BE_SMModel : StateMachineModel { private readonly static int[] UCS2BE_cls = { - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 - BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 - BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 + BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 + BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff }; private readonly static int[] UCS2BE_st = { - BitPackage.Pack4bits( 5, 7, 7,ERROR, 4, 3,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME, 6, 6, 6, 6,ERROR,ERROR),//10-17 - BitPackage.Pack4bits( 6, 6, 6, 6, 6,ITSME, 6, 6),//18-1f - BitPackage.Pack4bits( 6, 6, 6, 6, 5, 7, 7,ERROR),//20-27 - BitPackage.Pack4bits( 5, 8, 6, 6,ERROR, 6, 6, 6),//28-2f - BitPackage.Pack4bits( 6, 6, 6, 6,ERROR,ERROR,START,START) //30-37 + BitPackage.Pack4bits( 5, 7, 7,ERROR, 4, 3,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME, 6, 6, 6, 6,ERROR,ERROR),//10-17 + BitPackage.Pack4bits( 6, 6, 6, 6, 6,ITSME, 6, 6),//18-1f + BitPackage.Pack4bits( 6, 6, 6, 6, 5, 7, 7,ERROR),//20-27 + BitPackage.Pack4bits( 5, 8, 6, 6,ERROR, 6, 6, 6),//28-2f + BitPackage.Pack4bits( 6, 6, 6, 6,ERROR,ERROR,START,START) //30-37 }; private readonly static int[] UCS2BECharLenTable = { 2, 2, 2, 0, 2, 2 }; - + public UCS2BE_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UCS2BE_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UCS2BE_st), UCS2BECharLenTable, CodepageName.UTF16_BE) diff --git a/src/Core/Models/MultiByte/UCS2LE_SMModel.cs b/src/Core/Models/MultiByte/UCS2LE_SMModel.cs index f248857..169c32e 100644 --- a/src/Core/Models/MultiByte/UCS2LE_SMModel.cs +++ b/src/Core/Models/MultiByte/UCS2LE_SMModel.cs @@ -1,64 +1,62 @@ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte { public class UCS2LE_SMModel : StateMachineModel { private readonly static int[] UCS2LE_cls = { - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07 - BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 - BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 - BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef - BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 - BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07 + BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17 + BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27 + BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7 + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef + BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7 + BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff }; private readonly static int[] UCS2LE_st = { - BitPackage.Pack4bits( 6, 6, 7, 6, 4, 3,ERROR,ERROR),//00-07 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f - BitPackage.Pack4bits(ITSME,ITSME, 5, 5, 5,ERROR,ITSME,ERROR),//10-17 - BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR, 6, 6),//18-1f - BitPackage.Pack4bits( 7, 6, 8, 8, 5, 5, 5,ERROR),//20-27 - BitPackage.Pack4bits( 5, 5, 5,ERROR,ERROR,ERROR, 5, 5),//28-2f - BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR,START,START) //30-37 + BitPackage.Pack4bits( 6, 6, 7, 6, 4, 3,ERROR,ERROR),//00-07 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f + BitPackage.Pack4bits(ITSME,ITSME, 5, 5, 5,ERROR,ITSME,ERROR),//10-17 + BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR, 6, 6),//18-1f + BitPackage.Pack4bits( 7, 6, 8, 8, 5, 5, 5,ERROR),//20-27 + BitPackage.Pack4bits( 5, 5, 5,ERROR,ERROR,ERROR, 5, 5),//28-2f + BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR,START,START) //30-37 }; private readonly static int[] UCS2LECharLenTable = { 2, 2, 2, 2, 2, 2 }; - + public UCS2LE_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UCS2LE_cls), 6, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UCS2LE_st), UCS2LECharLenTable, CodepageName.UTF16_LE) diff --git a/src/Core/Models/MultiByte/UTF8_SMModel.cs b/src/Core/Models/MultiByte/UTF8_SMModel.cs index 42912a7..b138949 100644 --- a/src/Core/Models/MultiByte/UTF8_SMModel.cs +++ b/src/Core/Models/MultiByte/UTF8_SMModel.cs @@ -36,87 +36,85 @@ * * ***** END LICENSE BLOCK ***** */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.MultiByte { public class UTF8_SMModel : StateMachineModel { private readonly static int[] UTF8_cls = { BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07 - BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 - BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77 - BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f - BitPackage.Pack4bits(2,2,2,2,3,3,3,3), // 80 - 87 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97 - BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a0 - a7 - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a8 - af - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b0 - b7 - BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b8 - bf - BitPackage.Pack4bits(0,0,6,6,6,6,6,6), // c0 - c7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7 - BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df - BitPackage.Pack4bits(7,8,8,8,8,8,8,8), // e0 - e7 - BitPackage.Pack4bits(8,8,8,8,8,9,8,8), // e8 - ef - BitPackage.Pack4bits(10,11,11,11,11,11,11,11), // f0 - f7 - BitPackage.Pack4bits(12,13,13,13,14,15,0,0) // f8 - ff + BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17 + BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77 + BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f + BitPackage.Pack4bits(2,2,2,2,3,3,3,3), // 80 - 87 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97 + BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a0 - a7 + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a8 - af + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b0 - b7 + BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b8 - bf + BitPackage.Pack4bits(0,0,6,6,6,6,6,6), // c0 - c7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7 + BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df + BitPackage.Pack4bits(7,8,8,8,8,8,8,8), // e0 - e7 + BitPackage.Pack4bits(8,8,8,8,8,9,8,8), // e8 - ef + BitPackage.Pack4bits(10,11,11,11,11,11,11,11), // f0 - f7 + BitPackage.Pack4bits(12,13,13,13,14,15,0,0) // f8 - ff }; private readonly static int[] UTF8_st = { - BitPackage.Pack4bits(ERROR,START,ERROR,ERROR,ERROR,ERROR, 12, 10),//00-07 - BitPackage.Pack4bits( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//10-17 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//20-27 - BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//28-2f - BitPackage.Pack4bits(ERROR,ERROR, 5, 5, 5, 5,ERROR,ERROR),//30-37 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//38-3f - BitPackage.Pack4bits(ERROR,ERROR,ERROR, 5, 5, 5,ERROR,ERROR),//40-47 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//48-4f - BitPackage.Pack4bits(ERROR,ERROR, 7, 7, 7, 7,ERROR,ERROR),//50-57 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//58-5f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 7, 7,ERROR,ERROR),//60-67 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//68-6f - BitPackage.Pack4bits(ERROR,ERROR, 9, 9, 9, 9,ERROR,ERROR),//70-77 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//78-7f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 9, 9,ERROR,ERROR),//80-87 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//88-8f - BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12, 12,ERROR,ERROR),//90-97 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//98-9f - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 12,ERROR,ERROR),//a0-a7 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//a8-af - BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12,ERROR,ERROR,ERROR),//b0-b7 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//b8-bf - BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,ERROR,ERROR),//c0-c7 - BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR) //c8-cf + BitPackage.Pack4bits(ERROR,START,ERROR,ERROR,ERROR,ERROR, 12, 10),//00-07 + BitPackage.Pack4bits( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//10-17 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//20-27 + BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//28-2f + BitPackage.Pack4bits(ERROR,ERROR, 5, 5, 5, 5,ERROR,ERROR),//30-37 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//38-3f + BitPackage.Pack4bits(ERROR,ERROR,ERROR, 5, 5, 5,ERROR,ERROR),//40-47 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//48-4f + BitPackage.Pack4bits(ERROR,ERROR, 7, 7, 7, 7,ERROR,ERROR),//50-57 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//58-5f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 7, 7,ERROR,ERROR),//60-67 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//68-6f + BitPackage.Pack4bits(ERROR,ERROR, 9, 9, 9, 9,ERROR,ERROR),//70-77 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//78-7f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 9, 9,ERROR,ERROR),//80-87 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//88-8f + BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12, 12,ERROR,ERROR),//90-97 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//98-9f + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 12,ERROR,ERROR),//a0-a7 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//a8-af + BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12,ERROR,ERROR,ERROR),//b0-b7 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//b8-bf + BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,ERROR,ERROR),//c0-c7 + BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR) //c8-cf }; - private readonly static int[] UTF8CharLenTable = + private readonly static int[] UTF8CharLenTable = {0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6 }; - + public UTF8_SMModel() : base( - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UTF8_cls), 16, - new BitPackage(BitPackage.INDEX_SHIFT_4BITS, - BitPackage.SHIFT_MASK_4BITS, + new BitPackage(BitPackage.INDEX_SHIFT_4BITS, + BitPackage.SHIFT_MASK_4BITS, BitPackage.BIT_SHIFT_4BITS, BitPackage.UNIT_MASK_4BITS, UTF8_st), UTF8CharLenTable, CodepageName.UTF8) diff --git a/src/Core/Models/SequenceModel.cs b/src/Core/Models/SequenceModel.cs index 95421c1..f2ded24 100644 --- a/src/Core/Models/SequenceModel.cs +++ b/src/Core/Models/SequenceModel.cs @@ -21,7 +21,7 @@ * Contributor(s): * Shy Shalom * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -39,7 +39,7 @@ using System; namespace UtfUnknown.Core.Models -{ +{ public abstract class SequenceModel { // Codepoints @@ -57,11 +57,11 @@ public abstract class SequenceModel // [256] table use to find a char's order protected byte[] charToOrderMap; - - // freqCharCount x freqCharCount table to find a 2-char sequence's - // frequency + + // freqCharCount x freqCharCount table to find a 2-char sequence's + // frequency protected byte[] precedenceMatrix; - + // The count of frequent characters protected int freqCharCount; @@ -72,11 +72,11 @@ public int FreqCharCount // freqSeqs / totalSeqs protected float typicalPositiveRatio; - + public float TypicalPositiveRatio { get { return typicalPositiveRatio; } } - + /// /// TODO not used? @@ -89,13 +89,13 @@ public float TypicalPositiveRatio { public bool KeepEnglishLetter { get { return keepEnglishLetter; } } - + protected string charsetName; public string CharsetName { get { return charsetName; } } - + public SequenceModel( byte[] charToOrderMap, byte[] precedenceMatrix, @@ -111,15 +111,15 @@ public SequenceModel( this.keepEnglishLetter = keepEnglishLetter; this.charsetName = charsetName; } - + public byte GetOrder(byte b) { return charToOrderMap[b]; } - + public byte GetPrecedence(int pos) { return precedenceMatrix[pos]; } } -} +} \ No newline at end of file diff --git a/src/Core/Models/SingleByte/Arabic/ArabicModel.cs b/src/Core/Models/SingleByte/Arabic/ArabicModel.cs index b02c066..47ccb80 100644 --- a/src/Core/Models/SingleByte/Arabic/ArabicModel.cs +++ b/src/Core/Models/SingleByte/Arabic/ArabicModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Arabic { public abstract class ArabicModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Bulgarian/BulgarianModel.cs b/src/Core/Models/SingleByte/Bulgarian/BulgarianModel.cs index dec91e9..22d6d3d 100644 --- a/src/Core/Models/SingleByte/Bulgarian/BulgarianModel.cs +++ b/src/Core/Models/SingleByte/Bulgarian/BulgarianModel.cs @@ -40,18 +40,16 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Bulgarian -{ +{ public abstract class BulgarianModel : SequenceModel { - // Model Table: + // Model Table: // total sequences: 100% // first 512 sequences: 96.9392% // first 1024 sequences:3.0618% // rest sequences: 0.2992% - // negative sequences: 0.0020% + // negative sequences: 0.0020% private static byte[] LANG_MODEL = { 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2, @@ -183,9 +181,9 @@ public abstract class BulgarianModel : SequenceModel 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1, }; - public BulgarianModel(byte[] charToOrderMap, string name) + public BulgarianModel(byte[] charToOrderMap, string name) : base(charToOrderMap, LANG_MODEL, 64, 0.969392f, false, name) { - } + } } } diff --git a/src/Core/Models/SingleByte/Bulgarian/Iso_8859_5_BulgarianModel.cs b/src/Core/Models/SingleByte/Bulgarian/Iso_8859_5_BulgarianModel.cs index 45f40d6..2bfcd49 100644 --- a/src/Core/Models/SingleByte/Bulgarian/Iso_8859_5_BulgarianModel.cs +++ b/src/Core/Models/SingleByte/Bulgarian/Iso_8859_5_BulgarianModel.cs @@ -50,7 +50,7 @@ public class Iso_8859_5_BulgarianModel : BulgarianModel // NUM: 0 - 9 // // Character Mapping Table: - // this table is modified base on win1251BulgarianCharToOrderMap, so + // this table is modified base on win1251BulgarianCharToOrderMap, so // only number <64 is sure valid private static byte[] CHAR_TO_ORDER_MAP = { diff --git a/src/Core/Models/SingleByte/Bulgarian/Windows_1251_BulgarianModel.cs b/src/Core/Models/SingleByte/Bulgarian/Windows_1251_BulgarianModel.cs index 05b540a..c3d8332 100644 --- a/src/Core/Models/SingleByte/Bulgarian/Windows_1251_BulgarianModel.cs +++ b/src/Core/Models/SingleByte/Bulgarian/Windows_1251_BulgarianModel.cs @@ -50,7 +50,7 @@ public class Windows_1251_BulgarianModel : BulgarianModel // NUM: 0 - 9 // // Character Mapping Table: - // this table is modified base on win1251BulgarianCharToOrderMap, so + // this table is modified base on win1251BulgarianCharToOrderMap, so // only number <64 is sure valid private static byte[] CHAR_TO_ORDER_MAP = { diff --git a/src/Core/Models/SingleByte/Croatian/CroatianModel.cs b/src/Core/Models/SingleByte/Croatian/CroatianModel.cs index 0f849e6..9b61a70 100644 --- a/src/Core/Models/SingleByte/Croatian/CroatianModel.cs +++ b/src/Core/Models/SingleByte/Croatian/CroatianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Croatian { public abstract class CroatianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Czech/CzechModel.cs b/src/Core/Models/SingleByte/Czech/CzechModel.cs index cfd8706..9d96344 100644 --- a/src/Core/Models/SingleByte/Czech/CzechModel.cs +++ b/src/Core/Models/SingleByte/Czech/CzechModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Czech { public abstract class CzechModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Danish/DanishModel.cs b/src/Core/Models/SingleByte/Danish/DanishModel.cs index 6d4bdb7..4e3e64e 100644 --- a/src/Core/Models/SingleByte/Danish/DanishModel.cs +++ b/src/Core/Models/SingleByte/Danish/DanishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Danish { public abstract class DanishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Esperanto/EsperantoModel.cs b/src/Core/Models/SingleByte/Esperanto/EsperantoModel.cs index 34d7215..f6a876d 100644 --- a/src/Core/Models/SingleByte/Esperanto/EsperantoModel.cs +++ b/src/Core/Models/SingleByte/Esperanto/EsperantoModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Esperanto { public abstract class EsperantoModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Estonian/EstonianModel.cs b/src/Core/Models/SingleByte/Estonian/EstonianModel.cs index d41b724..e16d024 100644 --- a/src/Core/Models/SingleByte/Estonian/EstonianModel.cs +++ b/src/Core/Models/SingleByte/Estonian/EstonianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Estonian { public abstract class EstonianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Finnish/FinnishModel.cs b/src/Core/Models/SingleByte/Finnish/FinnishModel.cs index 54adf5d..c1bfb57 100644 --- a/src/Core/Models/SingleByte/Finnish/FinnishModel.cs +++ b/src/Core/Models/SingleByte/Finnish/FinnishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Finnish { public abstract class FinnishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/French/FrenchModel.cs b/src/Core/Models/SingleByte/French/FrenchModel.cs index 2b747c4..49b8728 100644 --- a/src/Core/Models/SingleByte/French/FrenchModel.cs +++ b/src/Core/Models/SingleByte/French/FrenchModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.French { public abstract class FrenchModel : SequenceModel diff --git a/src/Core/Models/SingleByte/German/GermanModel.cs b/src/Core/Models/SingleByte/German/GermanModel.cs index 12d9ff8..0073897 100644 --- a/src/Core/Models/SingleByte/German/GermanModel.cs +++ b/src/Core/Models/SingleByte/German/GermanModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.German { public abstract class GermanModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Greek/GreekModel.cs b/src/Core/Models/SingleByte/Greek/GreekModel.cs index 79cf159..83efc23 100644 --- a/src/Core/Models/SingleByte/Greek/GreekModel.cs +++ b/src/Core/Models/SingleByte/Greek/GreekModel.cs @@ -40,13 +40,11 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Greek -{ +{ public abstract class GreekModel : SequenceModel { - // Model Table: + // Model Table: // Total sequences: 1579 // First 512 sequences: 0.958419074626211 // Next 512 sequences (512-1024): 0.03968891876305471 @@ -147,9 +145,9 @@ public abstract class GreekModel : SequenceModel 0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, }; - public GreekModel(byte[] charToOrderMap, string name) + public GreekModel(byte[] charToOrderMap, string name) : base(charToOrderMap, LANG_MODEL, 46, 0.958419074626211f, false, name) { - } + } } } diff --git a/src/Core/Models/SingleByte/Hebrew/HebrewModel.cs b/src/Core/Models/SingleByte/Hebrew/HebrewModel.cs index 0f84581..94ee7b5 100644 --- a/src/Core/Models/SingleByte/Hebrew/HebrewModel.cs +++ b/src/Core/Models/SingleByte/Hebrew/HebrewModel.cs @@ -42,13 +42,11 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Hebrew -{ +{ public abstract class HebrewModel : SequenceModel { - // Model Table: + // Model Table: // total sequences: 100% // first 512 sequences: 98.4004% // first 1024 sequences: 1.5981% @@ -185,7 +183,7 @@ public abstract class HebrewModel : SequenceModel 0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0, }; - public HebrewModel(byte[] charToOrderMap, string name) + public HebrewModel(byte[] charToOrderMap, string name) : base(charToOrderMap, LANG_MODEL, 64, 0.984004f, false, name) { } diff --git a/src/Core/Models/SingleByte/Hebrew/Windows_1255_HebrewModel.cs b/src/Core/Models/SingleByte/Hebrew/Windows_1255_HebrewModel.cs index 9b010e3..c943f25 100644 --- a/src/Core/Models/SingleByte/Hebrew/Windows_1255_HebrewModel.cs +++ b/src/Core/Models/SingleByte/Hebrew/Windows_1255_HebrewModel.cs @@ -52,7 +52,7 @@ public class Windows_1255_HebrewModel : HebrewModel // 252: 0 - 9 // Windows-1255 language model - // Character Mapping Table: + // Character Mapping Table: private readonly static byte[]CHAR_TO_ORDER_MAP = { CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,RET,CTR,CTR,RET,CTR,CTR, /* 0X */ CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR,CTR, /* 1X */ diff --git a/src/Core/Models/SingleByte/Hungarian/HungarianModel.cs b/src/Core/Models/SingleByte/Hungarian/HungarianModel.cs index 32be421..3fb656b 100644 --- a/src/Core/Models/SingleByte/Hungarian/HungarianModel.cs +++ b/src/Core/Models/SingleByte/Hungarian/HungarianModel.cs @@ -41,10 +41,8 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Hungarian -{ +{ public abstract class HungarianModel : SequenceModel { // Model Table: @@ -88,10 +86,10 @@ public abstract class HungarianModel : SequenceModel 2,2,2,2,3,2,2,2,0,3,2,0,2,2,0,2,2,3,0,2,2,0,2,2,2,0,0,0,0,0,0,0, }; - public HungarianModel(byte[] charToOrderMap, string name) - : base(charToOrderMap, LANG_MODEL, 32, 0.9748272224933486f, + public HungarianModel(byte[] charToOrderMap, string name) + : base(charToOrderMap, LANG_MODEL, 32, 0.9748272224933486f, false, name) { - } + } } } diff --git a/src/Core/Models/SingleByte/Irish/IrishModel.cs b/src/Core/Models/SingleByte/Irish/IrishModel.cs index 5e9b8f3..00b1841 100644 --- a/src/Core/Models/SingleByte/Irish/IrishModel.cs +++ b/src/Core/Models/SingleByte/Irish/IrishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Irish { public abstract class IrishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Italian/ItalianModel.cs b/src/Core/Models/SingleByte/Italian/ItalianModel.cs index 5f2c00b..2eb44ab 100644 --- a/src/Core/Models/SingleByte/Italian/ItalianModel.cs +++ b/src/Core/Models/SingleByte/Italian/ItalianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Italian { public abstract class ItalianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Latvian/LatvianModel.cs b/src/Core/Models/SingleByte/Latvian/LatvianModel.cs index cd8506d..a38b247 100644 --- a/src/Core/Models/SingleByte/Latvian/LatvianModel.cs +++ b/src/Core/Models/SingleByte/Latvian/LatvianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Latvian { public abstract class LatvianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Lithuanian/LithuanianModel.cs b/src/Core/Models/SingleByte/Lithuanian/LithuanianModel.cs index c2b2195..5039392 100644 --- a/src/Core/Models/SingleByte/Lithuanian/LithuanianModel.cs +++ b/src/Core/Models/SingleByte/Lithuanian/LithuanianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Lithuanian { public abstract class LithuanianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Maltese/MalteseModel.cs b/src/Core/Models/SingleByte/Maltese/MalteseModel.cs index 1da6dfa..4590c2d 100644 --- a/src/Core/Models/SingleByte/Maltese/MalteseModel.cs +++ b/src/Core/Models/SingleByte/Maltese/MalteseModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Maltese { public abstract class MalteseModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Polish/PolishModel.cs b/src/Core/Models/SingleByte/Polish/PolishModel.cs index 18ab869..dd97998 100644 --- a/src/Core/Models/SingleByte/Polish/PolishModel.cs +++ b/src/Core/Models/SingleByte/Polish/PolishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Polish { public abstract class PolishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Portuguese/PortugueseModel.cs b/src/Core/Models/SingleByte/Portuguese/PortugueseModel.cs index 51b9515..fc6d8d9 100644 --- a/src/Core/Models/SingleByte/Portuguese/PortugueseModel.cs +++ b/src/Core/Models/SingleByte/Portuguese/PortugueseModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Portuguese { public abstract class PortugueseModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Romanian/RomanianModel.cs b/src/Core/Models/SingleByte/Romanian/RomanianModel.cs index a501e82..e86e97c 100644 --- a/src/Core/Models/SingleByte/Romanian/RomanianModel.cs +++ b/src/Core/Models/SingleByte/Romanian/RomanianModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Romanian { public abstract class RomanianModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Russian/Ibm855_RussianModel.cs b/src/Core/Models/SingleByte/Russian/Ibm855_RussianModel.cs index 42ce1d2..04e7a79 100644 --- a/src/Core/Models/SingleByte/Russian/Ibm855_RussianModel.cs +++ b/src/Core/Models/SingleByte/Russian/Ibm855_RussianModel.cs @@ -62,7 +62,7 @@ public class Ibm855_RussianModel : RussianModel 43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, 250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,NUM,CTR, }; - + public Ibm855_RussianModel() : base(BYTE_TO_ORDER_MAP, CodepageName.IBM855) { } diff --git a/src/Core/Models/SingleByte/Russian/Ibm866_RussianModel.cs b/src/Core/Models/SingleByte/Russian/Ibm866_RussianModel.cs index ad17f5a..4adecf6 100644 --- a/src/Core/Models/SingleByte/Russian/Ibm866_RussianModel.cs +++ b/src/Core/Models/SingleByte/Russian/Ibm866_RussianModel.cs @@ -62,7 +62,7 @@ public class Ibm866_RussianModel : RussianModel 9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, 239, 68,240,241,242,243,244,245,246,247,248,249,250,251,NUM,CTR, }; - + public Ibm866_RussianModel() : base(CHAR_TO_ORDER_MAP, CodepageName.IBM866) { } diff --git a/src/Core/Models/SingleByte/Russian/RussianModel.cs b/src/Core/Models/SingleByte/Russian/RussianModel.cs index 7535461..7335b73 100644 --- a/src/Core/Models/SingleByte/Russian/RussianModel.cs +++ b/src/Core/Models/SingleByte/Russian/RussianModel.cs @@ -40,18 +40,16 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Russian { public abstract class RussianModel : SequenceModel { - // Model Table: + // Model Table: // total sequences: 100% // first 512 sequences: 97.6601% // first 1024 sequences: 2.3389% // rest sequences: 0.1237% - // negative sequences: 0.0009% + // negative sequences: 0.0009% protected readonly static byte[] LANG_MODEL = { 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2, @@ -182,8 +180,8 @@ public abstract class RussianModel : SequenceModel 0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, }; - - public RussianModel(byte[] charToOrderMap, string name) + + public RussianModel(byte[] charToOrderMap, string name) : base(charToOrderMap, LANG_MODEL, 64, 0.976601f, false, name) { } diff --git a/src/Core/Models/SingleByte/Slovak/SlovakModel.cs b/src/Core/Models/SingleByte/Slovak/SlovakModel.cs index 3854bb2..226ff92 100644 --- a/src/Core/Models/SingleByte/Slovak/SlovakModel.cs +++ b/src/Core/Models/SingleByte/Slovak/SlovakModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Slovak { public abstract class SlovakModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Slovene/SloveneModel.cs b/src/Core/Models/SingleByte/Slovene/SloveneModel.cs index a2d552a..f7ad1c2 100644 --- a/src/Core/Models/SingleByte/Slovene/SloveneModel.cs +++ b/src/Core/Models/SingleByte/Slovene/SloveneModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Slovene { public abstract class SloveneModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Spanish/SpanishModel.cs b/src/Core/Models/SingleByte/Spanish/SpanishModel.cs index 90af12f..03bb44d 100644 --- a/src/Core/Models/SingleByte/Spanish/SpanishModel.cs +++ b/src/Core/Models/SingleByte/Spanish/SpanishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Spanish { public abstract class SpanishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Swedish/SwedishModel.cs b/src/Core/Models/SingleByte/Swedish/SwedishModel.cs index 007bfc5..e9992f0 100644 --- a/src/Core/Models/SingleByte/Swedish/SwedishModel.cs +++ b/src/Core/Models/SingleByte/Swedish/SwedishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Swedish { public abstract class SwedishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Thai/ThaiModel.cs b/src/Core/Models/SingleByte/Thai/ThaiModel.cs index 905e8c2..b75aabf 100644 --- a/src/Core/Models/SingleByte/Thai/ThaiModel.cs +++ b/src/Core/Models/SingleByte/Thai/ThaiModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Thai { public class ThaiModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Turkish/TurkishModel.cs b/src/Core/Models/SingleByte/Turkish/TurkishModel.cs index e7be522..88819ad 100644 --- a/src/Core/Models/SingleByte/Turkish/TurkishModel.cs +++ b/src/Core/Models/SingleByte/Turkish/TurkishModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Turkish { public abstract class TurkishModel : SequenceModel diff --git a/src/Core/Models/SingleByte/Vietnamese/VietnameseModel.cs b/src/Core/Models/SingleByte/Vietnamese/VietnameseModel.cs index 8dc0af2..7bcbafd 100644 --- a/src/Core/Models/SingleByte/Vietnamese/VietnameseModel.cs +++ b/src/Core/Models/SingleByte/Vietnamese/VietnameseModel.cs @@ -40,8 +40,6 @@ * and adjusted to language specific support. */ -using UtfUnknown.Core.Models; - namespace UtfUnknown.Core.Models.SingleByte.Vietnamese { public abstract class VietnameseModel : SequenceModel diff --git a/src/Core/Models/StateMachineModel.cs b/src/Core/Models/StateMachineModel.cs index 725d4be..47db8ff 100644 --- a/src/Core/Models/StateMachineModel.cs +++ b/src/Core/Models/StateMachineModel.cs @@ -77,10 +77,10 @@ public StateMachineModel(BitPackage classTable, int classFactor, this.charLenTable = charLenTable; Name = name; } - + public int GetClass(byte b) - { - return classTable.Unpack((int)b); + { + return classTable.Unpack((int)b); } - } -} + } +} \ No newline at end of file diff --git a/src/Core/Probers/CharsetProber.cs b/src/Core/Probers/CharsetProber.cs index e1e463a..ab8e717 100644 --- a/src/Core/Probers/CharsetProber.cs +++ b/src/Core/Probers/CharsetProber.cs @@ -21,7 +21,7 @@ * Contributor(s): * Shy Shalom * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -95,7 +95,7 @@ public virtual string DumpStatus() // Helper functions used in the Latin1 and Group probers // /// - /// + /// /// /// filtered buffer protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len) @@ -139,8 +139,8 @@ protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int } /// - /// Do filtering to reduce load to probers (Remove ASCII symbols, - /// collapse spaces). This filter applies to all scripts which contain + /// Do filtering to reduce load to probers (Remove ASCII symbols, + /// collapse spaces). This filter applies to all scripts which contain /// both English characters and upper ASCII characters. /// /// a filtered copy of the input buffer @@ -180,7 +180,7 @@ protected static byte[] FilterWithEnglishLetters(byte[] buf, int offset, int len cur++; } - // If the current segment contains more than just a symbol + // If the current segment contains more than just a symbol // and it is not inside a tag then keep it. if (!inTag && cur > prev) ms.Write(buf, prev, cur - prev); @@ -190,5 +190,4 @@ protected static byte[] FilterWithEnglishLetters(byte[] buf, int offset, int len return result; } } -} - +} \ No newline at end of file diff --git a/src/Core/Probers/CodingStateMachine.cs b/src/Core/Probers/CodingStateMachine.cs index 88551cb..1cbd2b3 100644 --- a/src/Core/Probers/CodingStateMachine.cs +++ b/src/Core/Probers/CodingStateMachine.cs @@ -22,7 +22,7 @@ * Shy Shalom * Kohei TAKETA (Java port) * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -49,9 +49,9 @@ public class CodingStateMachine private int currentState; private StateMachineModel model; private int currentCharLen; - - - public CodingStateMachine(StateMachineModel model) + + + public CodingStateMachine(StateMachineModel model) { currentState = StateMachineModel.START; this.model = model; @@ -59,34 +59,34 @@ public CodingStateMachine(StateMachineModel model) public int NextState(byte b) { - // for each byte we get its class, if it is first byte, + // for each byte we get its class, if it is first byte, // we also get byte length int byteCls = model.GetClass(b); - if (currentState == StateMachineModel.START) { - + if (currentState == StateMachineModel.START) { + currentCharLen = model.charLenTable[byteCls]; } - - // from byte's class and stateTable, we get its next state + + // from byte's class and stateTable, we get its next state currentState = model.stateTable.Unpack( currentState * model.ClassFactor + byteCls); - + return currentState; } - - public void Reset() - { - currentState = StateMachineModel.START; + + public void Reset() + { + currentState = StateMachineModel.START; } - public int CurrentCharLen - { - get { return currentCharLen; } + public int CurrentCharLen + { + get { return currentCharLen; } } - public string ModelName - { - get { return model.Name; } + public string ModelName + { + get { return model.Name; } } } -} +} \ No newline at end of file diff --git a/src/Core/Probers/EscCharsetProber.cs b/src/Core/Probers/EscCharsetProber.cs index 482baeb..3abc4c8 100644 --- a/src/Core/Probers/EscCharsetProber.cs +++ b/src/Core/Probers/EscCharsetProber.cs @@ -48,19 +48,19 @@ public class EscCharsetProber : CharsetProber { private const int CHARSETS_NUM = 4; private string detectedCharset; - private CodingStateMachine[] codingSM; + private CodingStateMachine[] codingSM; int activeSM; public EscCharsetProber() { - codingSM = new CodingStateMachine[CHARSETS_NUM]; + codingSM = new CodingStateMachine[CHARSETS_NUM]; codingSM[0] = new CodingStateMachine(new HZ_GB_2312_SMModel()); codingSM[1] = new CodingStateMachine(new Iso_2022_CN_SMModel()); codingSM[2] = new CodingStateMachine(new Iso_2022_JP_SMModel()); codingSM[3] = new CodingStateMachine(new Iso_2022_KR_SMModel()); Reset(); } - + public override void Reset() { state = ProbingState.Detecting; @@ -73,7 +73,7 @@ public override void Reset() public override ProbingState HandleData(byte[] buf, int offset, int len) { int max = offset + len; - + for (int i = offset; i < max && state == ProbingState.Detecting; i++) { for (int j = activeSM - 1; j >= 0; j--) { // byte is feed to all active state machine @@ -101,12 +101,12 @@ public override ProbingState HandleData(byte[] buf, int offset, int len) public override string GetCharsetName() { - return detectedCharset; + return detectedCharset; } - + public override float GetConfidence(StringBuilder status = null) { return 0.99f; - } + } } -} +} \ No newline at end of file diff --git a/src/Core/Probers/HebrewProber.cs b/src/Core/Probers/HebrewProber.cs index 50470e2..6c9c0b8 100644 --- a/src/Core/Probers/HebrewProber.cs +++ b/src/Core/Probers/HebrewProber.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; using System.Text; /* @@ -137,7 +136,7 @@ */ namespace UtfUnknown.Core.Probers -{ +{ /// /// This prober doesn't actually recognize a language or a charset. /// It is a helper prober for the use of the Hebrew model probers @@ -166,25 +165,25 @@ public class HebrewProber : CharsetProber protected const string VISUAL_NAME = CodepageName.ISO_8859_8; protected const string LOGICAL_NAME = CodepageName.WINDOWS_1255; - + // owned by the group prober. protected CharsetProber logicalProber, visualProber; - protected int finalCharLogicalScore, finalCharVisualScore; - + protected int finalCharLogicalScore, finalCharVisualScore; + // The two last bytes seen in the previous buffer. protected byte prev, beforePrev; - + public HebrewProber() { Reset(); } - - public void SetModelProbers(CharsetProber logical, CharsetProber visual) - { - logicalProber = logical; - visualProber = visual; + + public void SetModelProbers(CharsetProber logical, CharsetProber visual) + { + logicalProber = logical; + visualProber = visual; } - + /** * Final letter analysis for logical-visual decision. * Look for evidence that the received buffer is either logical Hebrew or @@ -219,31 +218,31 @@ public override ProbingState HandleData(byte[] buf, int offset, int len) int max = offset + len; for (int i = offset; i < max; i++) { - + byte b = buf[i]; - + // a word just ended if (b == 0x20) { // *(curPtr-2) was not a space so prev is not a 1 letter word if (beforePrev != 0x20) { // case (1) [-2:not space][-1:final letter][cur:space] - if (IsFinal(prev)) + if (IsFinal(prev)) finalCharLogicalScore++; - // case (2) [-2:not space][-1:Non-Final letter][cur:space] + // case (2) [-2:not space][-1:Non-Final letter][cur:space] else if (IsNonFinal(prev)) finalCharVisualScore++; } - + } else { // case (3) [-2:space][-1:final letter][cur:not space] - if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' ')) + if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' ')) ++finalCharVisualScore; } beforePrev = prev; prev = b; } - // Forever detecting, till the end or until both model probers + // Forever detecting, till the end or until both model probers // return NotMe (handled above). return ProbingState.Detecting; } @@ -253,7 +252,7 @@ public override string GetCharsetName() { // If the final letter score distance is dominant enough, rely on it. int finalsub = finalCharLogicalScore - finalCharVisualScore; - if (finalsub >= MIN_FINAL_CHAR_DISTANCE) + if (finalsub >= MIN_FINAL_CHAR_DISTANCE) return LOGICAL_NAME; if (finalsub <= -(MIN_FINAL_CHAR_DISTANCE)) return VISUAL_NAME; @@ -264,9 +263,9 @@ public override string GetCharsetName() return LOGICAL_NAME; if (modelsub < -(MIN_MODEL_DISTANCE)) return VISUAL_NAME; - + // Still no good, back to final letter distance, maybe it'll save the day. - if (finalsub < 0) + if (finalsub < 0) return VISUAL_NAME; // (finalsub > 0 - Logical) or (don't know what to do) default to Logical. @@ -281,10 +280,10 @@ public override void Reset() beforePrev = 0x20; } - public override ProbingState GetState() + public override ProbingState GetState() { // Remain active as long as any of the model probers are active. - if (logicalProber.GetState() == ProbingState.NotMe && + if (logicalProber.GetState() == ProbingState.NotMe && visualProber.GetState() == ProbingState.NotMe) return ProbingState.NotMe; return ProbingState.Detecting; @@ -298,30 +297,30 @@ public override string DumpStatus() return status.ToString(); } - + public override float GetConfidence(StringBuilder status = null) - { + { return 0.0f; } - + protected static bool IsFinal(byte b) { - return (b == FINAL_KAF || b == FINAL_MEM || b == FINAL_NUN - || b == FINAL_PE || b == FINAL_TSADI); + return (b == FINAL_KAF || b == FINAL_MEM || b == FINAL_NUN + || b == FINAL_PE || b == FINAL_TSADI); } - + protected static bool IsNonFinal(byte b) { - // The normal Tsadi is not a good Non-Final letter due to words like - // 'lechotet' (to chat) containing an apostrophe after the tsadi. This - // apostrophe is converted to a space in FilterWithoutEnglishLetters causing - // the Non-Final tsadi to appear at an end of a word even though this is not + // The normal Tsadi is not a good Non-Final letter due to words like + // 'lechotet' (to chat) containing an apostrophe after the tsadi. This + // apostrophe is converted to a space in FilterWithoutEnglishLetters causing + // the Non-Final tsadi to appear at an end of a word even though this is not // the case in the original text. - // The letters Pe and Kaf rarely display a related behavior of not being a - // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for - // example legally end with a Non-Final Pe or Kaf. However, the benefit of - // these letters as Non-Final letters outweighs the damage since these words - // are quite rare. + // The letters Pe and Kaf rarely display a related behavior of not being a + // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for + // example legally end with a Non-Final Pe or Kaf. However, the benefit of + // these letters as Non-Final letters outweighs the damage since these words + // are quite rare. return (b == NORMAL_KAF || b == NORMAL_MEM || b == NORMAL_NUN || b == NORMAL_PE); } } diff --git a/src/Core/Probers/Latin1Prober.cs b/src/Core/Probers/Latin1Prober.cs index bf6dee5..643d76e 100644 --- a/src/Core/Probers/Latin1Prober.cs +++ b/src/Core/Probers/Latin1Prober.cs @@ -36,12 +36,11 @@ * * ***** END LICENSE BLOCK ***** */ -using System; using System.Text; namespace UtfUnknown.Core.Probers { - // TODO: Using trigrams the detector should be able to discriminate between + // TODO: Using trigrams the detector should be able to discriminate between // latin-1 and iso8859-2 public class Latin1Prober : CharsetProber { @@ -102,11 +101,11 @@ public class Latin1Prober : CharsetProber /* UDF OTH ASC ASS ACV ACO ASV ASO */ /*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0, /*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3, - /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3, + /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3, /*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3, /*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2, - /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3, - /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3, + /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3, + /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3, /*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3, }; @@ -173,7 +172,7 @@ public override float GetConfidence(StringBuilder status = null) confidence -= freqCounter[1] * 20.0f / total; } - // lower the confidence of latin1 so that other more accurate detector + // lower the confidence of latin1 so that other more accurate detector // can take priority. return confidence < 0.0f ? 0.0f : confidence * 0.5f; } diff --git a/src/Core/Probers/MBCSGroupProber.cs b/src/Core/Probers/MBCSGroupProber.cs index abc7a6c..9770368 100644 --- a/src/Core/Probers/MBCSGroupProber.cs +++ b/src/Core/Probers/MBCSGroupProber.cs @@ -21,7 +21,7 @@ * Contributor(s): * Shy Shalom * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; using System.Text; using UtfUnknown.Core.Probers.MultiByte; @@ -241,4 +240,4 @@ public override string DumpStatus() return status.ToString(); } } -} +} \ No newline at end of file diff --git a/src/Core/Probers/MultiByte/Chinese/GB18030Prober.cs b/src/Core/Probers/MultiByte/Chinese/GB18030Prober.cs index 9cf0034..c2371fd 100644 --- a/src/Core/Probers/MultiByte/Chinese/GB18030Prober.cs +++ b/src/Core/Probers/MultiByte/Chinese/GB18030Prober.cs @@ -44,7 +44,7 @@ namespace UtfUnknown.Core.Probers.MultiByte.Chinese { - // We use gb18030 to replace gb2312, because 18030 is a superset. + // We use gb18030 to replace gb2312, because 18030 is a superset. public class GB18030Prober : CharsetProber { private CodingStateMachine codingSM; diff --git a/src/Core/Probers/ProbingState.cs b/src/Core/Probers/ProbingState.cs index 96049f0..d67f996 100644 --- a/src/Core/Probers/ProbingState.cs +++ b/src/Core/Probers/ProbingState.cs @@ -5,14 +5,14 @@ public enum ProbingState /// /// No sure answer yet, but caller can ask for confidence /// - Detecting = 0, // + Detecting = 0, /// /// Positive answer /// FoundIt = 1, /// - /// Negative answer + /// Negative answer /// NotMe = 2 - }; + } } \ No newline at end of file diff --git a/src/Core/Probers/SBCSGroupProber.cs b/src/Core/Probers/SBCSGroupProber.cs index d98891e..5263bfb 100644 --- a/src/Core/Probers/SBCSGroupProber.cs +++ b/src/Core/Probers/SBCSGroupProber.cs @@ -21,7 +21,7 @@ * Contributor(s): * Shy Shalom * Rudi Pettazzi (C# port) - * + * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; using System.Text; #region using languages @@ -104,7 +103,7 @@ public SBCSGroupProber() // Hebrew HebrewProber hebprober = new HebrewProber(); probers[10] = hebprober; - // Logical + // Logical probers[11] = new SingleByteCharSetProber(new Windows_1255_HebrewModel(), false, hebprober); // Visual probers[12] = new SingleByteCharSetProber(new Windows_1255_HebrewModel(), true, hebprober); @@ -125,7 +124,7 @@ public SBCSGroupProber() probers[20] = new SingleByteCharSetProber(new Windows_1252_SpanishModel()); // Is the following still valid? - // disable latin2 before latin1 is available, otherwise all latin1 + // disable latin2 before latin1 is available, otherwise all latin1 // will be detected as latin2 because of their similarity // Hungarian probers[21] = new SingleByteCharSetProber(new Iso_8859_2_HungarianModel()); @@ -256,7 +255,7 @@ public SBCSGroupProber() public override ProbingState HandleData(byte[] buf, int offset, int len) { // apply filter to original buffer, and we got new buffer back - // depend on what script it is, we will feed them the new buffer + // depend on what script it is, we will feed them the new buffer // we got after applying proper filter // this is done without any consideration to KeepEnglishLetters // of each prober since as of now, there are no probers here which @@ -411,4 +410,4 @@ public override string GetCharsetName() return probers[bestGuess].GetCharsetName(); } } -} +} \ No newline at end of file diff --git a/src/Core/Probers/SingleByteCharSetProber.cs b/src/Core/Probers/SingleByteCharSetProber.cs index ebecf95..9aac0e0 100644 --- a/src/Core/Probers/SingleByteCharSetProber.cs +++ b/src/Core/Probers/SingleByteCharSetProber.cs @@ -1,4 +1,4 @@ -/* ***** BEGIN LICENSE BLOCK ***** +/* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version @@ -20,7 +20,7 @@ * * Contributor(s): * Shy Shalom - * Rudi Pettazzi (C# port) + * Rudi Pettazzi (C# port) * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or @@ -54,11 +54,11 @@ public class SingleByteCharSetProber : CharsetProber private const int PROBABLE_CAT = NUMBER_OF_SEQ_CAT - 2; private const int NEUTRAL_CAT = NUMBER_OF_SEQ_CAT - 3; private const int NEGATIVE_CAT = 0; - + protected SequenceModel model; - - // true if we need to reverse every pair in the model lookup - bool reversed; + + // true if we need to reverse every pair in the model lookup + bool reversed; // char order of last character byte lastOrder; @@ -68,32 +68,32 @@ public class SingleByteCharSetProber : CharsetProber int totalChar; int ctrlChar; - + // characters that fall in our sampling range int freqChar; - + // Optional auxiliary prober for name decision. created and destroyed by the GroupProber - CharsetProber nameProber; - - public SingleByteCharSetProber(SequenceModel model) + CharsetProber nameProber; + + public SingleByteCharSetProber(SequenceModel model) : this(model, false, null) { - + } - - public SingleByteCharSetProber(SequenceModel model, bool reversed, + + public SingleByteCharSetProber(SequenceModel model, bool reversed, CharsetProber nameProber) { this.model = model; this.reversed = reversed; this.nameProber = nameProber; - Reset(); + Reset(); } public override ProbingState HandleData(byte[] buf, int offset, int len) { int max = offset + len; - + for (int i = offset; i < max; i++) { byte order = model.GetOrder(buf[i]); @@ -113,7 +113,7 @@ public override ProbingState HandleData(byte[] buf, int offset, int len) { ctrlChar++; } - + if (order < model.FreqCharCount) { freqChar++; @@ -142,7 +142,7 @@ public override ProbingState HandleData(byte[] buf, int offset, int len) } return state; } - + public override string DumpStatus() { StringBuilder status = new StringBuilder(); @@ -182,7 +182,7 @@ public override float GetConfidence(StringBuilder status = null) // charsets used for the same language. r = r * (seqCounters[POSITIVE_CAT] + (float)seqCounters[PROBABLE_CAT] / 4.0f) / totalChar; - // The more control characters (proportionnaly to the size of the text), the + // The more control characters (proportionally to the size of the text), the // less confident we become in the current charset. r = r * (totalChar - ctrlChar) / totalChar; @@ -191,9 +191,9 @@ public override float GetConfidence(StringBuilder status = null) r = 0.99f; return r; } - return 0.01f; + return 0.01f; } - + public override void Reset() { state = ProbingState.Detecting; @@ -205,11 +205,11 @@ public override void Reset() freqChar = 0; ctrlChar = 0; } - - public override string GetCharsetName() + + public override string GetCharsetName() { return (nameProber == null) ? model.CharsetName : nameProber.GetCharsetName(); - } + } } } diff --git a/src/DetectionDetail.cs b/src/DetectionDetail.cs index fa1eee1..e188519 100644 --- a/src/DetectionDetail.cs +++ b/src/DetectionDetail.cs @@ -57,7 +57,7 @@ public DetectionDetail(CharsetProber prober, TimeSpan? time = null) public string EncodingName { get; } /// - /// The detected encoding. + /// The detected encoding. /// public Encoding Encoding { get; set; } diff --git a/src/DetectionResult.cs b/src/DetectionResult.cs index 80cd87c..db1f0f1 100644 --- a/src/DetectionResult.cs +++ b/src/DetectionResult.cs @@ -1,4 +1,3 @@ -using System; using System.Collections.Generic; using System.Linq; diff --git a/src/UTF-unknown.csproj b/src/UTF-unknown.csproj index 6019ac6..afafb21 100644 --- a/src/UTF-unknown.csproj +++ b/src/UTF-unknown.csproj @@ -42,7 +42,7 @@ Features: - XML documentation included Compared to Ude: - + - Refactor of API, namespaces and deadcode removal - Added some docs - Improve error handling @@ -77,4 +77,4 @@ Features: 2.0.0.0 - + \ No newline at end of file diff --git a/tests/BitPackageTest.cs b/tests/BitPackageTest.cs index 472cc56..7e9d6d2 100644 --- a/tests/BitPackageTest.cs +++ b/tests/BitPackageTest.cs @@ -51,7 +51,7 @@ public void TestPack() Assert.AreEqual(BitPackage.Pack4bits(2,2,2,2,2,2,2,2), 572662306); Assert.AreEqual(BitPackage.Pack4bits(15,15,15,15,15,15,15,15), -1); } - + [Test] public void TestUnpack() { @@ -59,7 +59,7 @@ public void TestUnpack() BitPackage.Pack4bits(0, 1, 2, 3, 4, 5, 6, 7), BitPackage.Pack4bits(8, 9, 10, 11, 12, 13, 14, 15) }; - + BitPackage pkg = new BitPackage( BitPackage.INDEX_SHIFT_4BITS, BitPackage.SHIFT_MASK_4BITS, @@ -70,7 +70,7 @@ public void TestUnpack() for (int i = 0; i < 16; i++) { int n = pkg.Unpack(i); Assert.AreEqual(n, i); - } + } } } } diff --git a/tests/CharsetDetectorTest.cs b/tests/CharsetDetectorTest.cs index b8da89c..2d2df34 100644 --- a/tests/CharsetDetectorTest.cs +++ b/tests/CharsetDetectorTest.cs @@ -81,7 +81,7 @@ public void DetectFromStreamMaxBytes(int? maxBytes, int expectedPosition, int st public void DetectFromByteArray(int offset, int len, string detectedCodepage) { // Arrange - string s = "UTF-Unknown은 파일, 스트림, 그 외 바이트 배열의 캐릭터 셋을 탐지하는 라이브러리입니다." + + string s = "UTF-Unknown은 파일, 스트림, 그 외 바이트 배열의 캐릭터 셋을 탐지하는 라이브러리입니다." + "대한민국 (大韓民國, Republic of Korea)"; byte[] bytes = Encoding.UTF8.GetBytes(s); diff --git a/tests/CharsetDetectorTestBatch.cs b/tests/CharsetDetectorTestBatch.cs index a626b80..51ca6fd 100644 --- a/tests/CharsetDetectorTestBatch.cs +++ b/tests/CharsetDetectorTestBatch.cs @@ -7,9 +7,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Reflection; using Newtonsoft.Json; -using UtfUnknown.Core; using NUnit.Framework; namespace UtfUnknown.Tests @@ -74,7 +72,7 @@ public void TestFile(TestCase testCase) { TestFile(testCase.ExpectedEncoding, testCase.InputFile.FullName); } - + [TestCaseSource(nameof(AllTestFilesUnsupportedEncoding))] public void TestFileUnsupportedEncodings(TestCase testCase) { @@ -84,7 +82,7 @@ public void TestFileUnsupportedEncodings(TestCase testCase) _logWriter.WriteLine(string.Concat( $"- {testCase.InputFile.FullName} ({testCase.ExpectedEncoding}) -> ", $"{JsonConvert.SerializeObject(result, Formatting.Indented, new EncodingJsonConverter())}")); - + StringAssert.AreEqualIgnoringCase( testCase.ExpectedEncoding, detected.EncodingName, @@ -126,7 +124,7 @@ private static List AllTestFiles() return testCases; } - + private static IReadOnlyList AllTestFilesUnsupportedEncoding() { var path = Path.Combine(TESTS_ROOT, "DataUnsupported"); @@ -134,7 +132,7 @@ private static IReadOnlyList AllTestFilesUnsupportedEncoding() { throw new DirectoryNotFoundException($"Directory Data with test files not found, path: {path}"); } - + var dirs = new DirectoryInfo(path).GetDirectories(); var testCases = new List(); foreach (var dir in dirs) @@ -147,7 +145,7 @@ private static IReadOnlyList AllTestFilesUnsupportedEncoding() private static List CreateTestCases(DirectoryInfo dirname) { - //encoding is the directory name - before the optional '(' + //encoding is the directory name - before the optional '(' var expectedEncoding = dirname.Name.Split('(').First().Trim(); var files = dirname.GetFiles(); diff --git a/tests/DetectionDetailTests.cs b/tests/DetectionDetailTests.cs index 49b9cc0..9945dd2 100644 --- a/tests/DetectionDetailTests.cs +++ b/tests/DetectionDetailTests.cs @@ -2,9 +2,7 @@ using System.Linq; using System.Reflection; using NUnit.Framework; -using UtfUnknown; using UtfUnknown.Core; -using UtfUnknown.Core.Probers; namespace UtfUnknown.Tests { diff --git a/tests/EncodingJsonConverter.cs b/tests/EncodingJsonConverter.cs index 0f23c77..20dcf7e 100644 --- a/tests/EncodingJsonConverter.cs +++ b/tests/EncodingJsonConverter.cs @@ -1,8 +1,5 @@ using System; -using System.Collections.Generic; -using System.Linq; using System.Text; -using System.Threading.Tasks; using Newtonsoft.Json; namespace UtfUnknown.Tests