Skip to content

Commit

Permalink
Automatic code arrangement (#157)
Browse files Browse the repository at this point in the history
* Some automatic code arrangement, removed a ton of useless whitespace.

* More whitespace removed.

* Added editorconfig.

* Unused usings and whitespace removal
  • Loading branch information
adimosh authored Jul 26, 2022
1 parent 9707443 commit 53af842
Show file tree
Hide file tree
Showing 83 changed files with 1,086 additions and 1,019 deletions.
160 changes: 160 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# To learn more about .editorconfig see https://aka.ms/editorconfigdocs
###############################
# Core EditorConfig Options #
###############################
# All files
# Baseline for every file type; the code-file section below overrides
# insert_final_newline for C#/VB sources.
[*]
indent_style = space
insert_final_newline = false

# XML project files
[*.{csproj,proj,projitems,shproj}]
indent_size = 2

# XML config files
[*.{props,targets,ruleset,config,nuspec,resx,vsixmanifest,vsct}]
indent_size = 2

# Code files
[*.{cs,csx,vb,vbx}]
indent_size = 4
insert_final_newline = true
charset = utf-8-bom
###############################
# .NET Coding Conventions #
###############################
[*.{cs,vb}]
# Organize usings
dotnet_sort_system_directives_first = true
# this. preferences
dotnet_style_qualification_for_field = false:silent
dotnet_style_qualification_for_property = false:silent
dotnet_style_qualification_for_method = false:silent
dotnet_style_qualification_for_event = false:silent
# Language keywords vs BCL types preferences
dotnet_style_predefined_type_for_locals_parameters_members = true:silent
dotnet_style_predefined_type_for_member_access = true:silent
# Parentheses preferences
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
# Modifier preferences
dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
dotnet_style_readonly_field = true:suggestion
# Expression-level preferences
dotnet_style_object_initializer = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_explicit_tuple_names = true:suggestion
dotnet_style_null_propagation = true:suggestion
dotnet_style_coalesce_expression = true:suggestion
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:silent
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
dotnet_style_prefer_auto_properties = true:silent
dotnet_style_prefer_conditional_expression_over_assignment = true:silent
dotnet_style_prefer_conditional_expression_over_return = true:silent
###############################
# Naming Conventions #
###############################
# Style Definitions
dotnet_naming_style.pascal_case_style.capitalization = pascal_case
# Use PascalCase for constant fields
dotnet_naming_rule.constant_fields_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.constant_fields_should_be_pascal_case.symbols = constant_fields
dotnet_naming_rule.constant_fields_should_be_pascal_case.style = pascal_case_style
dotnet_naming_symbols.constant_fields.applicable_kinds = field
dotnet_naming_symbols.constant_fields.applicable_accessibilities = *
dotnet_naming_symbols.constant_fields.required_modifiers = const
# Additional .NET code-style options. These are not naming rules even though
# they follow the naming section; they still fall under the [*.{cs,vb}] header.
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
dotnet_style_prefer_compound_assignment = true:suggestion
dotnet_style_prefer_simplified_interpolation = true:suggestion
dotnet_style_namespace_match_folder = true:suggestion
dotnet_style_operator_placement_when_wrapping = beginning_of_line
# Core EditorConfig properties (not .NET-specific), applied here to C#/VB files only.
tab_width = 4
end_of_line = crlf
# Experimental whitespace-style options
dotnet_style_allow_multiple_blank_lines_experimental = true:silent
dotnet_style_allow_statement_immediately_after_block_experimental = true:silent
# Unused-parameter analysis
dotnet_code_quality_unused_parameters = all:suggestion
###############################
# C# Coding Conventions #
###############################
[*.cs]
# var preferences
csharp_style_var_for_built_in_types = true:silent
csharp_style_var_when_type_is_apparent = true:silent
csharp_style_var_elsewhere = true:silent
# Expression-bodied members
csharp_style_expression_bodied_methods = false:silent
csharp_style_expression_bodied_constructors = false:silent
csharp_style_expression_bodied_operators = false:silent
csharp_style_expression_bodied_properties = true:silent
csharp_style_expression_bodied_indexers = true:silent
csharp_style_expression_bodied_accessors = true:silent
# Pattern matching preferences
csharp_style_pattern_matching_over_is_with_cast_check = true:suggestion
csharp_style_pattern_matching_over_as_with_null_check = true:suggestion
# Null-checking preferences
csharp_style_throw_expression = true:suggestion
csharp_style_conditional_delegate_call = true:suggestion
# Modifier preferences
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async:suggestion
# Expression-level preferences
csharp_prefer_braces = true:silent
csharp_style_deconstructed_variable_declaration = true:suggestion
csharp_prefer_simple_default_expression = true:suggestion
csharp_style_pattern_local_over_anonymous_function = true:suggestion
csharp_style_inlined_variable_declaration = true:suggestion
###############################
# C# Formatting Rules #
###############################
# New line preferences
csharp_new_line_before_open_brace = all
csharp_new_line_before_else = true
csharp_new_line_before_catch = true
csharp_new_line_before_finally = true
csharp_new_line_before_members_in_object_initializers = true
csharp_new_line_before_members_in_anonymous_types = true
csharp_new_line_between_query_expression_clauses = true
# Indentation preferences
csharp_indent_case_contents = true
csharp_indent_switch_labels = true
csharp_indent_labels = flush_left
# Space preferences
csharp_space_after_cast = false
csharp_space_after_keywords_in_control_flow_statements = true
csharp_space_between_method_call_parameter_list_parentheses = false
csharp_space_between_method_declaration_parameter_list_parentheses = false
csharp_space_between_parentheses = false
csharp_space_before_colon_in_inheritance_clause = true
csharp_space_after_colon_in_inheritance_clause = true
csharp_space_around_binary_operators = before_and_after
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
csharp_space_between_method_call_name_and_opening_parenthesis = false
csharp_space_between_method_call_empty_parameter_list_parentheses = false
# Wrapping preferences
csharp_preserve_single_line_statements = true
csharp_preserve_single_line_blocks = true
# Additional C# code-style preferences. These are style options, not wrapping
# options, even though they follow the wrapping preferences.
csharp_using_directive_placement = outside_namespace:silent
csharp_prefer_simple_using_statement = true:suggestion
csharp_style_namespace_declarations = block_scoped:silent
csharp_style_prefer_method_group_conversion = true:silent
csharp_style_expression_bodied_lambdas = true:silent
csharp_style_expression_bodied_local_functions = false:silent
csharp_style_prefer_extended_property_pattern = true:suggestion
csharp_style_prefer_null_check_over_type_check = true:suggestion
# NOTE(review): looks like this overlaps with
# csharp_style_pattern_local_over_anonymous_function set earlier in this
# section - confirm whether both entries are needed.
csharp_style_prefer_local_over_anonymous_function = true:suggestion
csharp_style_prefer_range_operator = true:suggestion
csharp_style_prefer_index_operator = true:suggestion
csharp_style_prefer_tuple_swap = true:suggestion
csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion
csharp_style_unused_value_expression_statement_preference = discard_variable:silent
csharp_style_unused_value_assignment_preference = discard_variable:suggestion
csharp_prefer_static_local_function = true:suggestion
# Experimental whitespace-style options
csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent
# NOTE(review): presumably tied to the `!!` parameter null-check syntax that
# was withdrawn before C# 11 shipped - confirm this option is still recognized.
csharp_style_prefer_parameter_null_checking = true:suggestion
csharp_style_prefer_switch_expression = true:suggestion
csharp_style_prefer_pattern_matching = true:silent
csharp_style_prefer_not_pattern = true:suggestion
18 changes: 13 additions & 5 deletions UTF-unknown.sln
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26228.9
# Visual Studio Version 17
VisualStudioVersion = 17.2.32616.157
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UTF-unknown", "src\UTF-unknown.csproj", "{64CA7BA7-EFD9-4475-BB66-40B187622A73}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UTF-unknown", "src\UTF-unknown.csproj", "{64CA7BA7-EFD9-4475-BB66-40B187622A73}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ConsoleExample", "example\ConsoleExample.csproj", "{386C6ABF-44EA-4418-B90E-E8D21E4C2475}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ConsoleExample", "example\ConsoleExample.csproj", "{386C6ABF-44EA-4418-B90E-E8D21E4C2475}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UTF-unknown.Tests", "Tests\UTF-unknown.Tests.csproj", "{1922DCC9-A45F-4627-9087-CD492BBF7F38}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UTF-unknown.Tests", "Tests\UTF-unknown.Tests.csproj", "{1922DCC9-A45F-4627-9087-CD492BBF7F38}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{052846B2-CA56-482F-B477-6E33523C091E}"
ProjectSection(SolutionItems) = preProject
.editorconfig = .editorconfig
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -31,4 +36,7 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {0C7AF656-EF20-4880-8EB9-9BF101340A03}
EndGlobalSection
EndGlobal
27 changes: 13 additions & 14 deletions src/CharsetDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@
namespace UtfUnknown
{
/// <summary>
/// Default implementation of charset detection interface.
/// Default implementation of charset detection interface.
/// The detector can be fed by a System.IO.Stream:
/// </summary>
/// </summary>
public class CharsetDetector
{
internal InputState InputState;
Expand Down Expand Up @@ -135,7 +135,7 @@ public static DetectionResult DetectFromBytes(byte[] bytes)
}

/// <summary>
/// Detect the character encoding from this byte array.
/// Detect the character encoding from this byte array.
/// It searches for a BOM starting at bytes[offset].
/// </summary>
/// <param name="bytes">The byte array containing the text</param>
Expand Down Expand Up @@ -170,7 +170,7 @@ public static DetectionResult DetectFromBytes(byte[] bytes, int offset, int len)

/// <summary>
/// Detect the character encoding by reading the stream.
///
///
/// Note: stream position is not reset before and after.
/// </summary>
/// <param name="stream">The stream.</param>
Expand All @@ -186,7 +186,7 @@ public static DetectionResult DetectFromStream(Stream stream)

/// <summary>
/// Detect the character encoding by reading the stream.
///
///
/// Note: stream position is not reset before and after.
/// </summary>
/// <param name="stream">The stream.</param>
Expand Down Expand Up @@ -403,10 +403,10 @@ private static string FindCharSetByBom(byte[] buf, int offset, int len)

if (buf0 == 0xEF && buf1 == 0xBB && buf[offset + 2] == 0xBF)
return CodepageName.UTF8;

if (len < 4)
return null;

// The length check is done here because the checks below read more than 3 bytes.
if (buf0 == 0x00 && buf1 == 0x00)
{
Expand All @@ -422,24 +422,24 @@ private static string FindCharSetByBom(byte[] buf, int offset, int len)
if (buf0 == 0x2B && buf1 == 0x2F && buf[offset + 2] == 0x76)
if (buf[offset + 3] == 0x38 || buf[offset + 3] == 0x39 || buf[offset + 3] == 0x2B || buf[offset + 3] == 0x2F)
return CodepageName.UTF7;

// Detect GB18030 with bom (see table in https://en.wikipedia.org/wiki/Byte_order_mark)
// TODO: If you remove this check, GB18030Prober will still be defined as GB18030 -- is that a feature or a bug?
if (buf0 == 0x84 && buf1 == 0x31 && buf[offset + 2] == 0x95 && buf[offset + 3] == 0x33)
return CodepageName.GB18030;

return null;
}

/// <summary>
/// Notify detector that no further data is available.
/// Notify detector that no further data is available.
/// </summary>
private DetectionResult DataEnd()
{
if (!_gotData)
{
// we haven't got any data yet, return immediately
// the calling program sometimes calls DataEnd before anything has
// we haven't got any data yet, return immediately
// the calling program sometimes calls DataEnd before anything has
// been sent to the detector
return new DetectionResult();
}
Expand Down Expand Up @@ -499,5 +499,4 @@ internal IList<CharsetProber> GetNewProbers()
}
}
}
}

}
10 changes: 5 additions & 5 deletions src/Core/Analyzers/CharDistributionAnalyser.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* ***** BEGIN LICENSE BLOCK *****
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
Expand Down Expand Up @@ -38,7 +38,7 @@
namespace UtfUnknown.Core.Analyzers
{
/// <summary>
/// Base class for the Character Distribution Method, used for
/// Base class for the Character Distribution Method, used for
/// the CJK encodings
/// </summary>
public abstract class CharDistributionAnalyser
Expand All @@ -60,7 +60,7 @@ public abstract class CharDistributionAnalyser
// Mapping table to get frequency order from char order (get from GetOrder())
protected int[] charToFreqOrder;

// This constant value varies from language to language. It is used in calculating confidence.
// This constant value varies from language to language. It is used in calculating confidence.
protected float typicalDistributionRatio;

public CharDistributionAnalyser()
Expand All @@ -72,7 +72,7 @@ public CharDistributionAnalyser()
/// Feed a block of data and do distribution analysis
/// </summary>
/// <remarks>
/// we do not handle a character based on its original encoding string, but
/// we do not handle a character based on its original encoding string, but
/// convert this encoding string to a number, here called order.
/// This allows multiple encodings of a language to share one frequency table
/// </remarks>
Expand All @@ -82,7 +82,7 @@ public CharDistributionAnalyser()
public abstract int GetOrder(byte[] buf, int offset);

/// <summary>
/// Feed a character with known length
/// Feed a character with known length
/// </summary>
/// <param name="buf">A <see cref="System.Byte"/></param>
/// <param name="offset">buf offset</param>
Expand Down
16 changes: 8 additions & 8 deletions src/Core/Analyzers/MultiByte/Chinese/BIG5DistributionAnalyser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ namespace UtfUnknown.Core.Analyzers.Chinese
public class BIG5DistributionAnalyser : CharDistributionAnalyser
{
// Big5 frequency table
// by Taiwan's Mandarin Promotion Council
// by Taiwan's Mandarin Promotion Council
// <http://www.edu.tw:81/mandr/>
/******************************************************************************
* 128 --> 0.42261
Expand Down Expand Up @@ -902,28 +902,28 @@ public class BIG5DistributionAnalyser : CharDistributionAnalyser
13968,13969,13970,13971,13972, //13973
****************************************************************************************/
};

public BIG5DistributionAnalyser()
{
charToFreqOrder = BIG5_CHAR2FREQ_ORDER;
typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
typicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO;
}

/// <summary>
/// first byte range: 0xa4 -- 0xfe
/// second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
public override int GetOrder(byte[] buf, int offset)
{
public override int GetOrder(byte[] buf, int offset)
{
if (buf[offset] >= 0xA4) {
if (buf[offset+1] >= 0xA1)
return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0xA1 + 63;
else
return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0x40;
} else {
return -1;
}
return -1;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ namespace UtfUnknown.Core.Analyzers.Chinese
public class EUCTWDistributionAnalyser : CharDistributionAnalyser
{
// EUCTW frequency table
// Converted from big5 work
// by Taiwan's Mandarin Promotion Council
// Converted from big5 work
// by Taiwan's Mandarin Promotion Council
// <http://www.edu.tw:81/mandr/>
/******************************************************************************
* 128 --> 0.42261
Expand Down Expand Up @@ -417,9 +417,9 @@ public EUCTWDistributionAnalyser()
/// second byte range: 0xa1 -- 0xfe
/// no validation needed here. State machine has done that
/// </summary>
public override int GetOrder(byte[] buf, int offset)
{
if (buf[offset] >= 0xC4)
public override int GetOrder(byte[] buf, int offset)
{
if (buf[offset] >= 0xC4)
return 94 * (buf[offset] - 0xC4) + buf[offset+1] - 0xA1;
else
return -1;
Expand Down
Loading

0 comments on commit 53af842

Please sign in to comment.