Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/dotnet-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- name: Setup .NET 8.0
- name: Setup .NET 10.0
uses: actions/setup-dotnet@v3
with:
dotnet-version: 8.0.x
dotnet-version: 10.0.x
include-prerelease: false
- name: Install dependencies
run: dotnet restore
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
## v3.0.0 - 5 Feb 2024
## v4.0.0 - 19 Dec 2025
* Switch to .Net 10.0
* Switch to C# 14
* Code refactoring
* Fix code example 7 and 8 in readme
* Add tests for readme code examples
* Adjust readme code example to use filePath instead of filename or path
* Suppress warnings CA2022 and S2674 due to expected dynamic array length
* Remove Serializable annotation from EncodingSecurityException

## v3.0.0 - 5 Feb 2024
* Switch to .Net 8.0
* Updated dependencies
* Improved error handling for empty and whitespace path
Expand Down
8 changes: 8 additions & 0 deletions Documentation/upgrade_guide_3_0_0_to_4_0_x.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Upgrade Guide v3.0.0 to v4.0.*

No code changes required!

### Dependency Change

Now requires .Net 10.0

44 changes: 22 additions & 22 deletions MagicFileEncoding.sln
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

Microsoft Visual Studio Solution File, Format Version 12.00
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MagicFileEncoding", "MagicFileEncoding\MagicFileEncoding.csproj", "{89C8EAD5-218B-46A8-8DE9-93783166F9FC}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UnitTests", "UnitTests\UnitTests.csproj", "{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{89C8EAD5-218B-46A8-8DE9-93783166F9FC}.Release|Any CPU.Build.0 = Release|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{83F84B60-DBC5-4738-AC63-4EFB547BF9F7}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
4 changes: 2 additions & 2 deletions MagicFileEncoding/ByteOrderMask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ public static readonly ByteOrderMaskInfo UTF8
public static readonly ByteOrderMaskInfo UTF7
= new (Encoding.UTF7, 0x2b, 0x2f, 0x76);

public static readonly List<ByteOrderMaskInfo> List = new ()
public static readonly IList<ByteOrderMaskInfo> List = new List<ByteOrderMaskInfo>()
{
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
};
}.AsReadOnly();
}
#pragma warning restore SYSLIB0001
1 change: 0 additions & 1 deletion MagicFileEncoding/EncodingSecurityException.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

namespace MagicFileEncoding;

[Serializable]
public class EncodingSecurityException : Exception
{
public EncodingSecurityException(string message) : base(message)
Expand Down
9 changes: 5 additions & 4 deletions MagicFileEncoding/MagicFileEncoding.csproj
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<PackageVersion>3.0.0</PackageVersion>
<TargetFramework>net10.0</TargetFramework>
<PackageVersion>4.0.0</PackageVersion>
<Title>Magic File Encoding</Title>
<Authors>Jan Schwien</Authors>
<Copyright>by Jan Schwien</Copyright>
Expand All @@ -18,8 +18,9 @@ Be aware of possible transformation issues if the target encoding is simpler tha

It is strongly recommended to write unit tests for your use case to ensure the load and transformation works as expected.</Description>
<Nullable>enable</Nullable>
<LangVersion>11</LangVersion>
<AssemblyVersion>3.0.0</AssemblyVersion>
<LangVersion>14</LangVersion>
<AssemblyVersion>4.0.0</AssemblyVersion>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
</PropertyGroup>

<PropertyGroup>
Expand Down
123 changes: 72 additions & 51 deletions MagicFileEncoding/Tools/EncodingTools.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,49 +53,9 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
// For the below, false positives should be exceedingly rare (and would
// be either slightly malformed UTF-8 (which would suit our purposes
// anyway) or 8-bit extended ASCII/UTF-16/32 at a vanishingly long shot).
var i = 0;
var utf8 = false;
while (i < taster - 4)
{
if (bytes[i] <= 0x7F)
{
i += 1;
continue;
}


// If all characters are below 0x80, then it is valid UTF8,
// but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
// the default codepage of the computer). Hence, there's no "utf8 = true;"
// code unlike the next three checks.

if (bytes[i] >= 0xC2 && bytes[i] <= 0xDF && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0)
{
i += 2;
utf8 = true;
continue;
}

if (bytes[i] >= 0xE0 && bytes[i] <= 0xF0 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 && bytes[i + 2] >= 0x80 &&
bytes[i + 2] < 0xC0)
{
i += 3;
utf8 = true;
continue;
}

if (bytes[i] >= 0xF0 && bytes[i] <= 0xF4 && bytes[i + 1] >= 0x80 && bytes[i + 1] < 0xC0 &&
bytes[i + 2] >= 0x80 && bytes[i + 2] < 0xC0 && bytes[i + 3] >= 0x80 && bytes[i + 3] < 0xC0)
{
i += 4;
utf8 = true;
continue;
}

utf8 = false;
break;
}

if (utf8)
if (CheckForUtf8(bytes, taster))
{
text = provideText ? Encoding.UTF8.GetString(bytes) : null;
return Encoding.UTF8;
Expand Down Expand Up @@ -138,6 +98,53 @@ internal static byte[] AutomaticTransformBytes(byte[] bytes, Encoding targetEnco
return fallbackEncoding ?? FileEncoding.DefaultFallback;
}

/// <summary>
/// Heuristically decides whether the sampled bytes look like text that actually needs UTF-8,
/// i.e. they contain at least one well-formed multi-byte sequence and no byte that fails
/// to fit a UTF-8 sequence.
/// </summary>
/// <param name="bytes">Raw bytes to inspect.</param>
/// <param name="taster">
/// Upper bound of the sample. Scanning stops at <c>taster - 4</c>, which keeps the
/// look-ahead reads (up to <c>i + 3</c>) below <paramref name="taster"/>.
/// </param>
/// <returns>
/// <c>true</c> if at least one multi-byte sequence was found and the scan ended without
/// hitting an invalid byte; <c>false</c> for pure 7-bit input or on the first byte that
/// matches none of the sequence patterns.
/// </returns>
private static bool CheckForUtf8(byte[] bytes, int taster)
{
    // Continuation bytes have the bit pattern 10xxxxxx (0x80..0xBF).
    static bool IsContinuation(byte b) => b >= 0x80 && b < 0xC0;

    var utf8 = false;
    var i = 0;
    while (i < taster - 4)
    {
        var lead = bytes[i];

        if (lead <= 0x7F)
        {
            i += 1;
            continue;
        }

        // If all characters are below 0x80, then it is valid UTF8,
        // but UTF8 is not 'required' (and therefore the text is more desirable to be treated as
        // the default codepage of the computer). Hence, there's no "utf8 = true;"
        // code unlike the next three checks.

        // Two-byte sequence: lead 0xC2..0xDF + 1 continuation byte.
        if (lead >= 0xC2 && lead <= 0xDF && IsContinuation(bytes[i + 1]))
        {
            i += 2;
            utf8 = true;
            continue;
        }

        // Three-byte sequence: lead 0xE0..0xEF + 2 continuation bytes.
        // The upper bound must be 0xEF: 0xF0 is a four-byte lead, and accepting it here
        // would consume only three bytes and then reject the remaining continuation byte,
        // so valid text such as F0 9F 98 80 (U+1F600) would be reported as not UTF-8.
        if (lead >= 0xE0 && lead <= 0xEF && IsContinuation(bytes[i + 1]) && IsContinuation(bytes[i + 2]))
        {
            i += 3;
            utf8 = true;
            continue;
        }

        // Four-byte sequence: lead 0xF0..0xF4 + 3 continuation bytes.
        if (lead >= 0xF0 && lead <= 0xF4 && IsContinuation(bytes[i + 1]) &&
            IsContinuation(bytes[i + 2]) && IsContinuation(bytes[i + 3]))
        {
            i += 4;
            utf8 = true;
            continue;
        }

        // The byte fits no UTF-8 sequence: discard any earlier positive finding and stop.
        utf8 = false;
        break;
    }

    return utf8;
}

/// <summary>
/// A long shot - let's see if we can find "charset=xyz" or
/// "encoding=xyz" to identify the encoding:
Expand All @@ -153,28 +160,40 @@ private static bool LongShot(ref string? text, bool provideText, int taster, byt
for (var n = 0; n < taster - 9; n++)
{
if (!IsCharsetMarker(bytes, n) && !IsEncodingMarker(bytes, n))
{
continue;
}

if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C') n += 8;
else n += 9;
if (bytes[n + 0] == 'c' || bytes[n + 0] == 'C')
{
n += 8;
}
else
{
n += 9;
}

if (bytes[n] == '"' || bytes[n] == '\'') n++;
if (bytes[n] == '"' || bytes[n] == '\'')
{
n++;
}

var oldN = n;

while (IsCharsetNameRange(taster, bytes, n))
{
n++;
}

var nb = new byte[n - oldN];
Array.Copy(bytes, oldN, nb, 0, n - oldN);
try
{
var internalEnc = Encoding.ASCII.GetString(nb);
text = provideText ? Encoding.GetEncoding(internalEnc).GetString(bytes) : null;
{
encoding = Encoding.GetEncoding(internalEnc);
return true;
}

encoding = Encoding.GetEncoding(internalEnc);
return true;
}
catch
{
Expand Down Expand Up @@ -252,9 +271,11 @@ private static bool IsCharsetNameRange(int taster, byte[] bytes, int n)

var bom = new byte[4];
fileStream.Position = 0;

// ReSharper disable once MustUseReturnValue

// read the BOM with dynamic length
#pragma warning disable CA2022, S2674
fileStream.Read(bom, 0, 4);
#pragma warning restore CA2022

return GetEncodingByBom(bom, fallbackEncoding, out _, false);
}
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ comprehensive solution to handle various encoding scenarios effortlessly.
[MagicFileEncoding at nuget.org](https://www.nuget.org/packages/MagicFileEncoding/)

## .Net Version
- **.Net 8:** Magic File Encoding **3.0.0 and newer**
- **.Net 10:** Magic File Encoding **4.0.0 and newer**
- **.Net 8:** Magic File Encoding **3.0.0**
- **.Net 6:** Magic File Encoding **2.0.1**

## Transformation Considerations
Expand Down
Loading
Loading