From d2a9ceeffa05fff016805e60d45f5fc98bb39228 Mon Sep 17 00:00:00 2001 From: "METANEOCORTEX\\Kotti" Date: Mon, 16 Feb 2026 17:01:00 +0100 Subject: [PATCH 1/2] fix: Win's Unicode detection is not reliable, so change encoding detection accordingly, fix bug --- lexilla/Lexilla.vcxproj | 6 +++ minipath/minipath.vcxproj | 7 +-- np3encrypt/np3encrypt.vcxproj | 12 +++++ src/Edit.c | 83 +++++++++++++++++++---------------- src/Encoding.c | 9 +--- src/Encoding.h | 6 +-- src/EncodingDetection.cpp | 34 ++++++-------- src/Notepad3.vcxproj | 1 + src/Notepad3.vcxproj.filters | 1 + src/ced/ced.vcxproj | 6 +++ 10 files changed, 92 insertions(+), 73 deletions(-) diff --git a/lexilla/Lexilla.vcxproj b/lexilla/Lexilla.vcxproj index 3db47eb9f..c601956b8 100644 --- a/lexilla/Lexilla.vcxproj +++ b/lexilla/Lexilla.vcxproj @@ -339,6 +339,8 @@ stdc17 ProgramDatabase /utf-8 %(AdditionalOptions) + Disabled + EnableFastChecks Console @@ -457,6 +459,8 @@ ProgramDatabase /utf-8 %(AdditionalOptions) false + Disabled + EnableFastChecks Console @@ -490,6 +494,8 @@ ProgramDatabase /utf-8 %(AdditionalOptions) false + Disabled + EnableFastChecks Console diff --git a/minipath/minipath.vcxproj b/minipath/minipath.vcxproj index 80fd94394..00ecd2c42 100644 --- a/minipath/minipath.vcxproj +++ b/minipath/minipath.vcxproj @@ -396,9 +396,8 @@ true $(OutputPath)obj;%(AdditionalLibraryDirectories) + DebugFull - - MultiThreadedDebug EnableFastChecks @@ -416,6 +415,7 @@ Sync stdc17 false + ProgramDatabase PerMonitorHighDPIAware @@ -441,8 +441,8 @@ $(OutputPath)obj;%(AdditionalLibraryDirectories) + DebugFull - MultiThreadedDebug EnableFastChecks @@ -461,6 +461,7 @@ Sync stdc17 false + ProgramDatabase PerMonitorHighDPIAware diff --git a/np3encrypt/np3encrypt.vcxproj b/np3encrypt/np3encrypt.vcxproj index 965b5a285..a713cd4bf 100644 --- a/np3encrypt/np3encrypt.vcxproj +++ b/np3encrypt/np3encrypt.vcxproj @@ -355,10 +355,14 @@ /utf-8 %(AdditionalOptions) stdcpp20 stdc17 + ProgramDatabase + EnableFastChecks + MultiThreadedDebug Console Default + true @@ -374,10 +378,14 @@ /utf-8 %(AdditionalOptions) stdcpp20 stdc17 + ProgramDatabase + EnableFastChecks + MultiThreadedDebug Console Default + true @@ -393,10 +401,14 @@ /utf-8 %(AdditionalOptions) stdcpp20 stdc17 + ProgramDatabase + EnableFastChecks + MultiThreadedDebug Console Default + true diff --git a/src/Edit.c b/src/Edit.c index 18d5c1a7a..3e4605a85 100644 --- a/src/Edit.c +++ b/src/Edit.c @@ -1422,11 +1422,12 @@ bool EditLoadFile( EditDetectEOLMode(lpData, cbData, status); } } - else if (!IS_ENC_ENFORCED() && (encDetection.bPureASCII7Bit && !encDetection.bHasUnicodeNullBytes)) { + else if (!IS_ENC_ENFORCED() && encDetection.bPureASCII7Bit) { // load ASCII(7-bit) as ANSI/UTF-8 EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory, bReloadFile); status->iEncoding = (Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT); EditDetectEOLMode(lpData, cbData, status); + } else { // === ALL OTHER NON UTF-8 === status->iEncoding = encDetection.Encoding; @@ -4659,45 +4660,53 @@ void EditUniteDuplicateLines(HWND hwnd, bool bRemoveEmptyLines, bool bRemoveLast DocLn iCurLine = iStartLine; while (iCurLine < iEndLine) { - DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine); - DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine); - // range-pointer may move during line deletion, so copy current line for const comparison - StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen); - pCurrentLine[iCurLnLen] = '\0'; - - DocLn iPrevLine = iCurLine; - DocLn iCompareLine = iCurLine; - bool bFoundDup = false; - while (++iCompareLine <= iEndLine) { - - DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine); - if (bRemoveEmptyLines || (iCmpLnLen > 0)) { - - DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine); - const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen); - - if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) { - bFoundDup = true; - DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine); - DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine); - assert(posPrev != posComp); - SciCall_SetTargetRange(posPrev, posComp); - SciCall_ReplaceTarget(0, ""); - --iEndLine; // line inbetween removed - --iCompareLine; // don't proceed compare-line + DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine); + + if (iBegCurLine >= 0) { + + DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine); + + // range-pointer may move during line deletion, so copy current line for const comparison + StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen); + pCurrentLine[iCurLnLen] = '\0'; + + DocLn iPrevLine = iCurLine; + DocLn iCompareLine = iCurLine; + bool bFoundDup = false; + while (++iCompareLine <= iEndLine) { + + DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine); + if (bRemoveEmptyLines || (iCmpLnLen > 0)) { + + DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine); + const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen); + + if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) { + bFoundDup = true; + DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine); + DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine); + assert(posPrev != posComp); + SciCall_SetTargetRange(posPrev, posComp); + SciCall_ReplaceTarget(0, ""); + --iEndLine; // line inbetween removed + --iCompareLine; // compare-line removed, so stay at same line for next compare + } + else iPrevLine = iCompareLine; } + else iPrevLine = iCompareLine; + + } // while + + if (bRemoveLastDup && bFoundDup) { + DocPos const posBeg = SciCall_PositionFromLine(iCurLine); + DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1); + SciCall_SetTargetRange(posBeg, posEnd); + SciCall_ReplaceTarget(0, ""); + --iEndLine; // line removed } - iPrevLine = iCompareLine; - } - if (bRemoveLastDup && bFoundDup) { - DocPos const posBeg = SciCall_PositionFromLine(iCurLine); - DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1); - SciCall_SetTargetRange(posBeg, posEnd); - SciCall_ReplaceTarget(0, ""); - } - else { - ++iCurLine; + else ++iCurLine; } + else ++iCurLine; } EndUndoTransAction(); diff --git a/src/Encoding.c b/src/Encoding.c index c43a392b9..199793a89 100644 --- a/src/Encoding.c +++ b/src/Encoding.c @@ -714,18 +714,11 @@ bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt) } // ---------------------------------------------------------------------------- -bool HasUnicodeNullBytes(const char* pBuf, size_t cnt) -{ - int iTest = IS_TEXT_UNICODE_NULL_BYTES; - bool const ok = IsTextUnicode(pBuf, (int)cnt, &iTest); - return (ok && ((iTest & IS_TEXT_UNICODE_NULL_BYTES) != 0)); -} -// ---------------------------------------------------------------------------- - bool Has_UTF16_BOM(const char* pBuf, size_t cnt) { return (Has_UTF16_LE_BOM(pBuf, cnt) || Has_UTF16_BE_BOM(pBuf, cnt)); } +// ---------------------------------------------------------------------------- // ============================================================================ diff --git a/src/Encoding.h b/src/Encoding.h index d80e54c47..66bbda17d 100644 --- a/src/Encoding.h +++ b/src/Encoding.h @@ -124,7 +124,6 @@ int Encoding_GetNameW(const cpi_enc_t iEncoding, LPWSTR buffer, size_t cwch); bool Has_UTF16_LE_BOM(const char* pBuf, size_t cnt); bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt); bool Has_UTF16_BOM(const char *pBuf, size_t cnt); -bool HasUnicodeNullBytes(const char* pBuf, size_t cnt); inline bool IsUTF8Signature(const char* p) { @@ -132,8 +131,8 @@ inline bool IsUTF8Signature(const char* p) } #define UTF8StringStart(p) (IsUTF8Signature(p)) ? ((p)+3) : (p) -bool IsValidUTF8(const char* pTest, size_t nLength); bool IsPureAscii7Bit(const char* pTest, size_t nLength); +bool IsValidUTF8(const char* pTest, size_t nLength); ////////////////////////////////////////////////////// @@ -193,14 +192,13 @@ typedef struct _enc_det_t { bool bIsReverse; bool bIsUTF8Sig; bool bValidUTF8; - bool bHasUnicodeNullBytes; bool bPureASCII7Bit; char encodingStrg[64]; } ENC_DET_T; -#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, false, "" } +#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, "" } ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpData, const size_t cbData, diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp index aadd7ad3e..6fbcfc385 100644 --- a/src/EncodingDetection.cpp +++ b/src/EncodingDetection.cpp @@ -362,7 +362,7 @@ extern "C" NP2ENCODING g_Encodings[] = { /* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, L"" }, // CPI_UNICODE 4 /* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, L"" }, // CPI_UNICODEBE 5 /* 006 */{ NCP_ASCII_7BIT | NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, L"" }, // CPI_UTF8 6 - /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7 + /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7 /* 008 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, L"" }, // CPI_UTF7 8 /* 009 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, L"" }, /* 010 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, L"" }, @@ -561,6 +561,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM return CPI_NONE; // iTest doesn't seem to have been modified ... } + bool const bHasBOM = (iTest & IS_TEXT_UNICODE_SIGNATURE); bool const bHasRBOM = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE); @@ -570,7 +571,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM //bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES); - if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse))) { + if ((bHasBOM || bHasRBOM || (bIsUnicode || bIsReverse)) && !bIsIllegal && !(bIsUnicode && bIsReverse)) { if (lpbBOM) { *lpbBOM = (bHasBOM || bHasRBOM); } @@ -1261,14 +1262,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD cpi_enc_t iAnalyzeHint, bool bSkipUTFDetection, bool bSkipANSICPDetection, bool bForceEncDetection) { ENC_DET_T encDetRes = INIT_ENC_DET_T; + #define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding)) FileVars_GetFromData(lpData, cbData, &Globals.fvCurFile); bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData); bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData); -#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding)) - // --- 1st check for force encodings --- LPCWSTR lpszExt = Path_FindExtension(hpath); @@ -1306,27 +1306,23 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes, iAnalyzeHint); // --------------------------------------------------------------------------- } - encDetRes.bHasUnicodeNullBytes = HasUnicodeNullBytes(lpData, cbData); encDetRes.bPureASCII7Bit = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) || IsPureAscii7Bit(lpData, cbData); if (encDetRes.analyzedEncoding == CPI_NONE) { encDetRes.analyzedEncoding = iAnalyzeHint; encDetRes.confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel); } - else if (encDetRes.bPureASCII7Bit && !encDetRes.bHasUnicodeNullBytes) { - encDetRes.analyzedEncoding = (Settings.LoadASCIIasUTF8) ? CPI_UTF8 : CPI_ANSI_DEFAULT; + else if (encDetRes.bPureASCII7Bit && encDetRes.bValidUTF8) { + encDetRes.analyzedEncoding = CPI_UTF8; } if (!bSkipUTFDetection) { encDetRes.unicodeAnalysis = GetUnicodeEncoding(lpData, cbData, &(encDetRes.bHasBOM), &(encDetRes.bIsReverse)); - if (Encoding_IsNONE(encDetRes.unicodeAnalysis) && Encoding_IsUNICODE(encDetRes.analyzedEncoding)) { - encDetRes.unicodeAnalysis = encDetRes.analyzedEncoding; - } if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis)) { // check consistent BOM - if (encDetRes.bHasBOM && !bBOM_LE && !bBOM_BE) { + if (encDetRes.bHasBOM && !(bBOM_LE || bBOM_BE)) { encDetRes.unicodeAnalysis = CPI_NONE; } else if (encDetRes.bHasBOM && encDetRes.bIsReverse && !bBOM_BE) { @@ -1354,14 +1350,14 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD _SetEncodingTitleInfo(&encDetRes); } - int const iConfidence = f2int(encDetRes.confidence * 100.0f); - int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f); - encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold); - // -------------------------------------------------------------------------- // --- choose best encoding guess ---- // -------------------------------------------------------------------------- + int const iConfidence = f2int(encDetRes.confidence * 100.0f); + int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f); + encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold); + // init Preferred Encoding encDetRes.Encoding = CPI_PREFERRED_ENCODING; @@ -1375,17 +1371,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD encDetRes.Encoding = bBOM_LE ? CPI_UNICODEBOM : CPI_UNICODEBEBOM; encDetRes.bIsReverse = bBOM_BE; } - else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && encDetRes.bHasUnicodeNullBytes) - { - encDetRes.Encoding = encDetRes.unicodeAnalysis; - } else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly)) { encDetRes.Encoding = encDetRes.analyzedEncoding; } - else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis)) + else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && (iConfidence > 66)) { - encDetRes.Encoding = encDetRes.unicodeAnalysis; + encDetRes.Encoding = encDetRes.analyzedEncoding; // (1) rely on analyzed encoding } else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET))) { diff --git a/src/Notepad3.vcxproj b/src/Notepad3.vcxproj index ee4f0e59c..b7e439adb 100644 --- a/src/Notepad3.vcxproj +++ b/src/Notepad3.vcxproj @@ -1288,6 +1288,7 @@ + diff --git a/src/Notepad3.vcxproj.filters b/src/Notepad3.vcxproj.filters index b5c62071a..6c651f9e6 100644 --- a/src/Notepad3.vcxproj.filters +++ b/src/Notepad3.vcxproj.filters @@ -721,5 +721,6 @@ + \ No newline at end of file diff --git a/src/ced/ced.vcxproj b/src/ced/ced.vcxproj index 060ec58f4..5db7c9248 100644 --- a/src/ced/ced.vcxproj +++ b/src/ced/ced.vcxproj @@ -109,6 +109,9 @@ ced;%(AdditionalIncludeDirectories) true /utf-8 %(AdditionalOptions) + ProgramDatabase + EnableFastChecks + MultiThreadedDebug Console @@ -126,6 +129,9 @@ ced;%(AdditionalIncludeDirectories) true /utf-8 %(AdditionalOptions) + ProgramDatabase + EnableFastChecks + MultiThreadedDebug Console From cccf13d4b0583ff7044eee899e7bab0c05c0e3c1 Mon Sep 17 00:00:00 2001 From: "METANEOCORTEX\\Kotti" Date: Mon, 16 Feb 2026 17:10:03 +0100 Subject: [PATCH 2/2] fix: remove MyTypes.natvis --- src/Notepad3.vcxproj | 1 - src/Notepad3.vcxproj.filters | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Notepad3.vcxproj b/src/Notepad3.vcxproj index b7e439adb..ee4f0e59c 100644 --- a/src/Notepad3.vcxproj +++ b/src/Notepad3.vcxproj @@ -1288,7 +1288,6 @@ - diff --git a/src/Notepad3.vcxproj.filters b/src/Notepad3.vcxproj.filters index 6c651f9e6..b5c62071a 100644 --- a/src/Notepad3.vcxproj.filters +++ b/src/Notepad3.vcxproj.filters @@ -721,6 +721,5 @@ - \ No newline at end of file