diff --git a/lexilla/Lexilla.vcxproj b/lexilla/Lexilla.vcxproj
index 3db47eb9f..c601956b8 100644
--- a/lexilla/Lexilla.vcxproj
+++ b/lexilla/Lexilla.vcxproj
@@ -339,6 +339,8 @@
stdc17
ProgramDatabase
/utf-8 %(AdditionalOptions)
+ Disabled
+ EnableFastChecks
Console
@@ -457,6 +459,8 @@
ProgramDatabase
/utf-8 %(AdditionalOptions)
false
+ Disabled
+ EnableFastChecks
Console
@@ -490,6 +494,8 @@
ProgramDatabase
/utf-8 %(AdditionalOptions)
false
+ Disabled
+ EnableFastChecks
Console
diff --git a/minipath/minipath.vcxproj b/minipath/minipath.vcxproj
index 80fd94394..00ecd2c42 100644
--- a/minipath/minipath.vcxproj
+++ b/minipath/minipath.vcxproj
@@ -396,9 +396,8 @@
true
$(OutputPath)obj;%(AdditionalLibraryDirectories)
+ DebugFull
-
-
MultiThreadedDebug
EnableFastChecks
@@ -416,6 +415,7 @@
Sync
stdc17
false
+ ProgramDatabase
PerMonitorHighDPIAware
@@ -441,8 +441,8 @@
$(OutputPath)obj;%(AdditionalLibraryDirectories)
+ DebugFull
-
MultiThreadedDebug
EnableFastChecks
@@ -461,6 +461,7 @@
Sync
stdc17
false
+ ProgramDatabase
PerMonitorHighDPIAware
diff --git a/np3encrypt/np3encrypt.vcxproj b/np3encrypt/np3encrypt.vcxproj
index 965b5a285..a713cd4bf 100644
--- a/np3encrypt/np3encrypt.vcxproj
+++ b/np3encrypt/np3encrypt.vcxproj
@@ -355,10 +355,14 @@
/utf-8 %(AdditionalOptions)
stdcpp20
stdc17
+ ProgramDatabase
+ EnableFastChecks
+ MultiThreadedDebug
Console
Default
+ true
@@ -374,10 +378,14 @@
/utf-8 %(AdditionalOptions)
stdcpp20
stdc17
+ ProgramDatabase
+ EnableFastChecks
+ MultiThreadedDebug
Console
Default
+ true
@@ -393,10 +401,14 @@
/utf-8 %(AdditionalOptions)
stdcpp20
stdc17
+ ProgramDatabase
+ EnableFastChecks
+ MultiThreadedDebug
Console
Default
+ true
diff --git a/src/Edit.c b/src/Edit.c
index 18d5c1a7a..3e4605a85 100644
--- a/src/Edit.c
+++ b/src/Edit.c
@@ -1422,11 +1422,12 @@ bool EditLoadFile(
EditDetectEOLMode(lpData, cbData, status);
}
}
- else if (!IS_ENC_ENFORCED() && (encDetection.bPureASCII7Bit && !encDetection.bHasUnicodeNullBytes)) {
+ else if (!IS_ENC_ENFORCED() && encDetection.bPureASCII7Bit) {
// load ASCII(7-bit) as ANSI/UTF-8
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory, bReloadFile);
status->iEncoding = (Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT);
EditDetectEOLMode(lpData, cbData, status);
+
} else { // === ALL OTHER NON UTF-8 ===
status->iEncoding = encDetection.Encoding;
@@ -4659,45 +4660,53 @@ void EditUniteDuplicateLines(HWND hwnd, bool bRemoveEmptyLines, bool bRemoveLast
DocLn iCurLine = iStartLine;
while (iCurLine < iEndLine) {
- DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine);
- DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine);
- // range-pointer may move during line deletion, so copy current line for const comparison
- StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen);
- pCurrentLine[iCurLnLen] = '\0';
-
- DocLn iPrevLine = iCurLine;
- DocLn iCompareLine = iCurLine;
- bool bFoundDup = false;
- while (++iCompareLine <= iEndLine) {
-
- DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine);
- if (bRemoveEmptyLines || (iCmpLnLen > 0)) {
-
- DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine);
- const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen);
-
- if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) {
- bFoundDup = true;
- DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine);
- DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine);
- assert(posPrev != posComp);
- SciCall_SetTargetRange(posPrev, posComp);
- SciCall_ReplaceTarget(0, "");
- --iEndLine; // line inbetween removed
- --iCompareLine; // don't proceed compare-line
+ DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine);
+
+ if (iBegCurLine >= 0) {
+
+ DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine);
+
+ // range-pointer may move during line deletion, so copy current line for const comparison
+ StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen);
+ pCurrentLine[iCurLnLen] = '\0';
+
+ DocLn iPrevLine = iCurLine;
+ DocLn iCompareLine = iCurLine;
+ bool bFoundDup = false;
+ while (++iCompareLine <= iEndLine) {
+
+ DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine);
+ if (bRemoveEmptyLines || (iCmpLnLen > 0)) {
+
+ DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine);
+ const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen);
+
+ if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) {
+ bFoundDup = true;
+ DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine);
+ DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine);
+ assert(posPrev != posComp);
+ SciCall_SetTargetRange(posPrev, posComp);
+ SciCall_ReplaceTarget(0, "");
+ --iEndLine; // duplicate line removed, so the range ends one line earlier
+ --iCompareLine; // compare-line removed, so stay at same line for next compare
+ }
+ else iPrevLine = iCompareLine;
}
+ else iPrevLine = iCompareLine;
+
+ } // while
+
+ if (bRemoveLastDup && bFoundDup) {
+ DocPos const posBeg = SciCall_PositionFromLine(iCurLine);
+ DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1);
+ SciCall_SetTargetRange(posBeg, posEnd);
+ SciCall_ReplaceTarget(0, "");
+ --iEndLine; // line removed
}
- iPrevLine = iCompareLine;
- }
- if (bRemoveLastDup && bFoundDup) {
- DocPos const posBeg = SciCall_PositionFromLine(iCurLine);
- DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1);
- SciCall_SetTargetRange(posBeg, posEnd);
- SciCall_ReplaceTarget(0, "");
- }
- else {
- ++iCurLine;
+ else ++iCurLine;
}
+ else ++iCurLine;
}
EndUndoTransAction();
diff --git a/src/Encoding.c b/src/Encoding.c
index c43a392b9..199793a89 100644
--- a/src/Encoding.c
+++ b/src/Encoding.c
@@ -714,18 +714,11 @@ bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt)
}
// ----------------------------------------------------------------------------
-bool HasUnicodeNullBytes(const char* pBuf, size_t cnt)
-{
- int iTest = IS_TEXT_UNICODE_NULL_BYTES;
- bool const ok = IsTextUnicode(pBuf, (int)cnt, &iTest);
- return (ok && ((iTest & IS_TEXT_UNICODE_NULL_BYTES) != 0));
-}
-// ----------------------------------------------------------------------------
-
bool Has_UTF16_BOM(const char* pBuf, size_t cnt)
{
return (Has_UTF16_LE_BOM(pBuf, cnt) || Has_UTF16_BE_BOM(pBuf, cnt));
}
+// ----------------------------------------------------------------------------
// ============================================================================
diff --git a/src/Encoding.h b/src/Encoding.h
index d80e54c47..66bbda17d 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -124,7 +124,6 @@ int Encoding_GetNameW(const cpi_enc_t iEncoding, LPWSTR buffer, size_t cwch);
bool Has_UTF16_LE_BOM(const char* pBuf, size_t cnt);
bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt);
bool Has_UTF16_BOM(const char *pBuf, size_t cnt);
-bool HasUnicodeNullBytes(const char* pBuf, size_t cnt);
inline bool IsUTF8Signature(const char* p)
{
@@ -132,8 +131,8 @@ inline bool IsUTF8Signature(const char* p)
}
#define UTF8StringStart(p) (IsUTF8Signature(p)) ? ((p)+3) : (p)
-bool IsValidUTF8(const char* pTest, size_t nLength);
bool IsPureAscii7Bit(const char* pTest, size_t nLength);
+bool IsValidUTF8(const char* pTest, size_t nLength);
//////////////////////////////////////////////////////
@@ -193,14 +192,13 @@ typedef struct _enc_det_t {
bool bIsReverse;
bool bIsUTF8Sig;
bool bValidUTF8;
- bool bHasUnicodeNullBytes;
bool bPureASCII7Bit;
char encodingStrg[64];
} ENC_DET_T;
-#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, false, "" }
+#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, "" }
ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpData, const size_t cbData,
diff --git a/src/EncodingDetection.cpp b/src/EncodingDetection.cpp
index aadd7ad3e..6fbcfc385 100644
--- a/src/EncodingDetection.cpp
+++ b/src/EncodingDetection.cpp
@@ -362,7 +362,7 @@ extern "C" NP2ENCODING g_Encodings[] = {
/* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, L"" }, // CPI_UNICODE 4
/* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, L"" }, // CPI_UNICODEBE 5
/* 006 */{ NCP_ASCII_7BIT | NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, L"" }, // CPI_UTF8 6
- /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7
+ /* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7
/* 008 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, L"" }, // CPI_UTF7 8
/* 009 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, L"" },
/* 010 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, L"" },
@@ -561,6 +561,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM
return CPI_NONE; // iTest doesn't seem to have been modified ...
}
+
bool const bHasBOM = (iTest & IS_TEXT_UNICODE_SIGNATURE);
bool const bHasRBOM = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE);
@@ -570,7 +571,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM
//bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);
- if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse))) {
+ if ((bHasBOM || bHasRBOM || (bIsUnicode || bIsReverse)) && !bIsIllegal && !(bIsUnicode && bIsReverse)) {
if (lpbBOM) {
*lpbBOM = (bHasBOM || bHasRBOM);
}
@@ -1261,14 +1262,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
cpi_enc_t iAnalyzeHint, bool bSkipUTFDetection, bool bSkipANSICPDetection, bool bForceEncDetection)
{
ENC_DET_T encDetRes = INIT_ENC_DET_T;
+ #define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding))
FileVars_GetFromData(lpData, cbData, &Globals.fvCurFile);
bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);
-#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding))
-
// --- 1st check for force encodings ---
LPCWSTR lpszExt = Path_FindExtension(hpath);
@@ -1306,27 +1306,23 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes, iAnalyzeHint);
// ---------------------------------------------------------------------------
}
- encDetRes.bHasUnicodeNullBytes = HasUnicodeNullBytes(lpData, cbData);
encDetRes.bPureASCII7Bit = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) || IsPureAscii7Bit(lpData, cbData);
if (encDetRes.analyzedEncoding == CPI_NONE) {
encDetRes.analyzedEncoding = iAnalyzeHint;
encDetRes.confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel);
}
- else if (encDetRes.bPureASCII7Bit && !encDetRes.bHasUnicodeNullBytes) {
- encDetRes.analyzedEncoding = (Settings.LoadASCIIasUTF8) ? CPI_UTF8 : CPI_ANSI_DEFAULT;
+ else if (encDetRes.bPureASCII7Bit && encDetRes.bValidUTF8) {
+ encDetRes.analyzedEncoding = CPI_UTF8;
}
if (!bSkipUTFDetection) {
encDetRes.unicodeAnalysis = GetUnicodeEncoding(lpData, cbData, &(encDetRes.bHasBOM), &(encDetRes.bIsReverse));
- if (Encoding_IsNONE(encDetRes.unicodeAnalysis) && Encoding_IsUNICODE(encDetRes.analyzedEncoding)) {
- encDetRes.unicodeAnalysis = encDetRes.analyzedEncoding;
- }
if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis)) {
// check consistent BOM
- if (encDetRes.bHasBOM && !bBOM_LE && !bBOM_BE) {
+ if (encDetRes.bHasBOM && !(bBOM_LE || bBOM_BE)) {
encDetRes.unicodeAnalysis = CPI_NONE;
}
else if (encDetRes.bHasBOM && encDetRes.bIsReverse && !bBOM_BE) {
@@ -1354,14 +1350,14 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
_SetEncodingTitleInfo(&encDetRes);
}
- int const iConfidence = f2int(encDetRes.confidence * 100.0f);
- int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
- encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);
-
// --------------------------------------------------------------------------
// --- choose best encoding guess ----
// --------------------------------------------------------------------------
+ int const iConfidence = f2int(encDetRes.confidence * 100.0f);
+ int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
+ encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);
+
// init Preferred Encoding
encDetRes.Encoding = CPI_PREFERRED_ENCODING;
@@ -1375,17 +1371,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
encDetRes.Encoding = bBOM_LE ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
encDetRes.bIsReverse = bBOM_BE;
}
- else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && encDetRes.bHasUnicodeNullBytes)
- {
- encDetRes.Encoding = encDetRes.unicodeAnalysis;
- }
else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly))
{
encDetRes.Encoding = encDetRes.analyzedEncoding;
}
- else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis))
+ else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && (iConfidence > 66))
{
- encDetRes.Encoding = encDetRes.unicodeAnalysis;
+ encDetRes.Encoding = encDetRes.analyzedEncoding; // (1) rely on analyzed encoding
}
else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET)))
{
diff --git a/src/ced/ced.vcxproj b/src/ced/ced.vcxproj
index 060ec58f4..5db7c9248 100644
--- a/src/ced/ced.vcxproj
+++ b/src/ced/ced.vcxproj
@@ -109,6 +109,9 @@
ced;%(AdditionalIncludeDirectories)
true
/utf-8 %(AdditionalOptions)
+ ProgramDatabase
+ EnableFastChecks
+ MultiThreadedDebug
Console
@@ -126,6 +129,9 @@
ced;%(AdditionalIncludeDirectories)
true
/utf-8 %(AdditionalOptions)
+ ProgramDatabase
+ EnableFastChecks
+ MultiThreadedDebug
Console