Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lexilla/Lexilla.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@
<LanguageStandard_C>stdc17</LanguageStandard_C>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<Optimization>Disabled</Optimization>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down Expand Up @@ -457,6 +459,8 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<OmitFramePointers>false</OmitFramePointers>
<Optimization>Disabled</Optimization>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down Expand Up @@ -490,6 +494,8 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<OmitFramePointers>false</OmitFramePointers>
<Optimization>Disabled</Optimization>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down
7 changes: 4 additions & 3 deletions minipath/minipath.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,8 @@
<CETCompat>true</CETCompat>
<AdditionalLibraryDirectories>$(OutputPath)obj;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ManifestFile />
<GenerateDebugInformation>DebugFull</GenerateDebugInformation>
</Link>
<ClCompile>
</ClCompile>
<ClCompile>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
Expand All @@ -416,6 +415,7 @@
<ExceptionHandling>Sync</ExceptionHandling>
<LanguageStandard_C>stdc17</LanguageStandard_C>
<UseStandardPreprocessor>false</UseStandardPreprocessor>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Manifest>
<EnableDpiAwareness>PerMonitorHighDPIAware</EnableDpiAwareness>
Expand All @@ -441,8 +441,8 @@
<AdditionalLibraryDirectories>$(OutputPath)obj;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<ManifestFile>
</ManifestFile>
<GenerateDebugInformation>DebugFull</GenerateDebugInformation>
</Link>
<ClCompile />
<ClCompile>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
Expand All @@ -461,6 +461,7 @@
<ExceptionHandling>Sync</ExceptionHandling>
<LanguageStandard_C>stdc17</LanguageStandard_C>
<UseStandardPreprocessor>false</UseStandardPreprocessor>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Manifest>
<EnableDpiAwareness>PerMonitorHighDPIAware</EnableDpiAwareness>
Expand Down
12 changes: 12 additions & 0 deletions np3encrypt/np3encrypt.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,14 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<LanguageStandard>stdcpp20</LanguageStandard>
<LanguageStandard_C>stdc17</LanguageStandard_C>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
Expand All @@ -374,10 +378,14 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<LanguageStandard>stdcpp20</LanguageStandard>
<LanguageStandard_C>stdc17</LanguageStandard_C>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
Expand All @@ -393,10 +401,14 @@
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<LanguageStandard>stdcpp20</LanguageStandard>
<LanguageStandard_C>stdc17</LanguageStandard_C>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<LinkTimeCodeGeneration>Default</LinkTimeCodeGeneration>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
Expand Down
83 changes: 46 additions & 37 deletions src/Edit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1422,11 +1422,12 @@ bool EditLoadFile(
EditDetectEOLMode(lpData, cbData, status);
}
}
else if (!IS_ENC_ENFORCED() && (encDetection.bPureASCII7Bit && !encDetection.bHasUnicodeNullBytes)) {
else if (!IS_ENC_ENFORCED() && encDetection.bPureASCII7Bit) {
// load ASCII(7-bit) as ANSI/UTF-8
EditSetNewText(hwnd, lpData, cbData, bClearUndoHistory, bReloadFile);
status->iEncoding = (Settings.LoadASCIIasUTF8 ? CPI_UTF8 : CPI_ANSI_DEFAULT);
EditDetectEOLMode(lpData, cbData, status);

} else { // === ALL OTHER NON UTF-8 ===

status->iEncoding = encDetection.Encoding;
Expand Down Expand Up @@ -4659,45 +4660,53 @@ void EditUniteDuplicateLines(HWND hwnd, bool bRemoveEmptyLines, bool bRemoveLast
DocLn iCurLine = iStartLine;
while (iCurLine < iEndLine) {

DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine);
DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine);
// range-pointer may move during line deletion, so copy current line for const comparison
StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen);
pCurrentLine[iCurLnLen] = '\0';

DocLn iPrevLine = iCurLine;
DocLn iCompareLine = iCurLine;
bool bFoundDup = false;
while (++iCompareLine <= iEndLine) {

DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine);
if (bRemoveEmptyLines || (iCmpLnLen > 0)) {

DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine);
const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen);

if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) {
bFoundDup = true;
DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine);
DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine);
assert(posPrev != posComp);
SciCall_SetTargetRange(posPrev, posComp);
SciCall_ReplaceTarget(0, "");
--iEndLine; // line inbetween removed
--iCompareLine; // don't proceed compare-line
DocPos const iBegCurLine = SciCall_PositionFromLine(iCurLine);

if (iBegCurLine >= 0) {

DocPos const iCurLnLen = Sci_GetNetLineLength(iCurLine);

// range-pointer may move during line deletion, so copy current line for const comparison
StringCchCopyNA(pCurrentLine, SizeOfMem(pCurrentLine), SciCall_GetRangePointer(iBegCurLine, iCurLnLen + 1), iCurLnLen);
pCurrentLine[iCurLnLen] = '\0';

DocLn iPrevLine = iCurLine;
DocLn iCompareLine = iCurLine;
bool bFoundDup = false;
while (++iCompareLine <= iEndLine) {

DocPos const iCmpLnLen = Sci_GetNetLineLength(iCompareLine);
if (bRemoveEmptyLines || (iCmpLnLen > 0)) {

DocPos const iBegCmpLine = SciCall_PositionFromLine(iCompareLine);
const char* const pCompareLine = SciCall_GetRangePointer(iBegCmpLine, iCmpLnLen);

if ((iCurLnLen == iCmpLnLen) && IsSameCharSequence(pCurrentLine, pCompareLine, iCmpLnLen)) {
bFoundDup = true;
DocPos const posPrev = SciCall_GetLineEndPosition(iPrevLine);
DocPos const posComp = SciCall_GetLineEndPosition(iCompareLine);
assert(posPrev != posComp);
SciCall_SetTargetRange(posPrev, posComp);
SciCall_ReplaceTarget(0, "");
--iEndLine; // line inbetween removed
--iCompareLine; // compare-line removed, so stay at same line for next compare
}
else iPrevLine = iCompareLine;
}
else iPrevLine = iCompareLine;

} // while

if (bRemoveLastDup && bFoundDup) {
DocPos const posBeg = SciCall_PositionFromLine(iCurLine);
DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1);
SciCall_SetTargetRange(posBeg, posEnd);
SciCall_ReplaceTarget(0, "");
--iEndLine; // line removed
}
iPrevLine = iCompareLine;
}
if (bRemoveLastDup && bFoundDup) {
DocPos const posBeg = SciCall_PositionFromLine(iCurLine);
DocPos const posEnd = SciCall_PositionFromLine(iCurLine + 1);
SciCall_SetTargetRange(posBeg, posEnd);
SciCall_ReplaceTarget(0, "");
}
else {
++iCurLine;
else ++iCurLine;
}
else ++iCurLine;
}

EndUndoTransAction();
Expand Down
9 changes: 1 addition & 8 deletions src/Encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -714,18 +714,11 @@ bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt)
}
// ----------------------------------------------------------------------------

// Asks IsTextUnicode() to run only the IS_TEXT_UNICODE_NULL_BYTES test on the
// buffer and reports whether that flag is still set in the returned test mask.
// NOTE(review): cnt is narrowed to int for the API call.
bool HasUnicodeNullBytes(const char* pBuf, size_t cnt)
{
    int fTests = IS_TEXT_UNICODE_NULL_BYTES; // in: tests to run, out: tests passed
    if (!IsTextUnicode(pBuf, (int)cnt, &fTests)) {
        return false;
    }
    return ((fTests & IS_TEXT_UNICODE_NULL_BYTES) != 0);
}
// ----------------------------------------------------------------------------

// Reports whether either Has_UTF16_LE_BOM() or Has_UTF16_BE_BOM() accepts the buffer.
// The little-endian check runs first; the big-endian check is consulted only if it fails.
bool Has_UTF16_BOM(const char* pBuf, size_t cnt)
{
    if (Has_UTF16_LE_BOM(pBuf, cnt)) {
        return true;
    }
    return Has_UTF16_BE_BOM(pBuf, cnt);
}
// ----------------------------------------------------------------------------

// ============================================================================

Expand Down
6 changes: 2 additions & 4 deletions src/Encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,15 @@ int Encoding_GetNameW(const cpi_enc_t iEncoding, LPWSTR buffer, size_t cwch);
bool Has_UTF16_LE_BOM(const char* pBuf, size_t cnt);
bool Has_UTF16_BE_BOM(const char* pBuf, size_t cnt);
bool Has_UTF16_BOM(const char *pBuf, size_t cnt);
bool HasUnicodeNullBytes(const char* pBuf, size_t cnt);

inline bool IsUTF8Signature(const char* p)
{
return ((p[0] == '\xEF') && (p[1] == '\xBB') && (p[2] == '\xBF'));
}
#define UTF8StringStart(p) (IsUTF8Signature(p)) ? ((p)+3) : (p)

bool IsValidUTF8(const char* pTest, size_t nLength);
bool IsPureAscii7Bit(const char* pTest, size_t nLength);
bool IsValidUTF8(const char* pTest, size_t nLength);


//////////////////////////////////////////////////////
Expand Down Expand Up @@ -193,14 +192,13 @@ typedef struct _enc_det_t {
bool bIsReverse;
bool bIsUTF8Sig;
bool bValidUTF8;
bool bHasUnicodeNullBytes;
bool bPureASCII7Bit;

char encodingStrg[64];

} ENC_DET_T;

#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, false, "" }
#define INIT_ENC_DET_T { CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, CPI_NONE, 0.0f, false, false, false, false, false, false, "" }


ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpData, const size_t cbData,
Expand Down
34 changes: 13 additions & 21 deletions src/EncodingDetection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ extern "C" NP2ENCODING g_Encodings[] = {
/* 004 */{ NCP_UNICODE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16LE, IDS_ENC_UTF16LE, L"" }, // CPI_UNICODE 4
/* 005 */{ NCP_UNICODE | NCP_UNICODE_REVERSE | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF16BE, IDS_ENC_UTF16BE, L"" }, // CPI_UNICODEBE 5
/* 006 */{ NCP_ASCII_7BIT | NCP_UTF8 | NCP_RECODE, CP_UTF8, ENC_PARSE_NAM_UTF8, IDS_ENC_UTF8, L"" }, // CPI_UTF8 6
/* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7
/* 007 */{ NCP_UTF8 | NCP_UTF8_SIGN, CP_UTF8, ENC_PARSE_NAM_UTF8SIG, IDS_ENC_UTF8SIG, L"" }, // CPI_UTF8SIGN 7
/* 008 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, CP_UTF7, ENC_PARSE_NAM_UTF7, IDS_ENC_UTF7, L"" }, // CPI_UTF7 8
/* 009 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 720, ENC_PARSE_NAM_DOS_720, IDS_ENC_DOS_720, L"" },
/* 010 */{ NCP_ASCII_7BIT | NCP_EXTERNAL_8BIT | NCP_RECODE, 28596, ENC_PARSE_NAM_ISO_8859_6, IDS_ENC_ISO_8859_6, L"" },
Expand Down Expand Up @@ -561,6 +561,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM
return CPI_NONE; // iTest doesn't seem to have been modified ...
}


bool const bHasBOM = (iTest & IS_TEXT_UNICODE_SIGNATURE);
bool const bHasRBOM = (iTest & IS_TEXT_UNICODE_REVERSE_SIGNATURE);

Expand All @@ -570,7 +571,7 @@ cpi_enc_t GetUnicodeEncoding(const char* pBuffer, const size_t len, bool* lpbBOM

//bool const bHasNullBytes = (iTest & IS_TEXT_UNICODE_NULL_BYTES);

if (bHasBOM || bHasRBOM || ((bIsUnicode || bIsReverse) && !bIsIllegal && !(bIsUnicode && bIsReverse))) {
if ((bHasBOM || bHasRBOM || (bIsUnicode || bIsReverse)) && !bIsIllegal && !(bIsUnicode && bIsReverse)) {
if (lpbBOM) {
*lpbBOM = (bHasBOM || bHasRBOM);
}
Expand Down Expand Up @@ -1261,14 +1262,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
cpi_enc_t iAnalyzeHint, bool bSkipUTFDetection, bool bSkipANSICPDetection, bool bForceEncDetection)
{
ENC_DET_T encDetRes = INIT_ENC_DET_T;
#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding))

FileVars_GetFromData(lpData, cbData, &Globals.fvCurFile);

bool const bBOM_LE = Has_UTF16_LE_BOM(lpData, cbData);
bool const bBOM_BE = Has_UTF16_BE_BOM(lpData, cbData);

#define IS_ENC_ENFORCED() (!Encoding_IsNONE(encDetRes.forcedEncoding))

// --- 1st check for force encodings ---

LPCWSTR lpszExt = Path_FindExtension(hpath);
Expand Down Expand Up @@ -1306,27 +1306,23 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
Encoding_AnalyzeText(lpData, cbNbytes4Analysis, &encDetRes, iAnalyzeHint);
// ---------------------------------------------------------------------------
}
encDetRes.bHasUnicodeNullBytes = HasUnicodeNullBytes(lpData, cbData);
encDetRes.bPureASCII7Bit = (encDetRes.analyzedEncoding == CPI_ASCII_7BIT) || IsPureAscii7Bit(lpData, cbData);

if (encDetRes.analyzedEncoding == CPI_NONE) {
encDetRes.analyzedEncoding = iAnalyzeHint;
encDetRes.confidence = (1.0f - Settings2.AnalyzeReliableConfidenceLevel);
}
else if (encDetRes.bPureASCII7Bit && !encDetRes.bHasUnicodeNullBytes) {
encDetRes.analyzedEncoding = (Settings.LoadASCIIasUTF8) ? CPI_UTF8 : CPI_ANSI_DEFAULT;
else if (encDetRes.bPureASCII7Bit && encDetRes.bValidUTF8) {
encDetRes.analyzedEncoding = CPI_UTF8;
}

if (!bSkipUTFDetection) {

encDetRes.unicodeAnalysis = GetUnicodeEncoding(lpData, cbData, &(encDetRes.bHasBOM), &(encDetRes.bIsReverse));
if (Encoding_IsNONE(encDetRes.unicodeAnalysis) && Encoding_IsUNICODE(encDetRes.analyzedEncoding)) {
encDetRes.unicodeAnalysis = encDetRes.analyzedEncoding;
}

if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis)) {
// check consistent BOM
if (encDetRes.bHasBOM && !bBOM_LE && !bBOM_BE) {
if (encDetRes.bHasBOM && !(bBOM_LE || bBOM_BE)) {
encDetRes.unicodeAnalysis = CPI_NONE;
}
else if (encDetRes.bHasBOM && encDetRes.bIsReverse && !bBOM_BE) {
Expand Down Expand Up @@ -1354,14 +1350,14 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
_SetEncodingTitleInfo(&encDetRes);
}

int const iConfidence = f2int(encDetRes.confidence * 100.0f);
int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);

// --------------------------------------------------------------------------
// --- choose best encoding guess ----
// --------------------------------------------------------------------------

int const iConfidence = f2int(encDetRes.confidence * 100.0f);
int const iReliableThreshold = f2int(Settings2.AnalyzeReliableConfidenceLevel * 100.0f);
encDetRes.bIsAnalysisReliable = (iConfidence >= iReliableThreshold);

// init Preferred Encoding
encDetRes.Encoding = CPI_PREFERRED_ENCODING;

Expand All @@ -1375,17 +1371,13 @@ extern "C" ENC_DET_T Encoding_DetectEncoding(const HPATHL hpath, const char* lpD
encDetRes.Encoding = bBOM_LE ? CPI_UNICODEBOM : CPI_UNICODEBEBOM;
encDetRes.bIsReverse = bBOM_BE;
}
else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && encDetRes.bHasUnicodeNullBytes)
{
encDetRes.Encoding = encDetRes.unicodeAnalysis;
}
else if (Encoding_IsValid(encDetRes.analyzedEncoding) && (encDetRes.bIsAnalysisReliable || !Settings.UseReliableCEDonly))
{
encDetRes.Encoding = encDetRes.analyzedEncoding;
}
else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis))
else if (Encoding_IsUNICODE(encDetRes.unicodeAnalysis) && (iConfidence > 66))
{
encDetRes.Encoding = encDetRes.unicodeAnalysis;
encDetRes.Encoding = encDetRes.analyzedEncoding; // (1) rely on analyzed encoding
}
else if (Encoding_IsValid(Encoding_SrcWeak(CPI_GET)))
{
Expand Down
6 changes: 6 additions & 0 deletions src/ced/ced.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@
<AdditionalIncludeDirectories>ced;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand All @@ -126,6 +129,9 @@
<AdditionalIncludeDirectories>ced;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/utf-8 %(AdditionalOptions)</AdditionalOptions>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
Expand Down