diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/BasicRascalConfig.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/BasicRascalConfig.rsc index b40457b475..8c24cd390c 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/BasicRascalConfig.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/BasicRascalConfig.rsc @@ -177,10 +177,10 @@ bool isValidRascalTplVersion(str version) str getCurrentRascalTplVersion() = currentRascalTplVersion; -str currentRascalTplVersion = "2.0.0"; +str currentRascalTplVersion = "3.0.0"; data TModel ( - str rascalTplVersion = "2.0.0" + str rascalTplVersion = "3.0.0" ); // Define alias for TypePalConfig diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/CheckerCommon.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/CheckerCommon.rsc index 9a447b9c82..943676676c 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/CheckerCommon.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/CheckerCommon.rsc @@ -472,3 +472,5 @@ int nextClosure(){ void resetClosureCounter(){ closureCounter = 0; } + +str normalizedMD5Hash(str s) = md5Hash(removeWhitespace(s)); diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectDataDeclaration.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectDataDeclaration.rsc index 96bc64408f..39905a60cf 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectDataDeclaration.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectDataDeclaration.rsc @@ -69,7 +69,7 @@ void dataDeclaration(Tags tags, Declaration current, list[Variant] variants, Col dt = isEmpty(typeParameters) ? defType(aadt(adtName, [], dataSyntax())) : defType(typeParameters, AType(Solver s) { return aadt(adtName, [ s.getType(tp)[closed=true] | tp <- typeParameters], dataSyntax()); }); - dt.md5 = md5Hash(""); + dt.md5 = normalizedMD5Hash(""); dataCounter += 1; if(!isEmpty(commonKeywordParameterList)) dt.commonKeywordFields = commonKeywordParameterList; c.define(adtName, dataId(), current, dt); @@ -141,7 +141,7 @@ void collect(current:(Variant) ` ( <{TypeArg ","}* arguments> "); + dt.md5 = normalizedMD5Hash(""); c.define(fieldName, fieldId(), ta.name, dt); } } @@ -152,7 +152,7 @@ void collect(current:(Variant) ` ( <{TypeArg ","}* arguments> "); + dt.md5 = normalizedMD5Hash(""); c.define(fieldName, keywordFieldId(), kwf.name, dt); } @@ -166,7 +166,7 @@ void collect(current:(Variant) ` ( <{TypeArg ","}* arguments> "); + dt.md5 = normalizedMD5Hash(""); if(!isEmpty(tagsMap)) dt.tags = tagsMap; vname = prettyPrintName(var.name); if(isWildCard(vname)){ @@ -217,7 +217,7 @@ void collect(current: (Declaration) ` anno "); + dt.md5 = normalizedMD5Hash(""); if(!isEmpty(tagsMap)) dt.tags = tagsMap; // if(isWildCard(pname)){ // c.report(error(name, "Annotation names starting with `_` are deprecated; only allowed to suppress warning on unused variables")); @@ -236,7 +236,7 @@ void collect(current: (KeywordFormal) ` = "); c.define(kwformalName, keywordFormalId(), current, dt); c.calculate("keyword formal", current, [kwType, expression], AType(Solver s){ @@ -419,7 +419,7 @@ void collect(current: (FunctionDeclaration) ``, Collec endUseBoundedTypeParameters(c); - dt.md5 = md5Hash(md5Contrib); + dt.md5 = normalizedMD5Hash(md5Contrib); c.defineInScope(parentScope, prettyPrintName(fname), functionId(), current, dt); c.leaveScope(decl); c.pop(currentFunction); @@ -719,7 +719,7 @@ void collect (current: (Declaration) ` alias // c.report(warning(name, "Alias names starting with `_` are deprecated; only allowed to suppress warning on unused variables")); // } - c.define(aliasName, aliasId(), current, defType([base], AType(Solver s) { return s.getType(base); })[md5 = md5Hash("")]); + c.define(aliasName, aliasId(), current, defType([base], AType(Solver s) { return s.getType(base); })[md5 = normalizedMD5Hash("")]); c.enterScope(current); collect(tags, base, c); c.leaveScope(current); @@ -754,7 +754,7 @@ void collect (current: (Declaration) ` alias } return aalias(aliasName, params, s.getType(base)); - })[md5 = md5Hash("")]); + })[md5 = normalizedMD5Hash("")]); collect(tags, c); diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectSyntaxDeclaration.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectSyntaxDeclaration.rsc index 6a474472b4..5ca15c5903 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectSyntaxDeclaration.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectSyntaxDeclaration.rsc @@ -78,7 +78,7 @@ void declareSyntax(SyntaxDefinition current, SyntaxRole syntaxRole, IdRole idRol dt = defType(/*current is language && current.\start is present ? \start(nonterminalType) : */nonterminalType); dt.vis = vis; - dt.md5 = md5Hash("" : "">"); + dt.md5 = normalizedMD5Hash("" : "">"); syndefCounter += 1; // Define the syntax symbol itself and all labelled alternatives as constructors @@ -199,7 +199,7 @@ void collect(current: (Prod) ` : ")); } else throw "Unexpected type of production: "; - })[md5=md5Hash("")]); + })[md5=normalizedMD5Hash("")]); beginUseTypeParameters(c,closed=true); c.push(currentAlternative, ", syms>); collect(symbols, c); @@ -271,7 +271,7 @@ void collect(current: (Prod) ` | `, Collector c){ c.pop(inAlternative); if(isEmpty(c.getStack(inAlternative))){ nalternatives += 1; - c.define("alternative-", nonterminalId(), current, defType(current)[md5=md5Hash(unparseNoLayout(current))]); + c.define("alternative-", nonterminalId(), current, defType(current)[md5=normalizedMD5Hash("")]); } } else { throw "collect alt: currentAdt not found"; diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectType.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectType.rsc index 3bbdd3d0af..04b3443847 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectType.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/CollectType.rsc @@ -570,16 +570,11 @@ void collect(current:(Sym) `start [ ]`, Collector c){ collect(n, c); } -str unparseNoLayout(Tree t){ - s = ""; - return "<}>"; -} - void collect(current:(Sym) ` `, Collector c){ un = unescape(""); md5Contrib = ""; if(!isEmpty(c.getStack(currentAlternative)) && := c.top(currentAlternative)){ - md5Contrib += ""; + md5Contrib += ""; } else { throw "Cannot compute md5 for "; } @@ -589,7 +584,7 @@ void collect(current:(Sym) ` `, Collector c){ AType(Solver s){ res = s.getType(symbol)[alabel=un]; return res; - })[md5=md5Hash("")]); + })[md5=normalizedMD5Hash("")]); c.fact(current, n); collect(symbol, c); diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/BinaryDependencyTests.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/BinaryDependencyTests.rsc index f8ec3b8d6f..c0925d6860 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/BinaryDependencyTests.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/BinaryDependencyTests.rsc @@ -182,6 +182,7 @@ TModel check(str mname, RascalCompilerConfig cfg){ // --- Tests for source libraries -------------------------------------------- +@ignore{Loads TModel with version 2.0.0 while it is 3.0.0 since a22dcd4416. TODO Make this test more robust.} test bool importSimpleSourceModuleWithRascalAsLib(){ libName = "test-lib"; lib = diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/ChangeAndHashTests.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/ChangeAndHashTests.rsc index 4122d1b3ba..1fceb9d536 100644 --- a/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/ChangeAndHashTests.rsc +++ b/src/org/rascalmpl/compiler/lang/rascalcore/check/tests/ChangeAndHashTests.rsc @@ -311,6 +311,12 @@ test bool consFieldLayoutChanged1() test bool consFieldLayoutChanged2() = expectEqual("data D = d(int n);", "data D = d (int n);"); +test bool consDifferentNewlineCount() + = expectEqual("data A = a(list[A] children\n\n);", "data A = a(list[A] children\n\n\n);"); + +test bool consDifferentNewlineChars() + = expectEqual("data A = a(list[A] children\n);", "data A = a(list[A] children\r\n);"); + // Keyword fields n and m generate separate locs, therefore we filter on constructors test bool consKwFieldChanged() diff --git a/src/org/rascalmpl/compiler/lang/rascalcore/compile/Examples/CompareTPLs.rsc b/src/org/rascalmpl/compiler/lang/rascalcore/compile/Examples/CompareTPLs.rsc new file mode 100644 index 0000000000..0834402d40 --- /dev/null +++ b/src/org/rascalmpl/compiler/lang/rascalcore/compile/Examples/CompareTPLs.rsc @@ -0,0 +1,146 @@ +@license{ +Copyright (c) 2018-2025, NWO-I CWI, Swat.engineering and Paul Klint +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +} + +module lang::rascalcore::compile::Examples::CompareTPLs + +import IO; +import List; +import Location; +import Set; +import ValueIO; +import util::FileSystem; +import util::Monitor; + +import analysis::typepal::TModel; +import lang::rascalcore::check::LogicalLocations; + +str JOB = "Comparing TPLs"; + +void main() { + compareTPLs(); +} + +@synopsis{Compare locations in two TPLs two verify only expected (OS-specific newline offset) differences.} +rel[loc, loc, loc] compareTPLs() = job(JOB, rel[loc, loc, loc](void(str, int) step) { + /* + Preconditions + 1. Make sure the right Rascal release JAR is present in the Maven repository. + 2. Compile the same Rascal release locally and copy the JARs to the local target folder. + */ + loc localTarget = |home:///swat/projects/Rascal/rascal/targetBackup/relocatedClasses|; + loc remoteTarget = |mvn://org.rascalmpl--rascal--0.41.3-RC8|; + + rel[loc, loc, loc] differentLocations = {}; + step("Finding local TPLs", 1); + allTPLs = sort(find(localTarget, "tpl"), byPathLength); + jobTodo(JOB, work=size(allTPLs)); + + for (tpl <- allTPLs) { + relTplPath = relativize(localTarget, tpl); + step("Comparing ", 1); + differentLocations += toSet(compareTPL(relTplPath, localTarget, remoteTarget)); + } + + step("Computing statistics", 1); + + set[str] filesWithDiffs = {l.parent.path | l <- differentLocations<0>}; + set[loc] defs = differentLocations<0>; + + println("Number of tested TPLs: "); + println("Found different locations in files.");; + + print("Kinds of different locations: "); + iprintln({l.scheme | l <- differentLocations<0>}); + + return differentLocations; +}, totalWork=2); + +bool byPathLength(loc a, loc b) = a.path < b.path; + +lrel[loc, loc, loc] compareTPL(loc relTplPath, loc localTargetDir, loc unixTargetDir) { + loc localTplPath = resolve(localTargetDir, relTplPath); + loc unixTplPath = resolve(unixTargetDir, relTplPath); + + if (!exists(localTplPath)) { + throw "Local TPL does not exist"; + } + if (!exists(unixTplPath)) { + throw "Unix TPL does not exist"; + } + + localTpl = readBinaryValueFile(#TModel, localTplPath); + unixTpl = readBinaryValueFile(#TModel, unixTplPath); + + differentDefs = [ | <- difference(localTpl.defines.defined, unixTpl.defines.defined)]; + if ([_, *_] := differentDefs) { + println("Differences in defs of (\): "); + iprintln(differentDefs); + println(); + } + + return differentDefs; +} + +lrel[loc, loc] difference(set[loc] lLocs, set[loc] uLocs) = + [ | <- pairs, !isEqualModuloNewlines(l, u)] + when lrel[loc, loc] pairs := zip2(sort(lLocs, lessThan), sort(uLocs, lessThan)); + +bool isEqualModuloNewlines(loc localLoc, loc unixLoc) = isRascalLogicalLoc(localLoc) + ? isEqualLogicalModuloNewlines(localLoc, unixLoc) + : isEqualPhysicalModuloNewlines(localLoc, unixLoc); + +bool isEqualLogicalModuloNewlines(loc localLoc, loc unixLoc) = localLoc == unixLoc; + +bool isEqualPhysicalModuloNewlines(loc localLoc, loc unixLoc) { + if (localLoc.uri != unixLoc.uri) { + throw "URIs not equal: vs. "; + } + + if (!localLoc.begin?) { + // Cannot say anything sensible about newlines without line information + return true; + } + + if (localLoc.begin.line == localLoc.end.line) { + // Single line + return localLoc.length == unixLoc.length + && localLoc.begin == unixLoc.begin + && localLoc.end == unixLoc.end; + } + + // Multi line + return localLoc.begin == unixLoc.begin + && localLoc.end == unixLoc.end; +} + +bool lessThan(loc a, loc b) = a.offset? && a.uri == b.uri + ? a.offset < b.offset + : a.uri < b.uri; + +bool lessThan(tuple[&A, &B] a, tuple[&A, &B] b) = a<0> != b<0> + ? lessThan(a<0>, b<0>) + : lessThan(a<1>, b<1>); diff --git a/src/org/rascalmpl/library/Prelude.java b/src/org/rascalmpl/library/Prelude.java index b85fbe8bef..f325e3c794 100644 --- a/src/org/rascalmpl/library/Prelude.java +++ b/src/org/rascalmpl/library/Prelude.java @@ -3486,6 +3486,32 @@ private boolean match(IString subject, int i, IString pattern){ return true; } + private boolean isUnicodeWhitespace(Integer cp) { + return Character.isSpaceChar(cp) + // Check for characters not included in 'space chars', but considered white space + || cp == 0x0009 // \t + || cp == 0x000A // \n + || cp == 0x000B // VT + || cp == 0x000C // FF + || cp == 0x000D // \r + || cp == 0x0085;// NEL + } + + public IString removeWhitespace(IString str) { + StringBuilder b = new StringBuilder(str.length()); + var iter = str.iterator(); + + while (iter.hasNext()) { + var codepoint = iter.next(); + // Character.isWhitespace does not cover the complete range of Unicode whitespace + if (!isUnicodeWhitespace(codepoint)) { + b.appendCodePoint(codepoint); + } + } + + return values.string(b.toString()); + } + public IValue replaceAll(IString str, IString find, IString replacement){ int fLength = find.length(); if(fLength == 0){ diff --git a/src/org/rascalmpl/library/String.rsc b/src/org/rascalmpl/library/String.rsc index 8394031481..589af7d77b 100644 --- a/src/org/rascalmpl/library/String.rsc +++ b/src/org/rascalmpl/library/String.rsc @@ -254,6 +254,21 @@ public str left(str s, int n, str pad) } +@synopsis{Remove all whitespace from a string.} +@description{ +Return a copy of `subject` in which all occurrences of Unicode whitespace characters have been removed. +} +@examples{ +```rascal-shell +import String; +removeWhitespace("\rabra\ncada bra\t"); +removeWhitespace("Uni\u1680code") +``` +} +@javaClass{org.rascalmpl.library.Prelude} +public java str removeWhitespace(str subject); + + @synopsis{Replace all occurrences of a string in another string.} @description{ Return a copy of `subject` in which all occurrences of `find` (if any) have been replaced by `replacement`. diff --git a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings2.rsc b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings2.rsc index d504ac9c56..211928345b 100644 --- a/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings2.rsc +++ b/src/org/rascalmpl/library/lang/rascal/tests/basic/Strings2.rsc @@ -14,3 +14,21 @@ test bool tstWrap(str S1 , str S2) { n = max(size(S1), size(S2)) + 2; return trim(S) == trim(replaceAll(wrap(S, n), getLineSeparator(), " ")); } + +private set[str] UNICODE_WS = { + "\u0009", "\u000A", "\u000B", "\u000C", "\u000D", + "\u0020", + "\u0085", + "\u00A0", + "\u1680", + "\u2000", "\u2001", "\u2002", "\u2003", "\u2004", "\u2005", "\u2006", "\u2007", "\u2008", "\u2009", "\u200A", + "\u2028", "\u2029", + "\u202F", + "\u205F", + "\u3000" +}; + +test bool tstRemoveWhitespace1(str S1) + = removeWhitespace(S1) == "<}>"; +test bool tstRemoveWhitespace2(str S1) + = size(removeWhitespace(S1)) <= size(S1);