From 6ab19f7dfc2d3216a61eb5a3352b141732e9eea6 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Thu, 6 Jun 2024 21:33:52 +0100 Subject: [PATCH 1/5] Reallocate large+small buffers at the end of every module RSS decreases. If the OS is under memory pressure, it will not be swapping out useless pages. The SortBlocks contain pointers into the master thread's lBuffer, which need updating after reallocation. Use an "UpdateSortBlocks" function to do this, which is a trimmed-down version of IniSortBlocks which only sets the pointers. --- sources/declare.h | 1 + sources/execute.c | 31 +++++++++++++++++++++ sources/threads.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/sources/declare.h b/sources/declare.h index 5a8c3b3a..27ad3f14 100644 --- a/sources/declare.h +++ b/sources/declare.h @@ -1451,6 +1451,7 @@ extern int MasterWaitThread(int); extern void WakeupMasterFromThread(int,int); extern int LoadReadjusted(VOID); extern int IniSortBlocks(int); +extern int UpdateSortBlocks(int); extern int TreatIndexEntry(PHEAD LONG); extern WORD GetTerm2(PHEAD WORD *); extern void SetHideFiles(VOID); diff --git a/sources/execute.c b/sources/execute.c index 96947256..bef7a9b3 100644 --- a/sources/execute.c +++ b/sources/execute.c @@ -1046,6 +1046,37 @@ if ( AC.SwitchInArray > 0 ) { AC.MultiBracketBuf = 0; } + /* Reallocate the sort buffers to reduce resident set usage */ + /* AT.SS is the same as AT.S0 here */ + SORTING* S = AT.S0; + M_free(S->lBuffer, "SortReallocate lBuffer+sBuffer"); + S->lBuffer = Malloc1(sizeof(*(S->lBuffer))*(S->LargeSize+S->SmallEsize), "SortReallocate lBuffer+sBuffer"); + S->lTop = S->lBuffer+S->LargeSize; + S->sBuffer = S->lTop; + if ( S->LargeSize == 0 ) { S->lBuffer = 0; S->lTop = 0; } + S->sTop = S->sBuffer + S->SmallSize; + S->sTop2 = S->sBuffer + S->SmallEsize; + S->sHalf = S->sBuffer + (LONG)((S->SmallSize+S->SmallEsize)>>1); + +#ifdef WITHPTHREADS + /* We have to re-set the pointers into master lBuffer 
in the SortBlocks */ + UpdateSortBlocks(AM.totalnumberofthreads-1); + + /* The SortBots do not have a real sort buffer to reallocate. */ + /* AB[0] has been reallocated above already. */ + for ( i = 1; i < AM.totalnumberofthreads; i++ ) { + SORTING* S = AB[i]->T.S0; + M_free(S->lBuffer, "SortReallocate lBuffer+sBuffer"); + S->lBuffer = Malloc1(sizeof(*(S->lBuffer))*(S->LargeSize+S->SmallEsize), "SortReallocate lBuffer+sBuffer"); + S->lTop = S->lBuffer+S->LargeSize; + S->sBuffer = S->lTop; + if ( S->LargeSize == 0 ) { S->lBuffer = 0; S->lTop = 0; } + S->sTop = S->sBuffer + S->SmallSize; + S->sTop2 = S->sBuffer + S->SmallEsize; + S->sHalf = S->sBuffer + (LONG)((S->SmallSize+S->SmallEsize)>>1); + } +#endif + return(RetCode); } diff --git a/sources/threads.c b/sources/threads.c index 5296efe5..89bf4f0f 100644 --- a/sources/threads.c +++ b/sources/threads.c @@ -4769,6 +4769,75 @@ int IniSortBlocks(int numworkers) /* #] IniSortBlocks : + #[ UpdateSortBlocks : +*/ + +/** + * A version of IniSortBlocks which only updates the pointers in the master's + * buffer, to be used after reallocation of that buffer. + */ +int UpdateSortBlocks(int numworkers) +{ + ALLPRIVATES *B; + SORTING *S; + LONG totalsize, workersize, blocksize, numberofterms; + int maxter, id, j; + int numberofblocks = NUMBEROFBLOCKSINSORT, numparts; + WORD *w; + + if ( numworkers == 0 ) return(0); + +#ifdef WITHSORTBOTS + if ( numworkers > 2 ) { + numparts = 2*numworkers - 2; + numberofblocks = numberofblocks/2; + } + else { + numparts = numworkers; + } +#else + numparts = numworkers; +#endif + S = AM.S0; + totalsize = S->LargeSize + S->SmallEsize; + workersize = totalsize / numparts; + maxter = AM.MaxTer/sizeof(WORD); + blocksize = ( workersize - maxter )/numberofblocks; + numberofterms = blocksize / maxter; + if ( numberofterms < MINIMUMNUMBEROFTERMS ) { +/* + This should have been taken care of in RecalcSetups. 
+*/ + MesPrint("We have a problem with the size of the blocks in UpdateSortBlocks"); + Terminate(-1); + } +/* + Layout: For each worker + block 0: size is maxter WORDS + numberofblocks blocks of size blocksize WORDS +*/ + w = S->lBuffer; + if ( w == 0 ) w = S->sBuffer; + for ( id = 1; id <= numparts; id++ ) { + B = AB[id]; + AT.SB.MasterFill[0] = AT.SB.MasterStart[0] = w; + w += maxter; + AT.SB.MasterStop[0] = w; + for ( j = 1; j <= numberofblocks; j++ ) { + AT.SB.MasterFill[j] = AT.SB.MasterStart[j] = w; + w += blocksize; + AT.SB.MasterStop[j] = w; + } + } + if ( w > S->sTop2 ) { + MesPrint("Counting problem in UpdateSortBlocks"); + Terminate(-1); + } + return(0); +} + +/* + #] UpdateSortBlocks : #[ DefineSortBotTree : */ From 07b2c96664adf3e8092bc174fa7bf87fdf42e808 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Thu, 13 Jun 2024 09:53:34 +0100 Subject: [PATCH 2/5] Add On/Off option "sortreallocate" to control buffer reallocation. --- sources/compcomm.c | 1 + sources/execute.c | 44 +++++++++++++++++++++++--------------------- sources/structs.h | 1 + 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/sources/compcomm.c b/sources/compcomm.c index b9ab8505..648311e8 100644 --- a/sources/compcomm.c +++ b/sources/compcomm.c @@ -136,6 +136,7 @@ static KEYWORDV onoffoptions[] = { ,{"oldgcd", &(AC.OldGCDflag), 1, 0} ,{"innertest", &(AC.InnerTest), 1, 0} ,{"wtimestats", &(AC.WTimeStatsFlag), 1, 0} + ,{"sortreallocate", &(AC.SortReallocateFlag), 1, 0} }; static WORD one = 1; diff --git a/sources/execute.c b/sources/execute.c index bef7a9b3..51434238 100644 --- a/sources/execute.c +++ b/sources/execute.c @@ -1046,26 +1046,10 @@ if ( AC.SwitchInArray > 0 ) { AC.MultiBracketBuf = 0; } - /* Reallocate the sort buffers to reduce resident set usage */ - /* AT.SS is the same as AT.S0 here */ - SORTING* S = AT.S0; - M_free(S->lBuffer, "SortReallocate lBuffer+sBuffer"); - S->lBuffer = Malloc1(sizeof(*(S->lBuffer))*(S->LargeSize+S->SmallEsize), "SortReallocate 
lBuffer+sBuffer"); - S->lTop = S->lBuffer+S->LargeSize; - S->sBuffer = S->lTop; - if ( S->LargeSize == 0 ) { S->lBuffer = 0; S->lTop = 0; } - S->sTop = S->sBuffer + S->SmallSize; - S->sTop2 = S->sBuffer + S->SmallEsize; - S->sHalf = S->sBuffer + (LONG)((S->SmallSize+S->SmallEsize)>>1); - -#ifdef WITHPTHREADS - /* We have to re-set the pointers into master lBuffer in the SortBlocks */ - UpdateSortBlocks(AM.totalnumberofthreads-1); - - /* The SortBots do not have a real sort buffer to reallocate. */ - /* AB[0] has been reallocated above already. */ - for ( i = 1; i < AM.totalnumberofthreads; i++ ) { - SORTING* S = AB[i]->T.S0; + if ( AC.SortReallocateFlag ) { + /* Reallocate the sort buffers to reduce resident set usage */ + /* AT.SS is the same as AT.S0 here */ + SORTING* S = AT.S0; M_free(S->lBuffer, "SortReallocate lBuffer+sBuffer"); S->lBuffer = Malloc1(sizeof(*(S->lBuffer))*(S->LargeSize+S->SmallEsize), "SortReallocate lBuffer+sBuffer"); S->lTop = S->lBuffer+S->LargeSize; @@ -1074,8 +1058,26 @@ if ( AC.SwitchInArray > 0 ) { S->sTop = S->sBuffer + S->SmallSize; S->sTop2 = S->sBuffer + S->SmallEsize; S->sHalf = S->sBuffer + (LONG)((S->SmallSize+S->SmallEsize)>>1); - } + +#ifdef WITHPTHREADS + /* We have to re-set the pointers into master lBuffer in the SortBlocks */ + UpdateSortBlocks(AM.totalnumberofthreads-1); + + /* The SortBots do not have a real sort buffer to reallocate. */ + /* AB[0] has been reallocated above already. 
*/ + for ( i = 1; i < AM.totalnumberofthreads; i++ ) { + SORTING* S = AB[i]->T.S0; + M_free(S->lBuffer, "SortReallocate lBuffer+sBuffer"); + S->lBuffer = Malloc1(sizeof(*(S->lBuffer))*(S->LargeSize+S->SmallEsize), "SortReallocate lBuffer+sBuffer"); + S->lTop = S->lBuffer+S->LargeSize; + S->sBuffer = S->lTop; + if ( S->LargeSize == 0 ) { S->lBuffer = 0; S->lTop = 0; } + S->sTop = S->sBuffer + S->SmallSize; + S->sTop2 = S->sBuffer + S->SmallEsize; + S->sHalf = S->sBuffer + (LONG)((S->SmallSize+S->SmallEsize)>>1); + } #endif + } return(RetCode); } diff --git a/sources/structs.h b/sources/structs.h index 08c47398..c72aa46a 100644 --- a/sources/structs.h +++ b/sources/structs.h @@ -1867,6 +1867,7 @@ struct C_const { int MemDebugFlag; /* Only used when MALLOCDEBUG in tools.c */ int OldGCDflag; int WTimeStatsFlag; + int SortReallocateFlag; /* Controls reallocation of large+small buffer at module end */ int doloopstacksize; int dolooplevel; int CheckpointFlag; /**< Tells preprocessor whether checkpoint code must executed. From 69cecdb74d59c6103e930656257bac207b785177 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Thu, 13 Jun 2024 21:39:12 +0100 Subject: [PATCH 3/5] Add preprocessor instruction #sortreallocate Enable the reallocation for a single module. If specified in the same module as "Off sortreallocate;", the reallocation will still happen in that module. --- sources/compcomm.c | 8 ++++++++ sources/declare.h | 1 + sources/execute.c | 5 +++++ sources/pre.c | 18 ++++++++++++++++++ sources/structs.h | 5 ++++- 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/sources/compcomm.c b/sources/compcomm.c index 648311e8..d76973ad 100644 --- a/sources/compcomm.c +++ b/sources/compcomm.c @@ -628,6 +628,14 @@ int CoOff(UBYTE *s) AC.TestValue = 0; } } + else if ( StrICont(t,(UBYTE *)"sortreallocate") == 0 ) { + if ( AC.SortReallocateFlag == 2 ) { + /* The flag has been set by #sortreallocate, and it was off before. 
Leave it as 2, + so that the reallocation still happens in the current module. It will be turned + off after the reallocation is done. */ + return(0); + } + } *s = c; *onoffoptions[i].var = onoffoptions[i].flags; AR.SortType = AC.SortType; diff --git a/sources/declare.h b/sources/declare.h index 27ad3f14..af21b923 100644 --- a/sources/declare.h +++ b/sources/declare.h @@ -957,6 +957,7 @@ extern int DoPrePrintTimes(UBYTE *); extern int DoPreWrite(UBYTE *); extern int DoPreClose(UBYTE *); extern int DoPreRemove(UBYTE *); +extern int DoPreSortReallocate(UBYTE *); extern int DoCommentChar(UBYTE *); extern int DoPrcExtension(UBYTE *); extern int DoPreReset(UBYTE *); diff --git a/sources/execute.c b/sources/execute.c index 51434238..9c1f29b6 100644 --- a/sources/execute.c +++ b/sources/execute.c @@ -1078,6 +1078,11 @@ if ( AC.SwitchInArray > 0 ) { } #endif } + if ( AC.SortReallocateFlag == 2 ) { + /* The Flag was set for a single module by the preprocessor #sortreallocate, + so turn it off again. */ + AC.SortReallocateFlag = 0; + } return(RetCode); } diff --git a/sources/pre.c b/sources/pre.c index 4ba61bab..c62ca5b5 100644 --- a/sources/pre.c +++ b/sources/pre.c @@ -104,6 +104,7 @@ static KEYWORD precommands[] = { ,{"setrandom" , DoSetRandom , 0, 0} ,{"show" , DoPreShow , 0, 0} ,{"skipextrasymbols" , DoSkipExtraSymbols , 0, 0} + ,{"sortreallocate", DoPreSortReallocate , 0, 0} #ifdef WITHFLOAT ,{"startfloat" , DoStartFloat , 0, 0} #endif @@ -3777,6 +3778,23 @@ int DoPrePrintTimes(UBYTE *s) /* #] DoPrePrintTimes : + #[ DoPreSortReallocate : +*/ + +int DoPreSortReallocate(UBYTE *s) +{ + DUMMYUSE(s); + if ( AC.SortReallocateFlag == 0 ) { + /* Currently off, so set to 2. Then the reallocation code knows the flag was + set here, since "On sortreallocate;" sets it to 1. */ + AC.SortReallocateFlag = 2; + } + /* If the flag is already on, do nothing. 
*/ + return(0); +} + +/* + #] DoPreSortReallocate : #[ DoPreAppend : Syntax: diff --git a/sources/structs.h b/sources/structs.h index c72aa46a..9daaa0cb 100644 --- a/sources/structs.h +++ b/sources/structs.h @@ -1867,7 +1867,10 @@ struct C_const { int MemDebugFlag; /* Only used when MALLOCDEBUG in tools.c */ int OldGCDflag; int WTimeStatsFlag; - int SortReallocateFlag; /* Controls reallocation of large+small buffer at module end */ + int SortReallocateFlag; /* Controls reallocation of large+small buffer at module end. + 0 : Off + 1 : On, every module (set by On sortreallocate;) + 2 : On, single module (set by #sortreallocate) */ int doloopstacksize; int dolooplevel; int CheckpointFlag; /**< Tells preprocessor whether checkpoint code must executed. From 675228092cc4ed4f8c69e0a756eb431a6a2c6914 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Thu, 7 Nov 2024 16:26:22 +0000 Subject: [PATCH 4/5] Add sortreallocate documentation to the manual --- doc/manual/prepro.tex | 16 ++++++++++++++++ doc/manual/setup.tex | 8 ++++++++ doc/manual/statements.tex | 12 ++++++++++++ 3 files changed, 36 insertions(+) diff --git a/doc/manual/prepro.tex b/doc/manual/prepro.tex index f391c973..5a7e5ba3 100644 --- a/doc/manual/prepro.tex +++ b/doc/manual/prepro.tex @@ -1936,6 +1936,22 @@ \section{\#skipextrasymbols} incorrect. %--#] skipextrasymbols : +%--#[ sortreallocate : + +\section{\#sortreallocate} +\label{presortreallocate} + +\noindent Syntax: + +\#sortreallocate\index{\#sortreallocate} + +\noindent See also ``On sortreallocate;'' (\ref{substaon}). + +\noindent Reallocates the small and large buffer (also on the worker threads) +at the end of the current module. In some cases this can significantly reduce +\FORM's memory usage as measured by ``resident set size''. 
+ +%--#] sortreallocate : %--#[ switch : \section{\#switch} diff --git a/doc/manual/setup.tex b/doc/manual/setup.tex index a1bea7d0..a7b78fce 100644 --- a/doc/manual/setup.tex +++ b/doc/manual/setup.tex @@ -439,6 +439,14 @@ \chapter{The setup} with 1/N times the size of the buffer of the master. This may get made a bit bigger when potential conflicts with MaxTermSize occur. +The (typically) largest buffers (the small and large buffers) may be reallocated +at the end of a single module (see \#sortreallocate (\ref{presortreallocate})) or +at the end of each module (see ``On sortreallocate;'' (\ref{substaon})). In some +cases this can significantly reduce \FORM's memory usage as measured by +``resident set size''. For programs which consist of a large number of very +quickly-running modules, this can incur a noticeable performance penalty if performed +at the end of every module. + The default settings are \begin{center} \begin{tabular}{lrrr} diff --git a/doc/manual/statements.tex b/doc/manual/statements.tex index 5c7e55e1..38a25bd7 100644 --- a/doc/manual/statements.tex +++ b/doc/manual/statements.tex @@ -3836,6 +3836,10 @@ \section{off} \rightvitem{13cm}{Takes the writing of the statistics back from shorthand mode to the regular statistics mode in which each statistics messages takes three lines of text and one blank line.} + +\leftvitem{3.5cm}{sortreallocate\index{off!sortreallocate}} +\rightvitem{13cm}{Turns off the reallocation of the small and large buffer +at the end of each module.} \leftvitem{3.5cm}{threadloadbalancing\index{off!threadloadbalancing}} \rightvitem{13cm}{\vspace{1.5ex}Disables the loadbalancing mechanism of @@ -4001,6 +4005,14 @@ \section{on} \leftvitem{3.5cm}{shortstats\index{on!shortstats}} \rightvitem{13cm}{Same as `On shortstatistics'.} +\leftvitem{3.5cm}{sortreallocate\index{on!sortreallocate}} +\rightvitem{13cm}{Reallocate the small and large buffer (also on the worker +threads) at the end of every module. 
In some cases this can significantly reduce +\FORM's memory usage as measured by ``resident set size''. For programs which +consist of a large number of very quickly-running modules, this can incur a +noticeable performance penalty. See also \#sortreallocate (\ref{presortreallocate}) +for a single-module version of this feature.} + \leftvitem{3.5cm}{statistics\index{on!statistics}} \rightvitem{13cm}{Turns the writing of runtime statistics on. This is the default. It is possible to change this default with one of the setup From 12fa261449391b8ee23fa47eb7cd06f548d801a9 Mon Sep 17 00:00:00 2001 From: Josh Davies Date: Fri, 8 Nov 2024 10:23:48 +0000 Subject: [PATCH 5/5] Add simple tests of the sortreallocate feature --- check/features.frm | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/check/features.frm b/check/features.frm index 6793ff52..351963a5 100644 --- a/check/features.frm +++ b/check/features.frm @@ -1111,3 +1111,27 @@ format C; assert succeeded? assert !(file("out.c") =~ /[_] [+]= /) *--#] Issue392_ContinuationLines_0 : +*--#[ Sortrealloc_1 : +On sortreallocate; +Symbol x,y; +Local F = (x+y)^10; +.sort +Identify x = - y; +.sort +Print +s; +.end +assert succeeded? +assert result("F") =~ expr("0"); +*--#] Sortrealloc_1 : +*--#[ Sortrealloc_2 : +Symbol x,y; +Local F = (x+y)^10; +.sort +#sortreallocate +Identify x = - y; +.sort +Print +s; +.end +assert succeeded? +assert result("F") =~ expr("0"); +*--#] Sortrealloc_2 :