feat: add StrictRounding statement to round floats and to trim excess GMP precision

cbmarini · cbmarini · commit 03f072c4459e · 2025-10-28T11:44:13.000+01:00
- The user can specify a precision (in bits or digits) for the rounding. If no number is given, the default precision is used. - Ensures that extra internal GMP precision (used for tracking accuracy) is cut off and properly rounded - Internally the mpf_get_str and mpf_set_str are used. This means we could also implement base 2 rounding. - refactor: changed AO.floatsize such that it can now also hold floats in base 2. - This resolves #698 - added a test and documentation for strictrounding.
diff --git a/check/features.frm b/check/features.frm
@@ -1415,6 +1415,26 @@ assert result("ATANH") =~ expr("
        - 6.08698087464190136361e-01*atanh( - 5.4321e-01)
 ")
 *--#] evaluate_atanh : 
+*--#[ strictrounding :
+#StartFloat 6d
+CFunction f;
+Local F1 = 1.23456789e-4+f(1.0)+f(1.0000001);
+Print;
+.sort
+
+Skip F1;
+Local F2 = F1;
+StrictRounding 4d;
+Argument f;
+	StrictRounding;
+EndArgument;
+Print;
+.end
+#pend_if wordsize == 2
+assert succeeded?
+assert result("F1") =~ expr("1.23457e-04 + f(1.0e+00) + f(1.0e+00)")
+assert result("F2") =~ expr("1.235e-04 + 2*f(1.0e+00)")
+*--#] strictrounding : 
 *--#[ float_error :
 Evaluate;
 ToFloat;
diff --git a/doc/manual/float.tex b/doc/manual/float.tex
@@ -51,6 +51,44 @@ \chapter{Floating point}
 arguments are the functions mzv\_, euler\_, sqrt\_ and mzvhalf\_. If any 
 (or more than one) of these are specified only those functions will be 
 evaluated.
+\item[strictrounding] This statement rounds floating point numbers to a 
+given precision. The syntax is
+\begin{verbatim}
+    strictrounding [precision];
+\end{verbatim}
+where precision is an optional argument that specifies the rounding 
+precision in either digits or bits, using the same syntax as 
+\texttt{\#startfloat}. If no argument is given, this statement rounds 
+the floating point coefficients to the default precision. Internally, 
+the GMP and mpfr libraries may use extra precision beyond that set by 
+\texttt{\#startfloat}. As a result, terms may not merge due to this 
+extra precision. For example:
+\begin{verbatim}
+    #startfloat 6d
+    CFunction f;
+    Local F = f(1.0)+f(1.0000001);
+    Print;
+    .sort
+\end{verbatim}
+results in \texttt{F = f(1.0e+00)+f(1.0e+00);}. Although it may appear 
+that the terms should merge, the extra precision maintained by GMP 
+prevents this, even though it is not displayed at 6 digits of 
+precision. Using the strictrounding statement, one can force rounding 
+to exactly 6 digits. Indeed:
+\begin{verbatim}
+    Argument f;
+        strictrounding;
+    Argument;
+    Print;
+    .end
+\end{verbatim}
+results in \texttt{F = 2*f(1.0e+00);}.
+Notice that rounding in bits may produce unexpected results when viewed 
+in decimal digits. For example, the decimal number 1.1e-4 cannot be 
+represented exactly in binary. Its binary representation, up to 20 bits of precision, is 
+$1.1100110101011111101*2^{-14}$. When rounded to 5 bits, this becomes 
+$1.1101*2^{-14}$, which in decimal digits appears as 
+1.10626220703125e-04.
 \item[Format floatprecision] This instruction controls how many digits are 
 displayed when printing floating point numbers. It only affects output 
 formatting and does not influence the internal precision or accuracy of 
diff --git a/doc/manual/statements.tex b/doc/manual/statements.tex
@@ -5357,6 +5357,18 @@ \section{splitlastarg}
 specified all arguments will be treated. \vspace{10mm}
 
 %--#] splitlastarg : 
+%--#[ strictrounding :
+\section{strictrounding}
+\label{substaevaluate}
+
+\noindent \begin{tabular}{ll}
+Type & Executable statement\\
+Syntax & strictrounding [$<$precision$>$]; \\ 
+\end{tabular} \vspace{4mm}
+
+\noindent See chapter~\ref{floatingpoint} on the floating point capability.
+ 
+%--#] strictrounding : 
 %--#[ stuffle :
 %
 \section{stuffle}
diff --git a/sources/compcomm.c b/sources/compcomm.c
@@ -373,7 +373,7 @@ int CoFormat(UBYTE *s)
 #ifdef WITHFLOAT
 			else if ( key->flags == 5 ) {
 /*
-				Syntax: Format FloatPrecision number;
+				Syntax: Format FloatPrecision [precision];
 				        Format FloatPrecision off;
 */
 				while ( FG.cTable[*s] == 0 ) s++;
@@ -391,17 +391,15 @@ int CoFormat(UBYTE *s)
 				}
 				else if ( FG.cTable[*s] == 1 ) {
 					ss = s;
-					AO.FloatPrec = 0;
-					while ( *s <= '9' && *s >= '0' )
-						AO.FloatPrec = 10*AO.FloatPrec + (*s++ - '0');
-					while ( *s == ' ' || *s == '\t' || *s == ',' ) s++;
+					ParseNumber(AO.FloatPrec,s)
 /*
 					The precision can either be in digits or bits. 
 					AO.FloatPrec is in digits. 
 */
 					if ( tolower(*s) == 'd' ) { s++; }
 					else if ( tolower(*s) == 'b' ) { AO.FloatPrec = AO.FloatPrec*log10(2.0); s++; }
 					else { s = ss; goto WrongOption; }
+					while ( *s == ' ' || *s == '\t' || *s == ',' ) s++;
 					if ( *s ) { s = ss; goto WrongOption; }
 				}
 				else {
diff --git a/sources/compiler.c b/sources/compiler.c
@@ -218,6 +218,9 @@ static KEYWORD com2commands[] = {
 	,{"splitarg",       (TFUN)CoSplitArg,         STATEMENT,    PARTEST}
 	,{"splitfirstarg",  (TFUN)CoSplitFirstArg,    STATEMENT,    PARTEST}
 	,{"splitlastarg",   (TFUN)CoSplitLastArg,     STATEMENT,    PARTEST}
+#ifdef WITHFLOAT
+	,{"strictrounding", (TFUN)CoStrictRounding,   STATEMENT,    PARTEST}
+#endif
 	,{"stuffle",        (TFUN)CoStuffle,          STATEMENT,    PARTEST}
 	,{"sum",            (TFUN)CoSum,              STATEMENT,    PARTEST}
 	,{"switch",         (TFUN)CoSwitch,           STATEMENT,    PARTEST}
diff --git a/sources/declare.h b/sources/declare.h
@@ -1743,6 +1743,8 @@ SBYTE *ReadFloat(SBYTE *);
 UBYTE *CheckFloat(UBYTE *,int *);
 void SetfFloatPrecision(LONG);
 int EvaluateFun(PHEAD WORD *, WORD, WORD *);
+int CoStrictRounding(UBYTE *);
+int StrictRounding(PHEAD WORD *, WORD, WORD, WORD);
 #endif
 
 /*
diff --git a/sources/float.c b/sources/float.c
@@ -902,10 +902,13 @@ UBYTE *CheckFloat(UBYTE *ss, int *spec)
   	#[ Float Routines :
  		#[ SetFloatPrecision :
 
-		We set the default precision of the floats and allocate
-		space for an output string if we want to write the float.
-		Space needed: exponent: up to 12 chars.
-		mantissa 2+10*prec/33 + a little bit extra.
+		Sets the default precision (in bits) of the floats and allocate
+		buffer space for an output string.
+		The buffer is used by PrintFloat (decimal output) and Strictrounding 
+		(binary or decimal output), so it must accommodate the larger space
+		requirement:
+		exponent: up to 12 chars.
+		mantissa: prec + a little bit extra
 */
 
 int SetFloatPrecision(WORD prec)
@@ -917,7 +920,7 @@ int SetFloatPrecision(WORD prec)
 	else {
 		AC.DefaultPrecision = prec;
 		if ( AO.floatspace != 0 ) M_free(AO.floatspace,"floatspace");
-		AO.floatsize = ((10*prec)/33+20)*sizeof(char);
+		AO.floatsize = (prec+20)*sizeof(char);
 		AO.floatspace = (UBYTE *)Malloc1(AO.floatsize,"floatspace");
 		mpf_set_default_prec(prec);
 		return(0);
@@ -1348,6 +1351,50 @@ int CoToRat(UBYTE *s)
 
 /*
  		#] CoToRat : 
+ 		#[ CoStrictRounding : 
+
+		Syntax: StrictRounding [precision][base]
+		- precision: number of digits to round to (optional)
+		- base: 'd' for decimal (base 10) or 'b' for binary (base 2)
+		
+		If no arguments are provided, uses default precision with binary base.
+*/
+int CoStrictRounding(UBYTE *s)
+{
+	GETIDENTITY
+	WORD x;
+	int base;
+	if ( AT.aux_ == 0 ) {
+		MesPrint("&Illegal attempt for strict rounding without activating floating point numbers.");
+		MesPrint("&Forgotten %#startfloat instruction?");
+		return(1);
+	}
+	while ( *s == ' ' || *s == ',' || *s == '\t' ) s++;
+	if ( *s == 0 ) {
+		/* No subkey, which means round to default precision */
+		x = AC.DefaultPrecision - AC.MaxWeight - 1;
+		Add4Com(TYPESTRICTROUNDING,x,2);
+		return(0);
+	}
+	if ( FG.cTable[*s] == 1 ) { /* number */
+		ParseNumber(x,s)
+		if ( tolower(*s) == 'd' ) { base = 10; s++; }      /* decimal base */
+		else if ( tolower(*s) == 'b' ){ base = 2; s++; }  /* binary base */
+		else goto IllPar;  /* invalid base specification */
+	}
+	while ( *s == ' ' || *s == ',' || *s == '\t' ) s++;
+	
+	/* Check for invalid arguments */
+	if ( *s ) {
+IllPar:
+		MesPrint("&Illegal argument(s) in StrictRounding statement: '%s'",s);
+		return(1);
+	}
+	Add4Com(TYPESTRICTROUNDING,x,base);
+	return(0);
+} 
+/*
+ 		#] CoStrictRounding : 
  		#[ ToFloat :
 
 		Converts the coefficient to floating point if it is still a rat.
@@ -1416,6 +1463,64 @@ int ToRat(PHEAD WORD *term, WORD level)
 
 /*
  		#] ToRat : 
+ 		#[ StrictRounding : 
+
+		Rounds floating point numbers to a specified precision
+		in a given base (decimal or binary).
+*/
+int StrictRounding(PHEAD WORD *term, WORD level, WORD prec, WORD base) {
+	WORD *t,*tstop;
+	int sign,size,maxprec = AC.DefaultPrecision-AC.MaxWeight-1;
+	/* maxprec is in bits */
+	if ( base == 2 && prec > maxprec ) {
+		prec = maxprec;
+	}
+	if ( base == 10 && prec > (int)(maxprec*log10(2.0)) ) {
+		prec = maxprec*log10(2.0);
+	}
+	/* Find the float which should be at the end. */
+	tstop = term + *term; size = ABS(tstop[-1]);
+	sign = tstop[-1] < 0 ? -1: 1; tstop -= size;
+	t = term+1;
+	while ( t < tstop ) {
+		if ( *t == FLOATFUN && t + t[1] == tstop && TestFloat(t) &&
+		size == 3 && tstop[0] == 1 && tstop[1] == 1) {
+			break;
+		}
+		t += t[1];
+	}
+	if ( t < tstop ) {
+/*
+		Now t points at the float_ function and everything is correct.
+		The result can go straight over the float_ function.
+*/
+		char *s;
+		mp_exp_t exp;
+		/* Extract the floating point value */
+		UnpackFloat(aux4,t);
+		/* Convert to string: 
+		   - Format as MeN with M the mantissa and N the exponent 
+		   - the generated string by mpf_get_str is the fraction/mantissa with 
+		   an implicit radix point immediately to the left of the first digit. 
+		   The applicable exponent is written in exp. */
+		s = (char *)AO.floatspace;
+		*s++ = '.';
+		mpf_get_str(s,&exp, base, prec, aux4);
+		while ( *s != 0 ) s++;
+		*s++ = 'e';
+		snprintf(s,AO.floatsize-(s-(char *)AO.floatspace),"%ld",exp);
+		/* Negative base values are used to specify that the exponent is in decimal */
+		mpf_set_str(aux4,(char *)AO.floatspace,-base);
+		/* Pack the rounded floating point value back into the term */
+		PackFloat(t,aux4);
+		t+=t[1];
+		*t++ = 1; *t++ = 1; *t++ = 3*sign;
+		*term = t - term;
+	}
+	return(Generator(BHEAD term,level));
+}
+/*
+ 		#] StrictRounding : 
   	#] Float Routines : 
   	#[ Sorting :
 
diff --git a/sources/ftypes.h b/sources/ftypes.h
@@ -605,6 +605,7 @@ typedef int (*TFUN1)();
 #define TYPEEXPAND 88
 #define TYPETOFLOAT 89
 #define TYPETORAT 90
+#define TYPESTRICTROUNDING 91
 #endif
 
 /*
diff --git a/sources/proces.c b/sources/proces.c
@@ -3990,6 +3990,10 @@ SkipCount:	level++;
 					AT.WorkPointer = term + *term;
 					if ( ToRat(BHEAD term,level) ) goto GenCall;
 					goto Return0;
+				  case TYPESTRICTROUNDING:
+					AT.WorkPointer = term + *term;
+					if ( StrictRounding(BHEAD term,level,C->lhs[level][2],C->lhs[level][3]) ) goto GenCall;
+					goto Return0;
 #endif
 				}
 				goto SkipCount;