Skip to content

Commit 03f072c

Browse files
committed
feat: add StrictRounding statement to round floats and to trim excess GMP precision
- The user can specify a precision (in bits or digits) for the rounding. If no number is given, the default precision is used. - Ensures that extra internal GMP precision (used for tracking accuracy) is cut off and properly rounded - Internally the mpf_get_str and mpf_set_str are used. This means we could also implement base 2 rounding. - refactor: changed AO.floatsize such that it can now also hold floats in base 2. - This resolves #698 - added a test and documentation for strictrounding.
1 parent afebe66 commit 03f072c

File tree

9 files changed

+193
-10
lines changed

9 files changed

+193
-10
lines changed

check/features.frm

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,26 @@ assert result("ATANH") =~ expr("
14151415
- 6.08698087464190136361e-01*atanh( - 5.4321e-01)
14161416
")
14171417
*--#] evaluate_atanh :
1418+
*--#[ strictrounding :
1419+
#StartFloat 6d
1420+
CFunction f;
1421+
Local F1 = 1.23456789e-4+f(1.0)+f(1.0000001);
1422+
Print;
1423+
.sort
1424+
1425+
Skip F1;
1426+
Local F2 = F1;
1427+
StrictRounding 4d;
1428+
Argument f;
1429+
StrictRounding;
1430+
EndArgument;
1431+
Print;
1432+
.end
1433+
#pend_if wordsize == 2
1434+
assert succeeded?
1435+
assert result("F1") =~ expr("1.23457e-04 + f(1.0e+00) + f(1.0e+00)")
1436+
assert result("F2") =~ expr("1.235e-04 + 2*f(1.0e+00)")
1437+
*--#] strictrounding :
14181438
*--#[ float_error :
14191439
Evaluate;
14201440
ToFloat;

doc/manual/float.tex

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,44 @@ \chapter{Floating point}
5151
arguments are the functions mzv\_, euler\_, sqrt\_ and mzvhalf\_. If any
5252
(or more than one) of these are specified only those functions will be
5353
evaluated.
54+
\item[strictrounding] This statement rounds floating point numbers to a
55+
given precision. The syntax is
56+
\begin{verbatim}
57+
strictrounding [precision];
58+
\end{verbatim}
59+
where precision is an optional argument that specifies the rounding
60+
precision in either digits or bits, using the same syntax as
61+
\texttt{\#startfloat}. If no argument is given, this statement rounds
62+
the floating point coefficients to the default precision. Internally,
63+
the GMP and mpfr libraries may use extra precision beyond that set by
64+
\texttt{\#startfloat}. As a result, terms may not merge due to this
65+
extra precision. For example:
66+
\begin{verbatim}
67+
#startfloat 6d
68+
CFunction f;
69+
Local F = f(1.0)+f(1.0000001);
70+
Print;
71+
.sort
72+
\end{verbatim}
73+
results in \texttt{F = f(1.0e+00)+f(1.0e+00);}. Although it may appear
74+
that the terms should merge, the extra precision maintained by GMP
75+
prevents this, even though it is not displayed at 6 digits of
76+
precision. Using the strictrounding statement, one can force rounding
77+
to exactly 6 digits. Indeed:
78+
\begin{verbatim}
79+
Argument f;
80+
strictrounding;
81+
Argument;
82+
Print;
83+
.end
84+
\end{verbatim}
85+
results in \texttt{F = 2*f(1.0e+00);}.
86+
Notice that rounding in bits may produce unexpected results when viewed
87+
in decimal digits. For example, the decimal number 1.1e-4 cannot be
88+
represented exactly in binary. Its binary representation, up to 20 bits of precision, is
89+
$1.1100110101011111101*2^{-14}$. When rounded to 5 bits, this becomes
90+
$1.1101*2^{-14}$, which in decimal digits appears as
91+
1.10626220703125e-04.
5492
\item[Format floatprecision] This instruction controls how many digits are
5593
displayed when printing floating point numbers. It only affects output
5694
formatting and does not influence the internal precision or accuracy of

doc/manual/statements.tex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5357,6 +5357,18 @@ \section{splitlastarg}
53575357
specified all arguments will be treated. \vspace{10mm}
53585358

53595359
%--#] splitlastarg :
5360+
%--#[ strictrounding :
5361+
\section{strictrounding}
5362+
\label{substaevaluate}
5363+
5364+
\noindent \begin{tabular}{ll}
5365+
Type & Executable statement\\
5366+
Syntax & strictrounding [$<$precision$>$]; \\
5367+
\end{tabular} \vspace{4mm}
5368+
5369+
\noindent See chapter~\ref{floatingpoint} on the floating point capability.
5370+
5371+
%--#] strictrounding :
53605372
%--#[ stuffle :
53615373
%
53625374
\section{stuffle}

sources/compcomm.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ int CoFormat(UBYTE *s)
373373
#ifdef WITHFLOAT
374374
else if ( key->flags == 5 ) {
375375
/*
376-
Syntax: Format FloatPrecision number;
376+
Syntax: Format FloatPrecision [precision];
377377
Format FloatPrecision off;
378378
*/
379379
while ( FG.cTable[*s] == 0 ) s++;
@@ -391,17 +391,15 @@ int CoFormat(UBYTE *s)
391391
}
392392
else if ( FG.cTable[*s] == 1 ) {
393393
ss = s;
394-
AO.FloatPrec = 0;
395-
while ( *s <= '9' && *s >= '0' )
396-
AO.FloatPrec = 10*AO.FloatPrec + (*s++ - '0');
397-
while ( *s == ' ' || *s == '\t' || *s == ',' ) s++;
394+
ParseNumber(AO.FloatPrec,s)
398395
/*
399396
The precision can either be in digits or bits.
400397
AO.FloatPrec is in digits.
401398
*/
402399
if ( tolower(*s) == 'd' ) { s++; }
403400
else if ( tolower(*s) == 'b' ) { AO.FloatPrec = AO.FloatPrec*log10(2.0); s++; }
404401
else { s = ss; goto WrongOption; }
402+
while ( *s == ' ' || *s == '\t' || *s == ',' ) s++;
405403
if ( *s ) { s = ss; goto WrongOption; }
406404
}
407405
else {

sources/compiler.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ static KEYWORD com2commands[] = {
218218
,{"splitarg", (TFUN)CoSplitArg, STATEMENT, PARTEST}
219219
,{"splitfirstarg", (TFUN)CoSplitFirstArg, STATEMENT, PARTEST}
220220
,{"splitlastarg", (TFUN)CoSplitLastArg, STATEMENT, PARTEST}
221+
#ifdef WITHFLOAT
222+
,{"strictrounding", (TFUN)CoStrictRounding, STATEMENT, PARTEST}
223+
#endif
221224
,{"stuffle", (TFUN)CoStuffle, STATEMENT, PARTEST}
222225
,{"sum", (TFUN)CoSum, STATEMENT, PARTEST}
223226
,{"switch", (TFUN)CoSwitch, STATEMENT, PARTEST}

sources/declare.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,6 +1743,8 @@ SBYTE *ReadFloat(SBYTE *);
17431743
UBYTE *CheckFloat(UBYTE *,int *);
17441744
void SetfFloatPrecision(LONG);
17451745
int EvaluateFun(PHEAD WORD *, WORD, WORD *);
1746+
int CoStrictRounding(UBYTE *);
1747+
int StrictRounding(PHEAD WORD *, WORD, WORD, WORD);
17461748
#endif
17471749

17481750
/*

sources/float.c

Lines changed: 110 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -902,10 +902,13 @@ UBYTE *CheckFloat(UBYTE *ss, int *spec)
902902
#[ Float Routines :
903903
#[ SetFloatPrecision :
904904
905-
We set the default precision of the floats and allocate
906-
space for an output string if we want to write the float.
907-
Space needed: exponent: up to 12 chars.
908-
mantissa 2+10*prec/33 + a little bit extra.
905+
Sets the default precision (in bits) of the floats and allocate
906+
buffer space for an output string.
907+
The buffer is used by PrintFloat (decimal output) and Strictrounding
908+
(binary or decimal output), so it must accommodate the larger space
909+
requirement:
910+
exponent: up to 12 chars.
911+
mantissa: prec + a little bit extra
909912
*/
910913

911914
int SetFloatPrecision(WORD prec)
@@ -917,7 +920,7 @@ int SetFloatPrecision(WORD prec)
917920
else {
918921
AC.DefaultPrecision = prec;
919922
if ( AO.floatspace != 0 ) M_free(AO.floatspace,"floatspace");
920-
AO.floatsize = ((10*prec)/33+20)*sizeof(char);
923+
AO.floatsize = (prec+20)*sizeof(char);
921924
AO.floatspace = (UBYTE *)Malloc1(AO.floatsize,"floatspace");
922925
mpf_set_default_prec(prec);
923926
return(0);
@@ -1348,6 +1351,50 @@ int CoToRat(UBYTE *s)
13481351

13491352
/*
13501353
#] CoToRat :
1354+
#[ CoStrictRounding :
1355+
1356+
Syntax: StrictRounding [precision][base]
1357+
- precision: number of digits to round to (optional)
1358+
- base: 'd' for decimal (base 10) or 'b' for binary (base 2)
1359+
1360+
If no arguments are provided, uses default precision with binary base.
1361+
*/
1362+
int CoStrictRounding(UBYTE *s)
1363+
{
1364+
GETIDENTITY
1365+
WORD x;
1366+
int base;
1367+
if ( AT.aux_ == 0 ) {
1368+
MesPrint("&Illegal attempt for strict rounding without activating floating point numbers.");
1369+
MesPrint("&Forgotten %#startfloat instruction?");
1370+
return(1);
1371+
}
1372+
while ( *s == ' ' || *s == ',' || *s == '\t' ) s++;
1373+
if ( *s == 0 ) {
1374+
/* No subkey, which means round to default precision */
1375+
x = AC.DefaultPrecision - AC.MaxWeight - 1;
1376+
Add4Com(TYPESTRICTROUNDING,x,2);
1377+
return(0);
1378+
}
1379+
if ( FG.cTable[*s] == 1 ) { /* number */
1380+
ParseNumber(x,s)
1381+
if ( tolower(*s) == 'd' ) { base = 10; s++; } /* decimal base */
1382+
else if ( tolower(*s) == 'b' ){ base = 2; s++; } /* binary base */
1383+
else goto IllPar; /* invalid base specification */
1384+
}
1385+
while ( *s == ' ' || *s == ',' || *s == '\t' ) s++;
1386+
1387+
/* Check for invalid arguments */
1388+
if ( *s ) {
1389+
IllPar:
1390+
MesPrint("&Illegal argument(s) in StrictRounding statement: '%s'",s);
1391+
return(1);
1392+
}
1393+
Add4Com(TYPESTRICTROUNDING,x,base);
1394+
return(0);
1395+
}
1396+
/*
1397+
#] CoStrictRounding :
13511398
#[ ToFloat :
13521399
13531400
Converts the coefficient to floating point if it is still a rat.
@@ -1416,6 +1463,64 @@ int ToRat(PHEAD WORD *term, WORD level)
14161463

14171464
/*
14181465
#] ToRat :
1466+
#[ StrictRounding :
1467+
1468+
Rounds floating point numbers to a specified precision
1469+
in a given base (decimal or binary).
1470+
*/
1471+
int StrictRounding(PHEAD WORD *term, WORD level, WORD prec, WORD base) {
1472+
WORD *t,*tstop;
1473+
int sign,size,maxprec = AC.DefaultPrecision-AC.MaxWeight-1;
1474+
/* maxprec is in bits */
1475+
if ( base == 2 && prec > maxprec ) {
1476+
prec = maxprec;
1477+
}
1478+
if ( base == 10 && prec > (int)(maxprec*log10(2.0)) ) {
1479+
prec = maxprec*log10(2.0);
1480+
}
1481+
/* Find the float which should be at the end. */
1482+
tstop = term + *term; size = ABS(tstop[-1]);
1483+
sign = tstop[-1] < 0 ? -1: 1; tstop -= size;
1484+
t = term+1;
1485+
while ( t < tstop ) {
1486+
if ( *t == FLOATFUN && t + t[1] == tstop && TestFloat(t) &&
1487+
size == 3 && tstop[0] == 1 && tstop[1] == 1) {
1488+
break;
1489+
}
1490+
t += t[1];
1491+
}
1492+
if ( t < tstop ) {
1493+
/*
1494+
Now t points at the float_ function and everything is correct.
1495+
The result can go straight over the float_ function.
1496+
*/
1497+
char *s;
1498+
mp_exp_t exp;
1499+
/* Extract the floating point value */
1500+
UnpackFloat(aux4,t);
1501+
/* Convert to string:
1502+
- Format as MeN with M the mantissa and N the exponent
1503+
- the generated string by mpf_get_str is the fraction/mantissa with
1504+
an implicit radix point immediately to the left of the first digit.
1505+
The applicable exponent is written in exp. */
1506+
s = (char *)AO.floatspace;
1507+
*s++ = '.';
1508+
mpf_get_str(s,&exp, base, prec, aux4);
1509+
while ( *s != 0 ) s++;
1510+
*s++ = 'e';
1511+
snprintf(s,AO.floatsize-(s-(char *)AO.floatspace),"%ld",exp);
1512+
/* Negative base values are used to specify that the exponent is in decimal */
1513+
mpf_set_str(aux4,(char *)AO.floatspace,-base);
1514+
/* Pack the rounded floating point value back into the term */
1515+
PackFloat(t,aux4);
1516+
t+=t[1];
1517+
*t++ = 1; *t++ = 1; *t++ = 3*sign;
1518+
*term = t - term;
1519+
}
1520+
return(Generator(BHEAD term,level));
1521+
}
1522+
/*
1523+
#] StrictRounding :
14191524
#] Float Routines :
14201525
#[ Sorting :
14211526

sources/ftypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,7 @@ typedef int (*TFUN1)();
605605
#define TYPEEXPAND 88
606606
#define TYPETOFLOAT 89
607607
#define TYPETORAT 90
608+
#define TYPESTRICTROUNDING 91
608609
#endif
609610

610611
/*

sources/proces.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3990,6 +3990,10 @@ SkipCount: level++;
39903990
AT.WorkPointer = term + *term;
39913991
if ( ToRat(BHEAD term,level) ) goto GenCall;
39923992
goto Return0;
3993+
case TYPESTRICTROUNDING:
3994+
AT.WorkPointer = term + *term;
3995+
if ( StrictRounding(BHEAD term,level,C->lhs[level][2],C->lhs[level][3]) ) goto GenCall;
3996+
goto Return0;
39933997
#endif
39943998
}
39953999
goto SkipCount;

0 commit comments

Comments
 (0)