Skip to content
299 changes: 295 additions & 4 deletions cpp/ql/lib/semmle/code/cpp/rangeanalysis/SimpleRangeAnalysis.qll
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,297 @@ private predicate isRecursiveExpr(Expr e) {
)
}

/**
* Provides predicates that estimate the number of bounds that the range
* analysis might produce.
*/
private module BoundsEstimate {
/**
* Gets the limit beyond which we enable widening. I.e., if the estimated
* number of bounds exceeds this limit, we enable widening such that the limit
* will not be reached.
*/
float getBoundsLimit() {
// This limit is arbitrary, but low enough that it prevents timeouts on
// specific observed customer databases (and the in the tests).
result = 2.0.pow(40)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a perfectly fine threshold to start with. FWIW when I introduce an "arbitrary threshold" like this I like to do a small amount of investigation into the underlying distribution. See for example what I did in this PR from last year where I plotted "number of nested bitwise operations" for each bitwise operation in a database. It would be interesting to see a similar plot for "number of bounds" for each expression on a database or two.

... but as I said: I think this very high arbitrary threshold is perfectly fine as a start.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. @andersfugmann also suggested that we could create a statistics query and potentially get telemetry for this to make a more quantified upper bound.

}

/** Gets the maximum number of bounds possible when widening is used. */
private int getNrOfWideningBounds() {
result =
max(ArithmeticType t | | count(wideningLowerBounds(t)).maximum(count(wideningUpperBounds(t))))
}

/**
* Holds if `boundFromGuard(guard, v, _, branch)` holds, but without
* relying on range analysis (which would cause non-monotonic recursion
* elsewhere).
*/
private predicate hasBoundFromGuard(Expr guard, VariableAccess v, boolean branch) {
exists(Expr lhs | linearAccess(lhs, v, _, _) |
relOpWithSwapAndNegate(guard, lhs, _, _, _, branch)
or
eqOpWithSwapAndNegate(guard, lhs, _, true, branch)
or
eqZeroWithNegate(guard, lhs, true, branch)
)
}

/** Holds if `def` and `v` is a guard phi node with a bound from a guard. */
predicate isGuardPhiWithBound(RangeSsaDefinition def, StackVariable v, VariableAccess access) {
exists(Expr guard, boolean branch |
def.isGuardPhi(v, access, guard, branch) and
hasBoundFromGuard(guard, access, branch)
)
}

/** Gets the number of bounds for `def` and `v` as guard phi node. */
language[monotonicAggregates]
private float nrOfBoundsPhiGuard(RangeSsaDefinition def, StackVariable v) {
// If there's different `access`es, then they refer to the same variable
// with the same lower bounds. Hence adding these guards make no sense (the
// implementation will take the union but they'll be removed by
// deduplication). Hence we use `max` as an approximation.
result =
max(VariableAccess access | isGuardPhiWithBound(def, v, access) | nrOfBoundsExpr(access))
or
def.isPhiNode(v) and
not isGuardPhiWithBound(def, v, _) and
result = 0
}

/** Gets the number of bounds for `def` and `v` as normal phi node. */
language[monotonicAggregates]
private float nrOfBoundsPhiNormal(RangeSsaDefinition def, StackVariable v) {
// The implementation
result =
strictsum(RangeSsaDefinition inputDef |
inputDef = def.getAPhiInput(v)
|
nrOfBoundsDef(inputDef, v)
)
or
def.isPhiNode(v) and
not exists(def.getAPhiInput(v)) and
result = 0
}

/** Gets the number of bounds for `def` and `v` as an NE phi node. */
private float nrOfBoundsNEPhi(RangeSsaDefinition def, StackVariable v) {
exists(VariableAccess access | isNEPhi(v, def, access, _) and result = nrOfBoundsExpr(access))
or
def.isPhiNode(v) and
not isNEPhi(v, def, _, _) and
result = 0
}

/** Gets the number of bounds for `def` and `v` as an unsupported guard phi node. */
private float nrOfBoundsUnsupportedGuardPhi(RangeSsaDefinition def, StackVariable v) {
exists(VariableAccess access |
isUnsupportedGuardPhi(v, def, access) and
result = nrOfBoundsExpr(access)
)
or
def.isPhiNode(v) and
not isUnsupportedGuardPhi(v, def, _) and
result = 0
}

private float nrOfBoundsPhi(RangeSsaDefinition def, StackVariable v) {
// The cases for phi nodes are not mutually exclusive. For instance a phi
// node can be both a guard phi node and a normal phi node. To handle this
// we sum the contributions from the different cases.
result =
nrOfBoundsPhiGuard(def, v) + nrOfBoundsPhiNormal(def, v) + nrOfBoundsNEPhi(def, v) +
nrOfBoundsUnsupportedGuardPhi(def, v) and
result != 0
}

/** Gets the estimated number of bounds for `def` and `v`. */
float nrOfBoundsDef(RangeSsaDefinition def, StackVariable v) {
// Recursive definitions are already widened, so we simply estimate them as
// having the number of widening bounds available. This is crucial as it
// ensures that we don't follow recursive cycles when calculating the
// estimate. Had that not been the case the estimate itself would be at risk
// of causing performance issues and being non-functional.
if isRecursiveDef(def, v)
then result = getNrOfWideningBounds()
else (
// Definitions with a defining value
exists(Expr defExpr | assignmentDef(def, v, defExpr) and result = nrOfBoundsExpr(defExpr))
or
// Assignment operations with a defining value
exists(AssignOperation assignOp |
def = assignOp and
assignOp.getLValue() = v.getAnAccess() and
result = nrOfBoundsExpr(assignOp)
)
or
// Phi nodes
result = nrOfBoundsPhi(def, v)
or
unanalyzableDefBounds(def, v, _, _) and result = 1
)
}

/**
* Gets a naive estimate of the number of bounds for `e`.
*
* The estimate is like an abstract interpretation of the range analysis,
* where the abstract value is the number of bounds. For instance,
* `nrOfBoundsExpr(12) = 1` and `nrOfBoundsExpr(x + y) = nrOfBoundsExpr(x) *
* nrOfBoundsExpr(y)`.
*
* The estimated number of bounds will usually be greater than the actual
* number of bounds, as the estimate can not detect cases where bounds are cut
* down when tracked precisely. For instance, in
* ```c
* int x = 1;
* if (cond) { x = 1; }
* int y = x + x;
* ```
* the actual number of bounds for `y` is 1. However, the estimate will be 4
* as the conditional assignment to `x` gives two bounds for `x` on the last
* line and the addition gives 2 * 2 bounds. There are two sources of anncuracies:
*
* 1. Without tracking the lower bounds we can't see that `x` is assigned a
* value that is equal to its lower bound.
* 2. Had the conditional assignment been `x = 2` then the estimate of two
* bounds for `x` would have been correct. However, the estimate of 4 for `y`
* would still be incorrect. Summing the actual bounds `{1,2}` with itself
* gives `{2,3,4}` which is only three bounds. Again, we can't realise this
* without tracking the bounds.
*
* Since these inaccuracies compound the estimated number of bounds can often
* be _much_ greater than the actual number of bounds. Do note though that the
* estimate is not _guaranteed_ to be an upper bound. In some cases the
* approximations might underestimate the number of bounds.
*
* This predicate is functional. This is crucial as:
*
* - It ensures that the computing the estimate itself is fast.
* - Our use of monotonic aggregates assumes functionality.
*
* Any non-functional case should be considered a bug.
*/
float nrOfBoundsExpr(Expr e) {
// Similarly to what we do for definitions, we do not attempt to measure the
// number of bounds for recursive expressions.
if isRecursiveExpr(e)
then result = getNrOfWideningBounds()
else
if analyzableExpr(e)
then
// The cases here are an abstraction of and mirrors the cases inside
// `getLowerBoundsImpl`/`getUpperBoundsImpl`.
result = 1 and exists(getValue(e).toFloat())
or
exists(Expr operand | result = nrOfBoundsExpr(operand) |
effectivelyMultipliesByPositive(e, operand, _)
or
effectivelyMultipliesByNegative(e, operand, _)
)
or
exists(ConditionalExpr condExpr |
e = condExpr and
result = nrOfBoundsExpr(condExpr.getThen()) * nrOfBoundsExpr(condExpr.getElse())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This likely doesn't give us the best possible join ordering since this recursion is non-linear, but we've never bothered to actually fix this in the main recursion of SimpleRangeAnalysis itself so it's probably all fine.

)
or
exists(BinaryArithmeticOperation binop |
e = binop and
result = nrOfBoundsExpr(binop.getLeftOperand()) * nrOfBoundsExpr(binop.getRightOperand())
|
e instanceof MaxExpr or
e instanceof MinExpr or
e instanceof AddExpr or
e instanceof SubExpr or
e instanceof UnsignedMulExpr
)
or
exists(AssignExpr assign | e = assign and result = nrOfBoundsExpr(assign.getRValue()))
or
exists(AssignArithmeticOperation assignOp |
e = assignOp and
result = nrOfBoundsExpr(assignOp.getLValue()) * nrOfBoundsExpr(assignOp.getRValue())
|
e instanceof AssignAddExpr or
e instanceof AssignSubExpr or
e instanceof UnsignedAssignMulExpr
)
or
// Handles `AssignMulByPositiveConstantExpr` and `AssignMulByNegativeConstantExpr`
exists(AssignMulByConstantExpr mulExpr |
e = mulExpr and
result = nrOfBoundsExpr(mulExpr.getLValue())
)
or
// Handles the prefix and postfix increment and decrement operators.
exists(CrementOperation crementOp |
e = crementOp and result = nrOfBoundsExpr(crementOp.getOperand())
)
or
exists(RemExpr remExpr | e = remExpr | result = nrOfBoundsExpr(remExpr.getLeftOperand()))
or
exists(Conversion convExpr |
e = convExpr and
if convExpr.getUnspecifiedType() instanceof BoolType
then result = 1
else result = nrOfBoundsExpr(convExpr.getExpr())
)
or
exists(RangeSsaDefinition def, StackVariable v |
e = def.getAUse(v) and
result = nrOfBoundsDef(def, v) and
// Avoid returning two numbers when `e` is a use with a constant value.
not exists(getValue(e).toFloat())
)
or
e instanceof UnsignedBitwiseAndExpr and
result = 1
or
exists(RShiftExpr rsExpr |
e = rsExpr and
exists(getValue(rsExpr.getRightOperand().getFullyConverted()).toInt()) and
result = nrOfBoundsExpr(rsExpr.getLeftOperand())
)
else (
exists(exprMinVal(e)) and result = 1
)
}
}

/**
* Holds if `v` is a variable for which widening should be used, as otherwise a
* very large number of bounds might be generated during the range analysis for
* `v`.
*/
private predicate varHasTooManyBounds(StackVariable v) {
exists(RangeSsaDefinition def |
def.getAVariable() = v and
BoundsEstimate::nrOfBoundsDef(def, v) > BoundsEstimate::getBoundsLimit()
)
}

/**
* Holds if `e` is an expression for which widening should be used, as otherwise
* a very large number of bounds might be generated during the range analysis
* for `e`.
*/
private predicate exprHasTooManyBounds(Expr e) {
BoundsEstimate::nrOfBoundsExpr(e) > BoundsEstimate::getBoundsLimit()
or
// A subexpressions of an expression with too many bounds may itself not have
// to many bounds. For instance, `x + y` can have too many bounds without `x`
// having as well. But in these cases, still want to consider `e` as having
// too many bounds since:
// - The overall result is widened anyway, so widening `e` as well is unlikely
// to cause further precision loss.
// - The number of bounds could be very large but still below the arbitrary
// limit. Hence widening `e` can improve performance.
exists(Expr pe | exprHasTooManyBounds(pe) and e.getParent() = pe)
}

/**
* Holds if `binop` is a binary operation that's likely to be assigned a
* quadratic (or more) number of candidate bounds during the analysis. This can
Expand Down Expand Up @@ -667,7 +958,7 @@ private float getTruncatedLowerBounds(Expr expr) {
if exprMinVal(expr) <= newLB and newLB <= exprMaxVal(expr)
then
// Apply widening where we might get a combinatorial explosion.
if isRecursiveBinary(expr)
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
then result = widenLowerBound(expr.getUnspecifiedType(), newLB)
else result = newLB
else result = exprMinVal(expr)
Expand Down Expand Up @@ -721,7 +1012,7 @@ private float getTruncatedUpperBounds(Expr expr) {
if exprMinVal(expr) <= newUB and newUB <= exprMaxVal(expr)
then
// Apply widening where we might get a combinatorial explosion.
if isRecursiveBinary(expr)
if isRecursiveBinary(expr) or exprHasTooManyBounds(expr)
then result = widenUpperBound(expr.getUnspecifiedType(), newUB)
else result = newUB
else result = exprMaxVal(expr)
Expand Down Expand Up @@ -1812,7 +2103,7 @@ module SimpleRangeAnalysisInternal {
|
// Widening: check whether the new lower bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
then
// The new lower bound is from a recursive source, so we round
// down to one of a limited set of values to prevent the
Expand All @@ -1836,7 +2127,7 @@ module SimpleRangeAnalysisInternal {
|
// Widening: check whether the new upper bound is from a source which
// depends recursively on the current definition.
if isRecursiveDef(def, v)
if isRecursiveDef(def, v) or varHasTooManyBounds(v)
then
// The new upper bound is from a recursive source, so we round
// up to one of a fixed set of values to prevent the recursion
Expand Down
Loading