From 5c19aad012d3743bfebc8ebb92526b7e55c29357 Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Wed, 9 Jul 2025 14:07:01 +0100 Subject: [PATCH 1/3] Java: Make Virtual Dispatch Global, but keep SSA local. Use forceLocal to achive this. --- java/ql/lib/semmle/code/java/dataflow/SSA.qll | 10 +++++-- .../code/java/dataflow/internal/SsaImpl.qll | 26 +++++++++++++++++-- .../code/java/dispatch/DispatchFlow.qll | 2 +- .../lib/semmle/code/java/dispatch/ObjFlow.qll | 2 +- .../code/java/dispatch/VirtualDispatch.qll | 2 +- .../java/dispatch/internal/Unification.qll | 2 +- 6 files changed, 36 insertions(+), 8 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/SSA.qll b/java/ql/lib/semmle/code/java/dataflow/SSA.qll index dd902b70e35a..3f47689fd973 100644 --- a/java/ql/lib/semmle/code/java/dataflow/SSA.qll +++ b/java/ql/lib/semmle/code/java/dataflow/SSA.qll @@ -253,17 +253,18 @@ class SsaImplicitUpdate extends SsaUpdate { or if this.hasImplicitQualifierUpdate() then - if exists(this.getANonLocalUpdate()) + if isNonNonLocal(this) then result = "nonlocal + nonlocal qualifier" else result = "nonlocal qualifier" else ( - exists(this.getANonLocalUpdate()) and result = "nonlocal" + isNonNonLocal(this) and result = "nonlocal" ) } /** * Gets a reachable `FieldWrite` that might represent this ssa update, if any. */ + overlay[global] FieldWrite getANonLocalUpdate() { exists(SsaSourceField f, Callable setter | relevantFieldUpdate(setter, f.getField(), result) and @@ -287,6 +288,11 @@ class SsaImplicitUpdate extends SsaUpdate { } } +overlay[global] +private predicate isNonNonLocalImpl(SsaImplicitUpdate su) { exists(su.getANonLocalUpdate()) } + +private predicate isNonNonLocal(SsaImplicitUpdate su) = forceLocal(isNonNonLocalImpl/1)(su) + /** * An SSA variable that represents an uncertain implicit update of the value. * This is a `Call` that might reach a non-local update of the field or one of diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll index dc2cb7e7d00d..13e1aee6ad83 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll @@ -157,15 +157,20 @@ private predicate hasEntryDef(TrackedVar v, BasicBlock b) { } /** Holds if `n` might update the locally tracked variable `v`. */ +overlay[global] pragma[nomagic] -private predicate uncertainVariableUpdate(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) { +private predicate uncertainVariableUpdateImpl(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) { exists(Call c | c = n.asCall() | updatesNamedField(c, v, _)) and b.getNode(i) = n and hasDominanceInformation(b) or - uncertainVariableUpdate(v.getQualifier(), n, b, i) + uncertainVariableUpdateImpl(v.getQualifier(), n, b, i) } +/** Holds if `n` might update the locally tracked variable `v`. */ +predicate uncertainVariableUpdate(TrackedVar v, ControlFlowNode n, BasicBlock b, int i) = + forceLocal(uncertainVariableUpdateImpl/4)(v, n, b, i) + private module SsaInput implements SsaImplCommon::InputSig { class SourceVariable = SsaSourceVariable; @@ -335,6 +340,7 @@ private module Cached { * Constructor --(intraInstanceCallEdge)-->+ Method(setter of this.f) * ``` */ + overlay[global] private predicate intraInstanceCallEdge(Callable c1, Method m2) { exists(MethodCall ma, RefType t1 | ma.getCaller() = c1 and @@ -355,6 +361,7 @@ private module Cached { ) } + overlay[global] private Callable tgt(Call c) { result = viableImpl_v2(c) or @@ -364,11 +371,13 @@ private module Cached { } /** Holds if `(c1,c2)` is an edge in the call graph. */ + overlay[global] private predicate callEdge(Callable c1, Callable c2) { exists(Call c | c.getCaller() = c1 and c2 = tgt(c)) } /** Holds if `(c1,c2)` is an edge in the call graph excluding `intraInstanceCallEdge`. */ + overlay[global] private predicate crossInstanceCallEdge(Callable c1, Callable c2) { callEdge(c1, c2) and not intraInstanceCallEdge(c1, c2) } @@ -382,6 +391,7 @@ private module Cached { relevantFieldUpdate(_, f.getField(), _) } + overlay[global] private predicate source(Call call, TrackedField f, Field field, Callable c, boolean fresh) { relevantCall(call, f) and field = f.getField() and @@ -395,9 +405,11 @@ private module Cached { * `fresh` indicates whether the instance `this` in `c` has been freshly * allocated along the call-chain. */ + overlay[global] private newtype TCallableNode = MkCallableNode(Callable c, boolean fresh) { source(_, _, _, c, fresh) or edge(_, c, fresh) } + overlay[global] private predicate edge(TCallableNode n, Callable c2, boolean f2) { exists(Callable c1, boolean f1 | n = MkCallableNode(c1, f1) | intraInstanceCallEdge(c1, c2) and f2 = f1 @@ -407,6 +419,7 @@ private module Cached { ) } + overlay[global] private predicate edge(TCallableNode n1, TCallableNode n2) { exists(Callable c2, boolean f2 | edge(n1, c2, f2) and @@ -414,6 +427,7 @@ private module Cached { ) } + overlay[global] pragma[noinline] private predicate source(Call call, TrackedField f, Field field, TCallableNode n) { exists(Callable c, boolean fresh | @@ -422,24 +436,28 @@ private module Cached { ) } + overlay[global] private predicate sink(Callable c, Field f, TCallableNode n) { setsOwnField(c, f) and n = MkCallableNode(c, false) or setsOtherField(c, f) and n = MkCallableNode(c, _) } + overlay[global] private predicate prunedNode(TCallableNode n) { sink(_, _, n) or exists(TCallableNode mid | edge(n, mid) and prunedNode(mid)) } + overlay[global] private predicate prunedEdge(TCallableNode n1, TCallableNode n2) { prunedNode(n1) and prunedNode(n2) and edge(n1, n2) } + overlay[global] private predicate edgePlus(TCallableNode c1, TCallableNode c2) = fastTC(prunedEdge/2)(c1, c2) /** @@ -447,6 +465,7 @@ private module Cached { * where `f` and `call` share the same enclosing callable in which a * `FieldRead` of `f` is reachable from `call`. */ + overlay[global] pragma[noopt] private predicate updatesNamedFieldImpl(Call call, TrackedField f, Callable setter) { exists(TCallableNode src, TCallableNode sink, Field field | @@ -457,11 +476,13 @@ private module Cached { } bindingset[call, f] + overlay[global] pragma[inline_late] private predicate updatesNamedField0(Call call, TrackedField f, Callable setter) { updatesNamedField(call, f, setter) } + overlay[global] cached predicate defUpdatesNamedField(SsaImplicitUpdate def, TrackedField f, Callable setter) { f = def.getSourceVariable() and @@ -534,6 +555,7 @@ private module Cached { Impl::phiHasInputFromBlock(phi, inp, bb) } + overlay[global] cached module DataFlowIntegration { import DataFlowIntegrationImpl diff --git a/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll b/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll index a9988e920c62..16b46762524c 100644 --- a/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll +++ b/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll @@ -5,7 +5,7 @@ * data flow check for lambdas, anonymous classes, and other sufficiently * private classes where all object instantiations are accounted for. */ -overlay[local?] +overlay[global] module; import java diff --git a/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll b/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll index 86915f802743..0fcd5cb28762 100644 --- a/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll +++ b/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll @@ -6,7 +6,7 @@ * The set of dispatch targets for `Object.toString()` calls are reduced based * on possible data flow from objects of more specific types to the qualifier. */ -overlay[local?] +overlay[global] module; import java diff --git a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll index 877a62fb9455..837f0bd764c8 100644 --- a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll +++ b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll @@ -2,7 +2,7 @@ * Provides predicates for reasoning about runtime call targets through virtual * dispatch. */ -overlay[local?] +overlay[global] module; import java diff --git a/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll b/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll index cd585de58e4e..e1cbf6be7f6e 100644 --- a/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll +++ b/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll @@ -1,7 +1,7 @@ /** * Provides a module to check whether two `ParameterizedType`s are unifiable. */ -overlay[local?] +overlay[global] module; import java From dcc55727671836eea209ffc264a1fee3d59fa02b Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Wed, 10 Sep 2025 14:41:31 +0100 Subject: [PATCH 2/3] Java: Hnalde global files as exceptions rather than annotating them This allows us to merge them without redundent annoations for now. --- config/add-overlay-annotations.py | 9 +++++++++ java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll | 2 -- java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll | 2 -- .../ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll | 2 -- .../semmle/code/java/dispatch/internal/Unification.qll | 2 -- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/config/add-overlay-annotations.py b/config/add-overlay-annotations.py index 85b42026d8d7..0a30eee5799b 100644 --- a/config/add-overlay-annotations.py +++ b/config/add-overlay-annotations.py @@ -177,6 +177,12 @@ def insert_overlay_caller_annotations(lines): out_lines.append(line) return out_lines +explicitly_global = set([ + "java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll", + "java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll", + "java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll", + "java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll", +]) def annotate_as_appropriate(filename, lines): ''' @@ -196,6 +202,9 @@ def annotate_as_appropriate(filename, lines): ((filename.endswith("Query.qll") or filename.endswith("Config.qll")) and any("implements DataFlow::ConfigSig" in line for line in lines))): return None + elif filename in explicitly_global: + # These files are explicitly global and should not be annotated. + return None elif not any(line for line in lines if line.strip()): return None diff --git a/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll b/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll index 16b46762524c..bd293eed6b3a 100644 --- a/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll +++ b/java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll @@ -5,8 +5,6 @@ * data flow check for lambdas, anonymous classes, and other sufficiently * private classes where all object instantiations are accounted for. */ -overlay[global] -module; import java private import VirtualDispatch diff --git a/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll b/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll index 0fcd5cb28762..a3da9118acc7 100644 --- a/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll +++ b/java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll @@ -6,8 +6,6 @@ * The set of dispatch targets for `Object.toString()` calls are reduced based * on possible data flow from objects of more specific types to the qualifier. */ -overlay[global] -module; import java private import VirtualDispatch diff --git a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll index 837f0bd764c8..78bf1ad0bdc1 100644 --- a/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll +++ b/java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll @@ -2,8 +2,6 @@ * Provides predicates for reasoning about runtime call targets through virtual * dispatch. */ -overlay[global] -module; import java import semmle.code.java.dataflow.TypeFlow diff --git a/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll b/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll index e1cbf6be7f6e..6c92f7298d92 100644 --- a/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll +++ b/java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll @@ -1,8 +1,6 @@ /** * Provides a module to check whether two `ParameterizedType`s are unifiable. */ -overlay[global] -module; import java From d5ee91b1e86c37f055a23d597ddd12242a06261d Mon Sep 17 00:00:00 2001 From: Alex Eyers-Taylor Date: Thu, 11 Sep 2025 17:14:08 +0100 Subject: [PATCH 3/3] Java: Adress comments form code review. --- java/ql/lib/semmle/code/java/dataflow/SSA.qll | 8 ++++---- .../ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/SSA.qll b/java/ql/lib/semmle/code/java/dataflow/SSA.qll index 3f47689fd973..962f38e80ea6 100644 --- a/java/ql/lib/semmle/code/java/dataflow/SSA.qll +++ b/java/ql/lib/semmle/code/java/dataflow/SSA.qll @@ -253,11 +253,11 @@ class SsaImplicitUpdate extends SsaUpdate { or if this.hasImplicitQualifierUpdate() then - if isNonNonLocal(this) + if isNonLocal(this) then result = "nonlocal + nonlocal qualifier" else result = "nonlocal qualifier" else ( - isNonNonLocal(this) and result = "nonlocal" + isNonLocal(this) and result = "nonlocal" ) } @@ -289,9 +289,9 @@ class SsaImplicitUpdate extends SsaUpdate { } overlay[global] -private predicate isNonNonLocalImpl(SsaImplicitUpdate su) { exists(su.getANonLocalUpdate()) } +private predicate isNonLocalImpl(SsaImplicitUpdate su) { exists(su.getANonLocalUpdate()) } -private predicate isNonNonLocal(SsaImplicitUpdate su) = forceLocal(isNonNonLocalImpl/1)(su) +private predicate isNonLocal(SsaImplicitUpdate su) = forceLocal(isNonLocalImpl/1)(su) /** * An SSA variable that represents an uncertain implicit update of the value. diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll b/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll index 13e1aee6ad83..275a0afafc08 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll @@ -555,7 +555,6 @@ private module Cached { Impl::phiHasInputFromBlock(phi, inp, bb) } - overlay[global] cached module DataFlowIntegration { import DataFlowIntegrationImpl