diff --git a/powershell/ql/lib/powershell.qll b/powershell/ql/lib/powershell.qll index 41890c0dc14d..8bbfe93262ab 100644 --- a/powershell/ql/lib/powershell.qll +++ b/powershell/ql/lib/powershell.qll @@ -81,4 +81,5 @@ import semmle.code.powershell.HashTable import semmle.code.powershell.SplitExpr import semmle.code.powershell.CommentEntity import semmle.code.powershell.Variable -import semmle.code.powershell.internal.Internal::Public \ No newline at end of file +import semmle.code.powershell.internal.Internal::Public +import semmle.code.powershell.ModuleManifest \ No newline at end of file diff --git a/powershell/ql/lib/qlpack.yml b/powershell/ql/lib/qlpack.yml index 8accccc63909..1632e530e5fa 100644 --- a/powershell/ql/lib/qlpack.yml +++ b/powershell/ql/lib/qlpack.yml @@ -12,4 +12,8 @@ dependencies: codeql/dataflow: ${workspace} codeql/ssa: ${workspace} codeql/util: ${workspace} + codeql/mad: ${workspace} +dataExtensions: + - semmle/code/powershell/frameworks/**/model.yml + - semmle/code/powershell/frameworks/**/*.model.yml warnOnImplicitThis: true \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll b/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll new file mode 100644 index 000000000000..607c5248ffa2 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/ApiGraphs.qll @@ -0,0 +1,681 @@ +/** + * Provides an implementation of _API graphs_, which allow efficient modelling of how a given + * value is used by the code base or how values produced by the code base are consumed by a library. + * + * See `API::Node` for more details. 
+ */ + +private import powershell +private import semmle.code.powershell.dataflow.DataFlow +private import semmle.code.powershell.typetracking.ApiGraphShared +private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl +private import semmle.code.powershell.controlflow.Cfg +private import semmle.code.powershell.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch + +/** + * Provides classes and predicates for working with APIs used in a database. + */ +module API { + /** + * A node in the API graph, that is, a value that can be tracked interprocedurally. + * + * The API graph is a graph for tracking values of certain types in a way that accounts for inheritance + * and interprocedural data flow. + * + * API graphs are typically used to identify "API calls", that is, calls to an external function + * whose implementation is not necessarily part of the current codebase. + * + * ### Basic usage + * + * The most basic use of API graphs is typically as follows: + * 1. Start with `API::getTopLevelMember` for the relevant library. + * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function. + * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource`, `asSink`, or `asCall`. + * + * ### Data flow + * + * The member predicates on this class generally take inheritance and data flow into account. + * + * ### Backward data flow + * + * When inspecting the arguments of a call, the data flow direction is backwards. + * + * ### Inheritance + * + * When a class or module object is tracked, inheritance is taken into account. 
+ * + * ### Backward data flow and classes + * + * When inspecting the arguments of a call, and the value flowing into that argument is a user-defined class (or an instance thereof), + * uses of `getMethod` will find method definitions in that class (including inherited ones) rather than finding method calls. + * + * When modeling an external library that is known to call a specific method on a parameter, this makes + * it possible to find the corresponding method definition in user code. + * + * ### Strict left-to-right evaluation + * + * Most member predicates on this class are intended to be chained, and are always evaluated from left to right, which means + * the caller should restrict the initial set of values. + * + * For example, in the following snippet, we always find the uses of `Foo` before finding calls to `bar`: + * ```ql + * API::getTopLevelMember("Foo").getMethod("bar") + * ``` + * In particular, the implementation will never look for calls to `bar` and work backward from there. + * + * Beware of the footgun that is to use API graphs with an unrestricted receiver: + * ```ql + * API::Node barCall(API::Node base) { + * result = base.getMethod("bar") // Do not do this! + * } + * ``` + * The above predicate does not restrict the receiver, and will thus perform an interprocedural data flow + * search starting at every node in the graph, which is very expensive. + */ + class Node extends Impl::TApiNode { + /** + * Gets a data-flow node where this value may flow interprocedurally. + * + * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow. + * See `asSource()` for examples. + */ + bindingset[this] + pragma[inline_late] + DataFlow::Node getAValueReachableFromSource() { + result = getAValueReachableFromSourceInline(this) + } + + /** + * Gets a data-flow node where this value enters the current codebase. 
+ */ + bindingset[this] + pragma[inline_late] + DataFlow::LocalSourceNode asSource() { result = asSourceInline(this) } + + /** Gets a data-flow node where this value potentially flows into an external library. */ + bindingset[this] + pragma[inline_late] + DataFlow::Node asSink() { result = asSinkInline(this) } + + /** Gets a callable that can reach this sink. */ + bindingset[this] + pragma[inline_late] + DataFlow::CallableNode asCallable() { Impl::asCallable(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets a data-flow node that transitively flows to this value, provided that this value corresponds + * to a sink. + * + * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow. + * See `asSink()` for examples. + */ + bindingset[this] + pragma[inline_late] + DataFlow::Node getAValueReachingSink() { result = getAValueReachingSinkInline(this) } + + /** Gets the call referred to by this API node. */ + bindingset[this] + pragma[inline_late] + DataFlow::CallNode asCall() { this = Impl::MkMethodAccessNode(result) } + + pragma[inline] + Node getMember(string m) { + // This predicate is currently not 'inline_late' because 'm' can be an input or output + Impl::memberEdge(this.getAnEpsilonSuccessor(), m, result) + } + + /** + * Gets a node that may refer to an instance of the module or class represented by this API node. + */ + bindingset[this] + pragma[inline_late] + Node getInstance() { Impl::instanceEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets a call to `method` with this value as the receiver, or the definition of `method` on + * an object that can reach this sink. + */ + pragma[inline] + Node getMethod(string method) { + // TODO: Consider 'getMethodTarget(method)' for looking up method definitions? 
+ // This predicate is currently not 'inline_late' because 'method' can be an input or output + Impl::methodEdge(this.getAnEpsilonSuccessor(), method, result) + } + + /** + * Gets the result of this call, or the return value of this callable. + */ + bindingset[this] + pragma[inline_late] + Node getReturn() { Impl::returnEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the result of a call to `method` with this value as the receiver, or the return value of `method` defined on + * an object that can reach this sink. + * + * This is a shorthand for `getMethod(method).getReturn()`. + */ + pragma[inline] + Node getReturn(string method) { + // This predicate is currently not 'inline_late' because 'method' can be an input or output + result = this.getMethod(method).getReturn() + } + + /** + * Gets the `n`th positional argument to this call. + */ + pragma[inline] + Node getArgument(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalArgumentEdge(this, n, result) + } + + /** + * Gets the given keyword argument to this call. + */ + pragma[inline] + Node getKeywordArgument(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordArgumentEdge(this, name, result) + } + + /** + * Gets the `n`th positional parameter of this callable, or the `n`th positional argument to this call. + * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getArgument(n)` instead. + */ + pragma[inline] + Node getParameter(int n) { + // This predicate is currently not 'inline_late' because 'n' can be an input or output + Impl::positionalParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), n, result) + } + + /** + * Gets the given keyword parameter of this callable, or keyword argument to this call. 
+ * + * Note: for historical reasons, this predicate may refer to an argument of a call, but this may change in the future. + * When referring to an argument, it is recommended to use `getKeywordArgument(name)` instead. + */ + pragma[inline] + Node getKeywordParameter(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::keywordParameterOrArgumentEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets the argument passed in argument position `pos` at this call. + */ + pragma[inline] + Node getArgumentAtPosition(DataFlowDispatch::ArgumentPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::argumentEdge(pragma[only_bind_out](this), pos, result) // note: no need for epsilon step since 'this' must be a call + } + + /** + * Gets the parameter at position `pos` of this callable. + */ + pragma[inline] + Node getParameterAtPosition(DataFlowDispatch::ParameterPosition pos) { + // This predicate is currently not 'inline_late' because 'pos' can be an input or output + Impl::parameterEdge(this.getAnEpsilonSuccessor(), pos, result) + } + + /** + * Gets a representative for the `content` of this value. + * + * When possible, it is preferable to use one of the specialized variants of this predicate, such as `getAnElement`. + * + * Concretely, this gets sources where `content` is read from this value, as well as sinks where + * `content` is stored onto this value or onto an object that can reach this sink. + */ + pragma[inline] + Node getContent(DataFlow::Content content) { + // This predicate is currently not 'inline_late' because 'content' can be an input or output + Impl::contentEdge(this.getAnEpsilonSuccessor(), content, result) + } + + /** + * Gets a representative for the `contents` of this value. + * + * See `getContent()` for more details. 
+ */ + bindingset[this, contents] + pragma[inline_late] + Node getContents(DataFlow::ContentSet contents) { + // We always use getAStoreContent when generating content edges, and we always use getAReadContent when querying the graph. + result = this.getContent(contents.getAReadContent()) + } + + /** + * Gets a representative for the instance field of the given `name`. + */ + pragma[inline] + Node getField(string name) { + // This predicate is currently not 'inline_late' because 'name' can be an input or output + Impl::fieldEdge(this.getAnEpsilonSuccessor(), name, result) + } + + /** + * Gets a representative for an arbitrary element of this collection. + */ + bindingset[this] + pragma[inline_late] + Node getAnElement() { Impl::elementEdge(this.getAnEpsilonSuccessor(), result) } + + /** + * Gets the data-flow node that gives rise to this node, if any. + */ + DataFlow::Node getInducingNode() { + this = Impl::MkMethodAccessNode(result) or + this = Impl::MkBackwardNode(result, _) or + this = Impl::MkForwardNode(result, _) or + this = Impl::MkSinkNode(result) + } + + /** Gets the location of this node. */ + Location getLocation() { + result = this.getInducingNode().getLocation() + or + this instanceof RootNode and + result instanceof EmptyLocation + } + + /** + * Gets a textual representation of this element. + */ + string toString() { none() } + + pragma[inline] + private Node getAnEpsilonSuccessor() { result = getAnEpsilonSuccessorInline(this) } + } + + /** The root node of an API graph. */ + private class RootNode extends Node, Impl::MkRoot { + override string toString() { result = "Root()" } + } + + /** A node representing a given type-tracking state when tracking forwards. 
*/ + private class ForwardNode extends Node, Impl::MkForwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + ForwardNode() { this = Impl::MkForwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "ForwardNode(" + node + ")" + else result = "ForwardNode(" + node + ", " + tracker + ")" + } + } + + /** A node representing a given type-tracking state when tracking backwards. */ + private class BackwardNode extends Node, Impl::MkBackwardNode { + private DataFlow::LocalSourceNode node; + private TypeTracker tracker; + + BackwardNode() { this = Impl::MkBackwardNode(node, tracker) } + + override string toString() { + if tracker.start() + then result = "BackwardNode(" + node + ")" + else result = "BackwardNode(" + node + ", " + tracker + ")" + } + } + + /** A node representing a module/class object with epsilon edges to its descendents. */ + private class ModuleNode extends Node, Impl::MkModule { + /** Gets the module represented by this API node. */ + string getModule() { this = Impl::MkModule(result) } + + override string toString() { result = "Module(" + this.getModule() + ")" } + + TypeNode getType(string name) { result.getType() = this.getModule() + "." + name } // TODO: Check that name exists in module + } + + private class TypeNode extends Node, Impl::MkType { + /** Gets the type represented by this API node. */ + string getType() { this = Impl::MkType(result) } + + override string toString() { result = "Type(" + this.getType() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its ancestors. */ + private class InstanceUp extends Node, Impl::MkInstanceUp { + /** Gets the module whose instances are represented by this API node. 
*/ + string getType() { this = Impl::MkInstanceUp(result) } + + override string toString() { result = "ModuleInstanceUp(" + this.getType() + ")" } + } + + /** A node representing instances of a module/class with epsilon edges to its descendents. */ + private class InstanceDownNode extends Node, Impl::MkInstanceDown { + /** Gets the module whose instances are represented by this API node. */ + string getType() { this = Impl::MkInstanceDown(result) } + + override string toString() { result = "ModuleInstanceDown(" + this.getType() + ")" } + } + + /** A node corresponding to the method being invoked at a method call. */ + class MethodAccessNode extends Node, Impl::MkMethodAccessNode { + override string toString() { result = "MethodAccessNode(" + this.asCall() + ")" } + } + + /** + * A node corresponding to an argument, right-hand side of a store, or return value from a callable. + * + * Such a node may serve as the starting-point of backtracking, and has epsilon edges going to + * the backward nodes corresponding to `getALocalSource`. + */ + private class SinkNode extends Node, Impl::MkSinkNode { + override string toString() { result = "SinkNode(" + this.getInducingNode() + ")" } + } + + /** + * An API entry point. + * + * By default, API graph nodes are only created for nodes that come from an external + * library or escape into an external library. The points where values cross the boundary + * between codebases are called "entry points". + * + * Anything in the global scope is considered to be an entry point, but + * additional entry points may be added by extending this class. + */ + abstract class EntryPoint extends string { + // Note: this class can be deprecated in Ruby, but is still referenced by shared code in ApiGraphModels.qll, + // where it can't be removed since other languages are still dependent on the EntryPoint class. + bindingset[this] + EntryPoint() { any() } + + /** Gets a data-flow node corresponding to a use-node for this entry point. 
*/ + DataFlow::LocalSourceNode getASource() { none() } + + /** Gets a data-flow node corresponding to a def-node for this entry point. */ + DataFlow::Node getASink() { none() } + + /** Gets a call corresponding to a method access node for this entry point. */ + DataFlow::CallNode getACall() { none() } + + /** Gets an API-node for this entry point. */ + API::Node getANode() { Impl::entryPointEdge(this, result) } + } + + // Ensure all entry points are imported from ApiGraphs.qll + private module ImportEntryPoints { + private import semmle.code.powershell.frameworks.data.ModelsAsData + } + + /** Gets the root node. */ + Node root() { result instanceof RootNode } + + /** + * Gets the node that represents the module with qualified + * name `qualifiedModule`. + */ + ModuleNode mod(string qualifiedModule) { result = Impl::MkModule(qualifiedModule) } + + /** + * Gets the node that represents the type with qualified + * name `qualifiedType`. + */ + TypeNode type(string qualifiedType) { result = Impl::MkType(qualifiedType) } + + /** + * Gets an unqualified call at the top-level with the given method name. 
+ */ + pragma[inline] + MethodAccessNode getTopLevelCall(string name) { Impl::toplevelCall(name, result) } + + pragma[nomagic] + private predicate isReachable(DataFlow::LocalSourceNode node, TypeTracker t) { + t.start() and exists(node) + or + exists(DataFlow::LocalSourceNode prev, TypeTracker t2 | + isReachable(prev, t2) and + node = prev.track(t2, t) + ) + } + + private module SharedArg implements ApiGraphSharedSig { + class ApiNode = Node; + + ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkForwardNode(node, t) + } + + ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { + result = Impl::MkBackwardNode(node, t) + } + + ApiNode getSinkNode(DataFlow::Node node) { result = Impl::MkSinkNode(node) } + + pragma[nomagic] + predicate specificEpsilonEdge(ApiNode pred, ApiNode succ) { none() } + } + + /** INTERNAL USE ONLY. */ + module Internal { + private module MkShared = ApiGraphShared; + + import MkShared + } + + private import Internal + import Internal::Public + + cached + private module Impl { + cached + newtype TApiNode = + /** The root of the API graph. */ + MkRoot() or + /** The method accessed at `call`, synthetically treated as a separate object. */ + MkMethodAccessNode(DataFlow::CallNode call) or + MkModule(string qualifiedModule) { + any(UsingStmt using).getName() = qualifiedModule + or + any(Cmd cmd).getQualifiedCommandName() = qualifiedModule + or + any(ModuleManifest manifest).getModuleName() = qualifiedModule + } or + MkType(string qualifiedType) { any(ConstantValue cv).asString() = qualifiedType } or // TODO + /** Instances of `mod` with epsilon edges to its ancestors. */ + MkInstanceUp(string qualifiedType) { exists(MkType(qualifiedType)) } or + /** Instances of `mod` with epsilon edges to its descendents, and to its upward node. */ + MkInstanceDown(string qualifiedType) { exists(MkType(qualifiedType)) } or + /** Intermediate node for following forward data flow. 
*/ + MkForwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + /** Intermediate node for following backward data flow. */ + MkBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + MkSinkNode(DataFlow::Node node) { needsSinkNode(node) } + + private predicate needsSinkNode(DataFlow::Node node) { + node instanceof DataFlowPrivate::ArgumentNode + or + TypeTrackingInput::storeStep(node, _, _) + or + node = any(DataFlow::CallableNode callable).getAReturnNode() + or + node = any(EntryPoint e).getASink() + } + + bindingset[e] + pragma[inline_late] + private DataFlow::Node getNodeFromExpr(Expr e) { result.asExpr().getExpr() = e } + + cached + predicate toplevelCall(string name, Node node) { + exists(DataFlow::CallNode call | + call.asExpr().getExpr().getEnclosingScope() instanceof TopLevel and + call.getName() = name and + node = MkMethodAccessNode(call) + ) + } + + cached + predicate callEdge(Node pred, string name, Node succ) { + exists(DataFlow::CallNode call | + // from receiver to method call node + pred = getForwardEndNode(getALocalSourceStrict(call.getQualifier())) and + succ = MkMethodAccessNode(call) and + name = call.getName() + ) + } + + cached + predicate typeEdge(Node pred, string name, Node succ) { + exists(ModuleNode mod | + pred = mod and + succ = mod.getType(name) + ) + } + + cached + predicate memberEdge(Node pred, string name, Node succ) { + exists(MemberExpr member | succ = getForwardStartNode(getNodeFromExpr(member)) | + pred = getForwardEndNode(getALocalSourceStrict(getNodeFromExpr(member.getQualifier()))) and + name = member.getMemberName() + ) + } + + cached + predicate methodEdge(Node pred, string name, Node succ) { + exists(DataFlow::CallNode call | succ = MkMethodAccessNode(call) and name = call.getName() | + pred = getForwardEndNode(getALocalSourceStrict(call.getQualifier())) + or + exists(string qualifiedModule, ModuleManifest manifest | + pred = mod(qualifiedModule) and + 
manifest.getModuleName() = qualifiedModule + | + manifest.getACmdLetToExport() = name + or + manifest.getAFunctionToExport() = name + ) + ) + } + + cached + predicate asCallable(Node apiNode, DataFlow::CallableNode callable) { + apiNode = getBackwardStartNode(callable) + } + + cached + predicate contentEdge(Node pred, DataFlow::Content content, Node succ) { + exists(DataFlow::Node object, DataFlow::Node value, DataFlow::ContentSet c | + TypeTrackingInput::loadStep(object, value, c) and + content = c.getAStoreContent() and + // `x -> x.foo` with content "foo" + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = getForwardStartNode(value) + or + // Based on `object.c = value` generate `object -> value` with content `c` + TypeTrackingInput::storeStep(value, object, c) and + content = c.getAStoreContent() and + pred = getForwardOrBackwardEndNode(getALocalSourceStrict(object)) and + succ = MkSinkNode(value) + ) + } + + cached + predicate fieldEdge(Node pred, string name, Node succ) { + Impl::contentEdge(pred, DataFlowPrivate::TFieldContent(name), succ) + } + + cached + predicate elementEdge(Node pred, Node succ) { + contentEdge(pred, any(DataFlow::ContentSet set | set.isAnyElement()).getAReadContent(), succ) + } + + cached + predicate parameterEdge(Node pred, DataFlowDispatch::ParameterPosition paramPos, Node succ) { + exists(DataFlowPrivate::ParameterNodeImpl parameter, DataFlow::CallableNode callable | + parameter.isSourceParameterOf(callable.asCallableAstNode(), paramPos) and + pred = getBackwardEndNode(callable) and + succ = getForwardStartNode(parameter) + ) + } + + cached + predicate argumentEdge(Node pred, DataFlowDispatch::ArgumentPosition argPos, Node succ) { + exists(DataFlow::CallNode call, DataFlowPrivate::ArgumentNode argument | + argument.sourceArgumentOf(call.asExpr(), argPos) and + pred = MkMethodAccessNode(call) and + succ = MkSinkNode(argument) + ) + } + + cached + predicate positionalArgumentEdge(Node pred, int n, Node 
succ) { + argumentEdge(pred, + any(DataFlowDispatch::ArgumentPosition pos | + pos.isPositional(n, DataFlowPrivate::emptyNamedSet()) + ), succ) + } + + cached + predicate keywordArgumentEdge(Node pred, string name, Node succ) { + argumentEdge(pred, any(DataFlowDispatch::ArgumentPosition pos | pos.isKeyword(name)), succ) + } + + private predicate positionalParameterEdge(Node pred, int n, Node succ) { + parameterEdge(pred, + any(DataFlowDispatch::ParameterPosition pos | + pos.isPositional(n, DataFlowPrivate::emptyNamedSet()) + ), succ) + } + + private predicate keywordParameterEdge(Node pred, string name, Node succ) { + parameterEdge(pred, any(DataFlowDispatch::ParameterPosition pos | pos.isKeyword(name)), succ) + } + + cached + predicate positionalParameterOrArgumentEdge(Node pred, int n, Node succ) { + positionalArgumentEdge(pred, n, succ) + or + positionalParameterEdge(pred, n, succ) + } + + cached + predicate keywordParameterOrArgumentEdge(Node pred, string name, Node succ) { + keywordArgumentEdge(pred, name, succ) + or + keywordParameterEdge(pred, name, succ) + } + + cached + predicate instanceEdge(Node pred, Node succ) { + exists(string qualifiedType | pred = MkType(qualifiedType) | + exists(DataFlow::ObjectCreationNode objCreation | + objCreation.getConstructedTypeName() = qualifiedType and + succ = getForwardStartNode(objCreation) + ) + or + exists(DataFlow::ParameterNode p | + p.getParameter().getStaticType() = qualifiedType and + succ = getForwardStartNode(p) + ) + ) + } + + cached + predicate returnEdge(Node pred, Node succ) { + exists(DataFlow::CallNode call | + pred = MkMethodAccessNode(call) and + succ = getForwardStartNode(call) + ) + or + exists(DataFlow::CallableNode callable | + pred = getBackwardEndNode(callable) and + succ = MkSinkNode(callable.getAReturnNode()) + ) + } + + cached + predicate entryPointEdge(EntryPoint entry, Node node) { + node = MkSinkNode(entry.getASink()) or + node = getForwardStartNode(entry.getASource()) or + node = 
MkMethodAccessNode(entry.getACall()) + } + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/Call.qll b/powershell/ql/lib/semmle/code/powershell/Call.qll index e630ebc14094..b5516344cc60 100644 --- a/powershell/ql/lib/semmle/code/powershell/Call.qll +++ b/powershell/ql/lib/semmle/code/powershell/Call.qll @@ -6,6 +6,8 @@ private import semmle.code.powershell.controlflow.CfgNodes abstract private class AbstractCall extends Ast { abstract Expr getCommand(); + abstract string getName(); + /** Gets the i'th argument to this call. */ abstract Expr getArgument(int i); @@ -34,6 +36,8 @@ class CmdCall extends AbstractCall instanceof Cmd { final override Expr getPositionalArgument(int i) { result = Cmd.super.getPositionalArgument(i) } + final override string getName() { result = Cmd.super.getCommandName() } + final override Expr getArgument(int i) { result = Cmd.super.getArgument(i) } final override Expr getNamedArgument(string name) { result = Cmd.super.getNamedArgument(name) } @@ -68,6 +72,8 @@ class MethodCall extends AbstractCall instanceof InvokeMemberExpr { result.getBody() = getTarget(call) ) } + + final override string getName() { result = InvokeMemberExpr.super.getName() } } final class Call = AbstractCall; diff --git a/powershell/ql/lib/semmle/code/powershell/Command.qll b/powershell/ql/lib/semmle/code/powershell/Command.qll index 5617704789e2..4b00d7e1b976 100644 --- a/powershell/ql/lib/semmle/code/powershell/Command.qll +++ b/powershell/ql/lib/semmle/code/powershell/Command.qll @@ -1,11 +1,32 @@ import powershell +private predicate parseCommandName(Cmd cmd, string namespace, string name) { + exists(string qualified | command(cmd, qualified, _, _, _) | + namespace = qualified.regexpCapture("([^\\\\]+)\\\\([^\\\\]+)", 1) and + name = qualified.regexpCapture("([^\\\\]+)\\\\([^\\\\]+)", 2) + or + // Not a qualified name + not exists(qualified.indexOf("\\")) and + namespace = "" and + name = qualified + ) +} class Cmd extends @command, CmdBase { - override 
string toString() { result = this.getCommandName() } + override string toString() { result = this.getQualifiedCommandName() } override SourceLocation getLocation() { command_location(this, result) } - string getCommandName() { command(this, result, _, _, _) } + /** Gets the name of the command without any qualifiers. */ + string getCommandName() { parseCommandName(this, _, result) } + + /** Holds if the command is qualified. */ + predicate isQualified() { parseCommandName(this, any(string s | s != ""), _) } + + /** Gets the namespace qualifier of this command, if any. */ + string getNamespaceQualifier() { parseCommandName(this, result, _) } + + /** Gets the (possibly qualified) name of this command. */ + string getQualifiedCommandName() { command(this, result, _, _, _) } int getKind() { command(this, _, result, _, _) } @@ -15,8 +36,10 @@ class Cmd extends @command, CmdBase { CmdElement getElement(int i) { command_command_element(this, i, result) } + /** Gets the expression that determines the command to invoke. */ Expr getCommand() { result = this.getElement(0) } + /** Gets the name of this command, if this is statically known. */ StringConstExpr getCmdName() { result = this.getElement(0) } /** Gets any argument to this command. */ diff --git a/powershell/ql/lib/semmle/code/powershell/Frameworks.qll b/powershell/ql/lib/semmle/code/powershell/Frameworks.qll new file mode 100644 index 000000000000..19c46aa64dad --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/Frameworks.qll @@ -0,0 +1,4 @@ +/** + * Helper file that imports all framework modeling. 
+ */ + diff --git a/powershell/ql/lib/semmle/code/powershell/HashTable.qll b/powershell/ql/lib/semmle/code/powershell/HashTable.qll index edfcd2eb92ee..5bd825a18f49 100644 --- a/powershell/ql/lib/semmle/code/powershell/HashTable.qll +++ b/powershell/ql/lib/semmle/code/powershell/HashTable.qll @@ -7,6 +7,10 @@ class HashTableExpr extends @hash_table, Expr { Stmt getElement(Expr key) { hash_table_key_value_pairs(this, _, key, result) } // TODO: Change @ast to @expr in db scheme + Stmt getElementFromConstant(string key) { + result = this.getElement(any(StringConstExpr sc | sc.getValue().getValue() = key)) + } + predicate hasKey(Expr key) { exists(this.getElement(key)) } Stmt getAnElement() { result = this.getElement(_) } diff --git a/powershell/ql/lib/semmle/code/powershell/InvokeMemberExpression.qll b/powershell/ql/lib/semmle/code/powershell/InvokeMemberExpression.qll index 7cd9d71a6159..c11d542cb228 100644 --- a/powershell/ql/lib/semmle/code/powershell/InvokeMemberExpression.qll +++ b/powershell/ql/lib/semmle/code/powershell/InvokeMemberExpression.qll @@ -9,6 +9,8 @@ class InvokeMemberExpr extends @invoke_member_expression, MemberExprBase { CmdElement getMember() { invoke_member_expression(this, _, result) } + string getMemberName() { result = this.getMember().(StringConstExpr).getValue().getValue() } + Expr getArgument(int i) { invoke_member_expression_argument(this, i, result) } Expr getAnArgument() { invoke_member_expression_argument(this, _, result) } diff --git a/powershell/ql/lib/semmle/code/powershell/MemberExpr.qll b/powershell/ql/lib/semmle/code/powershell/MemberExpr.qll index 5af9eef345c5..1d6d143c053e 100644 --- a/powershell/ql/lib/semmle/code/powershell/MemberExpr.qll +++ b/powershell/ql/lib/semmle/code/powershell/MemberExpr.qll @@ -3,7 +3,7 @@ import powershell class MemberExpr extends @member_expression, MemberExprBase { final override Location getLocation() { member_expression_location(this, result) } - Expr getBase() { member_expression(this, result, 
_, _, _) } + Expr getQualifier() { member_expression(this, result, _, _, _) } CmdElement getMember() { member_expression(this, _, result, _, _) } diff --git a/powershell/ql/lib/semmle/code/powershell/ModuleManifest.qll b/powershell/ql/lib/semmle/code/powershell/ModuleManifest.qll new file mode 100644 index 000000000000..691216d391d3 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/ModuleManifest.qll @@ -0,0 +1,44 @@ +import powershell + +class ModuleManifestFile extends File { + ModuleManifestFile() { this.getExtension() = "psd1" } +} + +private Expr getEntry(HashTableExpr ht, string key) { + result = ht.getElementFromConstant(key).(CmdExpr).getExpr() and + not result instanceof ArrayLiteral +} + +private Expr getAnEntry(HashTableExpr ht, string key) { + exists(Expr e | e = ht.getElementFromConstant(key).(CmdExpr).getExpr() | + not e instanceof ArrayLiteral and result = e + or + result = e.(ArrayLiteral).getAnElement() + ) +} + +class ModuleManifest extends HashTableExpr { + string moduleVersion; + + ModuleManifest() { + // The hash table is in a .psd1 file + this.getLocation().getFile() instanceof ModuleManifestFile and + // It's at the top level of the file + this.getParent().(CmdExpr).getParent().(NamedBlock).getParent() instanceof TopLevel and + // It has a `ModuleVersion` entry. The only required field is ModuleVersion. 
+ // https://learn.microsoft.com/en-us/powershell/scripting/developer/module/how-to-write-a-powershell-module-manifest?view=powershell-7.4#to-create-and-use-a-module-manifest + moduleVersion = getEntry(this, "ModuleVersion").getValue().asString() + } + + string getModuleVersion() { result = moduleVersion } + + string getModuleName() { + result + ".psd1" = this.getLocation().getFile().getBaseName() + } + + string getAFunctionToExport() { + result = getAnEntry(this, "FunctionsToExport").getValue().asString() + } + + string getACmdLetToExport() { result = getAnEntry(this, "CmdletsToExport").getValue().asString() } +} diff --git a/powershell/ql/lib/semmle/code/powershell/Type.qll b/powershell/ql/lib/semmle/code/powershell/Type.qll index 11eb1afa6e7d..d67aea74ad15 100644 --- a/powershell/ql/lib/semmle/code/powershell/Type.qll +++ b/powershell/ql/lib/semmle/code/powershell/Type.qll @@ -22,4 +22,10 @@ class Type extends @type_definition, Stmt { } Method getAMethod() { result = this.getMethod(_) } + + TypeConstraint getBaseType(int i) { type_definition_base_type(this, i, result) } + + TypeConstraint getABaseType() { result = this.getBaseType(_) } + + Type getASubtype() { result.getABaseType().getName() = this.getName() } } diff --git a/powershell/ql/lib/semmle/code/powershell/UsingStmt.qll b/powershell/ql/lib/semmle/code/powershell/UsingStmt.qll index 37687cee46c4..2278409c0a55 100644 --- a/powershell/ql/lib/semmle/code/powershell/UsingStmt.qll +++ b/powershell/ql/lib/semmle/code/powershell/UsingStmt.qll @@ -4,4 +4,18 @@ class UsingStmt extends @using_statement, Stmt { override SourceLocation getLocation() { using_statement_location(this, result) } override string toString() { result = "using ..." 
} + + string getName() { + exists(StringConstExpr const | + using_statement_name(this, const) and // TODO: Change dbscheme + result = const.getValue().getValue() + ) + } + + string getAlias() { + exists(StringConstExpr const | + using_statement_alias(this, const) and // TODO: Change dbscheme + result = const.getValue().getValue() + ) + } } diff --git a/powershell/ql/lib/semmle/code/powershell/Variable.qll b/powershell/ql/lib/semmle/code/powershell/Variable.qll index 18117db52b51..39ac5bdabb2a 100644 --- a/powershell/ql/lib/semmle/code/powershell/Variable.qll +++ b/powershell/ql/lib/semmle/code/powershell/Variable.qll @@ -53,12 +53,16 @@ private predicate isParameterImpl(string name, Scope scope) { name = "_" } +private predicate isThisParameter(Scope scope, Type t) { + t = scope.getEnclosingFunction().getDeclaringType() +} + private newtype TParameterImpl = TInternalParameter(Internal::Parameter p) or TUnderscore(Scope scope) { exists(VarAccess va | va.getUserPath() = ["_", "PSItem"] and scope = va.getEnclosingScope()) } or - TThisParameter(Scope scope) { exists(scope.getEnclosingFunction().getDeclaringType()) } + TThisParameter(Scope scope) { isThisParameter(scope, _) } private class ParameterImpl extends TParameterImpl { abstract Location getLocation(); @@ -88,6 +92,13 @@ private class ParameterImpl extends TParameterImpl { abstract predicate isPipeline(); abstract predicate isPipelineByPropertyName(); + + /** + * Gets the static type of this parameter. + * The type of this parameter at runtime may be a subtype of this static + * type. 
+ */ + abstract string getStaticType(); } private class InternalParameter extends ParameterImpl, TInternalParameter { @@ -122,6 +133,8 @@ private class InternalParameter extends ParameterImpl, TInternalParameter { override predicate isPipelineByPropertyName() { this.getAnAttribute().getANamedArgument() instanceof ValueFromPipelineByPropertyName } + + final override string getStaticType() { result = p.getStaticType() } } /** @@ -158,6 +171,8 @@ private class Underscore extends ParameterImpl, TUnderscore { final override predicate isPipelineByPropertyName() { none() } final override predicate isFunctionParameter(Function f, int i) { f.getBody() = scope and i = -1 } + + final override string getStaticType() { none() } } private class ThisParameter extends ParameterImpl, TThisParameter { @@ -176,6 +191,13 @@ private class ThisParameter extends ParameterImpl, TThisParameter { final override predicate isPipeline() { none() } final override predicate isPipelineByPropertyName() { none() } + + final override string getStaticType() { + exists(Type t | + isThisParameter(scope, t) and + result = t.getName() + ) + } } private predicate isPipelineIteratorVariable(ParameterImpl p, ProcessBlock pb) { @@ -305,6 +327,8 @@ class Parameter extends AbstractLocalScopeVariable, TParameter { predicate isPipeline() { p.isPipeline() } predicate isPipelineByPropertyName() { p.isPipelineByPropertyName() } + + string getStaticType() { result = p.getStaticType() } } class PipelineParameter extends Parameter { diff --git a/powershell/ql/lib/semmle/code/powershell/controlflow/CfgNodes.qll b/powershell/ql/lib/semmle/code/powershell/controlflow/CfgNodes.qll index f31e29ccd0bd..a55fa3f680ae 100644 --- a/powershell/ql/lib/semmle/code/powershell/controlflow/CfgNodes.qll +++ b/powershell/ql/lib/semmle/code/powershell/controlflow/CfgNodes.qll @@ -159,6 +159,8 @@ abstract private class AbstractCallCfgNode extends AstCfgNode { * Gets the expression that provides the call target of this call, if any. 
*/ abstract ExprCfgNode getCommand(); + + int getNumberOfArguments() { result = count(this.getAnArgument()) } } final class CallCfgNode = AbstractCallCfgNode; @@ -371,7 +373,7 @@ module ExprNodes { } class MemberChildMapping extends ExprChildMapping, MemberExpr { - override predicate relevantChild(Ast n) { n = this.getBase() or n = this.getMember() } + override predicate relevantChild(Ast n) { n = this.getQualifier() or n = this.getMember() } } /** A control-flow node that wraps a `MemberExpr` expression. */ @@ -382,7 +384,7 @@ module ExprNodes { final override MemberExpr getExpr() { result = super.getExpr() } - final ExprCfgNode getBase() { e.hasCfgChild(e.getBase(), this, result) } + final ExprCfgNode getQualifier() { e.hasCfgChild(e.getQualifier(), this, result) } final string getMemberName() { result = e.getMemberName() } diff --git a/powershell/ql/lib/semmle/code/powershell/controlflow/internal/ControlFlowGraphImpl.qll b/powershell/ql/lib/semmle/code/powershell/controlflow/internal/ControlFlowGraphImpl.qll index ba5656a8de2b..b821c91523a9 100644 --- a/powershell/ql/lib/semmle/code/powershell/controlflow/internal/ControlFlowGraphImpl.qll +++ b/powershell/ql/lib/semmle/code/powershell/controlflow/internal/ControlFlowGraphImpl.qll @@ -468,7 +468,7 @@ module Trees { class MemberExprTree extends StandardPostOrderTree instanceof MemberExpr { override AstNode getChildNode(int i) { - i = 0 and result = super.getBase() + i = 0 and result = super.getQualifier() or i = 1 and result = super.getMember() } diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/FlowSummary.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/FlowSummary.qll new file mode 100644 index 000000000000..65b446a9ea0b --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/FlowSummary.qll @@ -0,0 +1,61 @@ +/** Provides classes and predicates for defining flow summaries. 
*/ + +import powershell +private import semmle.code.powershell.controlflow.Cfg +private import semmle.code.powershell.typetracking.TypeTracking +private import semmle.code.powershell.dataflow.DataFlow +private import internal.FlowSummaryImpl as Impl +private import internal.DataFlowDispatch +private import internal.DataFlowImplCommon as DataFlowImplCommon +private import internal.DataFlowPrivate + +// import all instances below +private module Summaries { + private import semmle.code.powershell.Frameworks + private import semmle.code.powershell.frameworks.data.ModelsAsData +} + +/** A callable with a flow summary, identified by a unique string. */ +abstract class SummarizedCallable extends LibraryCallable, Impl::Public::SummarizedCallable { + bindingset[this] + SummarizedCallable() { any() } + + override predicate propagatesFlow( + string input, string output, boolean preservesValue, string model + ) { + this.propagatesFlow(input, output, preservesValue) and model = "" + } + + /** + * Holds if data may flow from `input` to `output` through this callable. + * + * `preservesValue` indicates whether this is a value-preserving step or a taint-step. + */ + predicate propagatesFlow(string input, string output, boolean preservesValue) { none() } + + /** + * Gets the synthesized parameter that results from an input specification + * that starts with `Argument[s]` for this library callable. + */ + DataFlow::ParameterNode getParameter(string s) { + exists(ParameterPosition pos | + DataFlowImplCommon::parameterNode(result, TLibraryCallable(this), pos) and + s = Impl::Input::encodeParameterPosition(pos) + ) + } +} + +/** + * A callable with a flow summary, identified by a unique string, where all + * calls to a method with the same name are considered relevant. 
+ */ +abstract class SimpleSummarizedCallable extends SummarizedCallable { + Call c; + + bindingset[this] + SimpleSummarizedCallable() { c.getName() = this } + + final override Call getACall() { result = c } + + final override Call getACallSimple() { result = c } +} diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/FlowSources.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/FlowSources.qll new file mode 100644 index 000000000000..71f9450e8338 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/FlowSources.qll @@ -0,0 +1,18 @@ +/** Provides classes representing various flow sources for taint tracking. */ +import semmle.code.powershell.dataflow.internal.DataFlowPublic as DataFlow +import semmle.code.powershell.dataflow.flowsources.Remote +import semmle.code.powershell.dataflow.flowsources.Local +import semmle.code.powershell.frameworks.data.internal.ApiGraphModels + +/** + * A data flow source. + */ +abstract class SourceNode extends DataFlow::Node { + /** + * Gets a string that represents the source kind with respect to threat modeling. + */ + abstract string getThreatModel(); + + /** Gets a string that describes the type of this flow source. */ + abstract string getSourceType(); +} diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Local.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Local.qll new file mode 100644 index 000000000000..48f64b17be4b --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Local.qll @@ -0,0 +1,87 @@ +/** + * Provides classes representing sources of local input. + */ + +import powershell +private import FlowSources + +/** A data flow source of local data. 
*/ +abstract class LocalFlowSource extends SourceNode { + override string getSourceType() { result = "local flow source" } + + override string getThreatModel() { result = "local" } +} + +private class ExternalLocalFlowSource extends LocalFlowSource { + ExternalLocalFlowSource() { this = ModelOutput::getASourceNode("local", _).asSource() } + + override string getSourceType() { result = "external" } +} + +/** A data flow source of local user input. */ +abstract class LocalUserInputSource extends LocalFlowSource { } + +/** + * A dataflow source that represents the access of an environment variable. + */ +abstract class EnvironmentVariableSource extends LocalFlowSource { + override string getThreatModel() { result = "environment" } + + override string getSourceType() { result = "environment variable" } +} + +private class ExternalEnvironmentVariableSource extends EnvironmentVariableSource { + ExternalEnvironmentVariableSource() { + this = ModelOutput::getASourceNode("environment", _).asSource() + } +} + +/** + * A dataflow source that represents the access of a command line argument. + */ +abstract class CommandLineArgumentSource extends LocalFlowSource { + override string getThreatModel() { result = "commandargs" } + + override string getSourceType() { result = "command line argument" } +} + +private class ExternalCommandLineArgumentSource extends CommandLineArgumentSource { + ExternalCommandLineArgumentSource() { + this = ModelOutput::getASourceNode("command-line", _).asSource() + } +} + +/** + * A data flow source that represents a parameter of a top-level script, that is, a parameter whose enclosing function is a `TopLevel`. + */ +private class MainMethodArgumentSource extends CommandLineArgumentSource { + MainMethodArgumentSource() { this.asParameter().getFunction() instanceof TopLevel } +} + +/** + * A data flow source that represents the access of a value from the Windows registry. 
+ */ +abstract class WindowsRegistrySource extends LocalFlowSource { + override string getThreatModel() { result = "windows-registry" } + + override string getSourceType() { result = "a value from the Windows registry" } +} + +private class ExternalWindowsRegistrySource extends WindowsRegistrySource { + ExternalWindowsRegistrySource() { + this = ModelOutput::getASourceNode("windows-registry", _).asSource() + } +} + +/** + * A dataflow source that represents the reading from stdin. + */ +abstract class StdinSource extends LocalFlowSource { + override string getThreatModel() { result = "stdin" } + + override string getSourceType() { result = "read from stdin" } +} + +private class ExternalStdinSource extends StdinSource { + ExternalStdinSource() { this = ModelOutput::getASourceNode("stdin", _).asSource() } +} diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Remote.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Remote.qll new file mode 100644 index 000000000000..f6b4edd93cb6 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/flowsources/Remote.qll @@ -0,0 +1,33 @@ +/** + * Provides an extension point for modeling user-controlled data. + * Such data is often used as data-flow sources in security queries. + */ + +private import semmle.code.powershell.dataflow.internal.DataFlowPublic as DataFlow +// Need to import since frameworks can extend `RemoteFlowSource::Range` +private import semmle.code.powershell.Frameworks + +/** + * A data flow source of remote user input. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RemoteFlowSource::Range` instead. + */ +class RemoteFlowSource extends DataFlow::Node instanceof RemoteFlowSource::Range { + /** Gets a string that describes the type of this remote flow source. */ + string getSourceType() { result = super.getSourceType() } +} + +/** Provides a class for modeling new sources of remote user input. 
*/ +module RemoteFlowSource { + /** + * A data flow source of remote user input. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RemoteFlowSource` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets a string that describes the type of this remote flow source. */ + abstract string getSourceType(); + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowDispatch.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowDispatch.qll index 9eea2e314834..4c397fbdd9a6 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowDispatch.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowDispatch.qll @@ -3,6 +3,8 @@ private import semmle.code.powershell.Cfg private import DataFlowPrivate private import DataFlowPublic private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl +private import FlowSummaryImpl as FlowSummaryImpl +private import semmle.code.powershell.dataflow.FlowSummary private import codeql.util.Boolean private import codeql.util.Unit @@ -38,6 +40,15 @@ abstract class LibraryCallable extends string { /** Gets a call to this library callable. */ Call getACall() { none() } + + /** Same as `getACall()` except this does not depend on the call graph or API graph. */ + Call getACallSimple() { none() } +} + +/** A callable defined in library code, which should be taken into account in type tracking. 
*/ +abstract class LibraryCallableToIncludeInTypeTracking extends LibraryCallable { + bindingset[this] + LibraryCallableToIncludeInTypeTracking() { exists(this) } } /** @@ -108,6 +119,24 @@ abstract class DataFlowCall extends TDataFlowCall { } } +class SummaryCall extends DataFlowCall, TSummaryCall { + private FlowSummaryImpl::Public::SummarizedCallable c; + private FlowSummaryImpl::Private::SummaryNode receiver; + + SummaryCall() { this = TSummaryCall(c, receiver) } + + /** Gets the data flow node that this call targets. */ + FlowSummaryImpl::Private::SummaryNode getReceiver() { result = receiver } + + override DataFlowCallable getEnclosingCallable() { result.asLibraryCallable() = c } + + override CfgNodes::CallCfgNode asCall() { none() } + + override string toString() { result = "[summary] call to " + receiver + " in " + c } + + override EmptyLocation getLocation() { any() } +} + class NormalCall extends DataFlowCall, TNormalCall { private CfgNodes::CallCfgNode c; @@ -134,6 +163,9 @@ private module TrackInstanceInput implements CallGraphConstruction::InputSig { or start.asExpr().(CfgNodes::ExprNodes::TypeNameCfgNode).getTypeName() = typename and exact = true + or + start.asParameter().getStaticType() = typename and + exact = false } newtype State = additional MkState(string typename, Boolean exact) { start0(_, typename, exact) } @@ -174,12 +206,20 @@ Node trackInstance(string typename, boolean exact) { exact)) } +private Type getTypeWithName(string s, boolean exact) { + result.getName() = s and + exact = true + or + result.getASubtype+().getName() = s and + exact = false +} + private CfgScope getTargetInstance(CfgNodes::CallCfgNode call) { // TODO: Also match argument/parameter types - exists(Node receiver, string method, string typename, Type t | + exists(Node receiver, string method, string typename, Type t, boolean exact | qualifiedCall(call, receiver, method) and - receiver = trackInstance(typename, _) and - t.getName() = typename + receiver = 
trackInstance(typename, exact) and + t = getTypeWithName(typename, exact) | if method = "new" then result = t.getAConstructor().getBody() @@ -199,6 +239,14 @@ class AdditionalCallTarget extends Unit { abstract DataFlowCallable viableTarget(CfgNodes::CallCfgNode call); } +/** Gets a summarized library callable that `call` may resolve to. */ +DataFlowCallable viableLibraryCallable(DataFlowCall call) { + exists(LibraryCallable callable | + result = TLibraryCallable(callable) and + call.asCall().getAstNode() = [callable.getACall(), callable.getACallSimple()] + ) +} + cached private module Cached { cached @@ -207,7 +255,13 @@ private module Cached { TLibraryCallable(LibraryCallable callable) cached - newtype TDataFlowCall = TNormalCall(CfgNodes::CallCfgNode c) + newtype TDataFlowCall = + TNormalCall(CfgNodes::CallCfgNode c) or + TSummaryCall( + FlowSummaryImpl::Public::SummarizedCallable c, FlowSummaryImpl::Private::SummaryNode receiver + ) { + FlowSummaryImpl::Private::summaryCallbackRange(c, receiver) + } /** Gets a viable run-time target for the call `call`. 
*/ cached @@ -223,12 +277,18 @@ private module Cached { cached newtype TArgumentPosition = TThisArgumentPosition() or - TKeywordArgumentPosition(string name) { name = any(Argument p).getName() } or + TKeywordArgumentPosition(string name) { + name = any(Argument p).getName() + or + FlowSummaryImpl::ParsePositions::isParsedKeywordParameterPosition(_, name) + } or TPositionalArgumentPosition(int pos, NamedSet ns) { exists(CfgNodes::CallCfgNode call | call = ns.getABindingCall() and exists(call.getArgument(pos)) ) + or + FlowSummaryImpl::ParsePositions::isParsedParameterPosition(_, pos) } or TPipelineArgumentPosition() diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll index 67ee749b771e..d61db2b6e567 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPrivate.qll @@ -6,6 +6,8 @@ private import semmle.code.powershell.dataflow.Ssa private import DataFlowPublic private import DataFlowDispatch private import SsaImpl as SsaImpl +private import FlowSummaryImpl as FlowSummaryImpl +private import semmle.code.powershell.frameworks.data.ModelsAsData /** Gets the callable in which this node occurs. 
*/ DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.(NodeImpl).getEnclosingCallable() } @@ -135,8 +137,25 @@ module LocalFlow { ) } + predicate flowSummaryLocalStep( + FlowSummaryNode nodeFrom, FlowSummaryNode nodeTo, FlowSummaryImpl::Public::SummarizedCallable c, + string model + ) { + FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom.getSummaryNode(), + nodeTo.getSummaryNode(), true, model) and + c = nodeFrom.getSummarizedCallable() + } + predicate localMustFlowStep(Node nodeFrom, Node nodeTo) { + SsaFlow::localMustFlowStep(_, nodeFrom, nodeTo) + or nodeFrom.asStmt() = nodeTo.asStmt().(CfgNodes::StmtNodes::AssignStmtCfgNode).getRightHandSide() + or + nodeFrom = + unique(FlowSummaryNode n1 | + FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(), + nodeTo.(FlowSummaryNode).getSummaryNode(), true, _) + ) } } @@ -168,18 +187,26 @@ private module Cached { n instanceof CfgNodes::ExprNodes::QualifierCfgNode or exists(CfgNodes::ExprNodes::MemberCfgNode member | - n = member.getBase() and + n = member.getQualifier() and not member.isStatic() ) or n = any(CfgNodes::ExprNodes::IndexCfgNode index).getBase() } or + TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn) or TPreReturnNodeImpl(CfgNodes::AstCfgNode n, Boolean isArray) { isMultiReturned(n) } or TImplicitWrapNode(CfgNodes::AstCfgNode n, Boolean shouldWrap) { isMultiReturned(n) } or TReturnNodeImpl(CfgScope scope) or TProcessNode(ProcessBlock process) or TProcessPropertyByNameNode(PipelineByPropertyNameIteratorVariable iter) { isProcessPropertyByNameNode(iter, _) + } or + TScriptBlockNode(ScriptBlock scriptBlock) or + TForbiddenRecursionGuard() { + none() and + // We want to prune irrelevant models before materialising data flow nodes, so types contributed + // directly from CodeQL must expose their pruning info without depending on data flow nodes. 
+ (any(ModelInput::TypeModel tm).isTypeUsed("") implies any()) } cached @@ -197,7 +224,14 @@ private module Cached { ( LocalFlow::localFlowStepCommon(nodeFrom, nodeTo) or - SsaFlow::localFlowStep(_, nodeFrom, nodeTo, _) + exists(SsaImpl::DefinitionExt def, boolean isUseStep | + SsaFlow::localFlowStep(def, nodeFrom, nodeTo, isUseStep) + | + isUseStep = false + or + isUseStep = true and + not FlowSummaryImpl::Private::Steps::prohibitsUseUseFlow(nodeFrom, _) + ) ) and model = "" } @@ -208,6 +242,10 @@ private module Cached { LocalFlow::localFlowStepCommon(nodeFrom, nodeTo) or SsaFlow::localFlowStep(_, nodeFrom, nodeTo, _) + or + // Simple flow through library code is included in the exposed local + // step relation, even though flow is technically inter-procedural + FlowSummaryImpl::Private::Steps::summaryThroughStepValue(nodeFrom, nodeTo, _) } /** @@ -255,7 +293,15 @@ private module Cached { n instanceof ParameterNode or // Expressions that can't be reached from another entry definition or expression - n instanceof ExprNode and + ( + n instanceof ExprNode + or + exists(CfgNodes::StmtNodes::AssignStmtCfgNode assign | assign.getRightHandSide() = n.asStmt()) + or + n.asStmt() instanceof CfgNodes::StmtNodes::CmdCfgNode + or + exists(CfgNodes::StmtNodes::PipelineCfgNode pipeline | n.asStmt() = pipeline.getAComponent()) + ) and not reachedFromExprOrEntrySsaDef(n) or // Ensure all entry SSA definitions are local sources, except those that correspond @@ -442,11 +488,20 @@ class NamedSet extends NamedSet0 { } } +NamedSet emptyNamedSet() { result.isEmpty() } + private module ParameterNodes { abstract class ParameterNodeImpl extends NodeImpl { abstract Parameter getParameter(); abstract predicate isParameterOf(DataFlowCallable c, ParameterPosition pos); + + final predicate isSourceParameterOf(CfgScope c, ParameterPosition pos) { + exists(DataFlowCallable callable | + this.isParameterOf(callable, pos) and + c = callable.asCfgScope() + ) + } } /** @@ -514,15 +569,52 @@ private 
module ParameterNodes { string getPropretyName() { result = this.getParameter().getName() } } + + /** A parameter for a library callable with a flow summary. */ + class SummaryParameterNode extends ParameterNodeImpl, FlowSummaryNode { + private ParameterPosition pos_; + + SummaryParameterNode() { + FlowSummaryImpl::Private::summaryParameterNode(this.getSummaryNode(), pos_) + } + + override Parameter getParameter() { none() } + + override predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.getSummarizedCallable() = c.asLibraryCallable() and pos = pos_ + } + } } import ParameterNodes +/** A data-flow node used to model flow summaries. */ +class FlowSummaryNode extends NodeImpl, TFlowSummaryNode { + FlowSummaryImpl::Private::SummaryNode getSummaryNode() { this = TFlowSummaryNode(result) } + + /** Gets the summarized callable that this node belongs to. */ + FlowSummaryImpl::Public::SummarizedCallable getSummarizedCallable() { + result = this.getSummaryNode().getSummarizedCallable() + } + + override CfgScope getCfgScope() { none() } + + override DataFlowCallable getEnclosingCallable() { + result.asLibraryCallable() = this.getSummarizedCallable() + } + + override EmptyLocation getLocationImpl() { any() } + + override string toStringImpl() { result = this.getSummaryNode().toString() } +} + /** A data-flow node that represents a call argument. */ abstract class ArgumentNode extends Node { /** Holds if this argument occurs at the given position in the given call. */ abstract predicate argumentOf(DataFlowCall call, ArgumentPosition pos); + abstract predicate sourceArgumentOf(CfgNodes::CallCfgNode call, ArgumentPosition pos); + /** Gets the call in which this node is an argument. 
*/ final DataFlowCall getCall() { this.argumentOf(result, _) } } @@ -534,13 +626,17 @@ module ArgumentNodes { ExplicitArgumentNode() { this.asExpr() = arg } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - arg.getCall() = call.asCall() and + this.sourceArgumentOf(call.asCall(), pos) + } + + override predicate sourceArgumentOf(CfgNodes::CallCfgNode call, ArgumentPosition pos) { + arg.getCall() = call and ( pos.isKeyword(arg.getName()) or exists(NamedSet ns, int i | i = arg.getPosition() and - ns.getAnExactBindingCall() = call.asCall() and + ns.getAnExactBindingCall() = call and pos.isPositional(i, ns) ) or @@ -565,10 +661,29 @@ module ArgumentNodes { PipelineArgumentNode() { isPipelineInput(this.getStmtNode(), consumer) } override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { - call.asCall() = consumer and + this.sourceArgumentOf(call.asCall(), pos) + } + + override predicate sourceArgumentOf(CfgNodes::CallCfgNode call, ArgumentPosition pos) { + call = consumer and pos.isPipeline() } } + + private class SummaryArgumentNode extends FlowSummaryNode, ArgumentNode { + private FlowSummaryImpl::Private::SummaryNode receiver; + private ArgumentPosition pos_; + + SummaryArgumentNode() { + FlowSummaryImpl::Private::summaryArgumentNode(receiver, this.getSummaryNode(), pos_) + } + + override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { + call.(SummaryCall).getReceiver() = receiver and pos = pos_ + } + + override predicate sourceArgumentOf(CfgNodes::CallCfgNode call, ArgumentPosition pos) { none() } + } } import ArgumentNodes @@ -601,6 +716,14 @@ private module EscapeContainer { this.getAChild().(EscapeContainer).mayBeMultiReturned(n) } } + + private class SummaryReturnNode extends FlowSummaryNode, ReturnNode { + private ReturnKind rk; + + SummaryReturnNode() { FlowSummaryImpl::Private::summaryReturnNode(this.getSummaryNode(), rk) } + + override ReturnKind getKind() { result = rk } + } } private module ReturnNodes { 
@@ -655,12 +778,24 @@ private module OutNodes { kind instanceof NormalReturnKind } } + + private class SummaryOutNode extends FlowSummaryNode, OutNode { + private SummaryCall call; + private ReturnKind kind_; + + SummaryOutNode() { + FlowSummaryImpl::Private::summaryOutNode(call.getReceiver(), this.getSummaryNode(), kind_) + } + + override DataFlowCall getCall(ReturnKind kind) { result = call and kind = kind_ } + } } import OutNodes predicate jumpStep(Node pred, Node succ) { - none() // TODO + FlowSummaryImpl::Private::Steps::summaryJumpStep(pred.(FlowSummaryNode).getSummaryNode(), + succ.(FlowSummaryNode).getSummaryNode()) } /** @@ -669,7 +804,7 @@ predicate jumpStep(Node pred, Node succ) { */ predicate storeStep(Node node1, ContentSet c, Node node2) { exists(CfgNodes::ExprNodes::MemberCfgWriteAccessNode var, Content::FieldContent fc | - node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getBase() and + node2.(PostUpdateNode).getPreUpdateNode().asExpr() = var.getQualifier() and node1.asStmt() = var.getAssignStmt().getRightHandSide() and fc.getName() = var.getMemberName() and c.isSingleton(fc) @@ -727,6 +862,9 @@ predicate storeStep(Node node1, ContentSet c, Node node2) { c.isAnyElement() and node2.(ReturnNodeImpl).getCfgScope() = cfgNode.getScope() ) + or + FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, + node2.(FlowSummaryNode).getSummaryNode()) } /** @@ -735,7 +873,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) { predicate readStep(Node node1, ContentSet c, Node node2) { exists(CfgNodes::ExprNodes::MemberCfgReadAccessNode var, Content::FieldContent fc | node2.asExpr() = var and - node1.asExpr() = var.getBase() and + node1.asExpr() = var.getQualifier() and fc.getName() = var.getMemberName() and c.isSingleton(fc) ) @@ -787,6 +925,9 @@ predicate readStep(Node node1, ContentSet c, Node node2) { def.getSourceVariable() = node1.(PipelineByPropertyNameParameterNode).getParameter() and 
SsaImpl::firstRead(def, node2.asExpr()) ) + or + FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, + node2.(FlowSummaryNode).getSummaryNode()) } /** @@ -795,6 +936,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) { * in `x.f = newValue`. */ predicate clearsContent(Node n, ContentSet c) { + FlowSummaryImpl::Private::Steps::summaryClearsContent(n.(FlowSummaryNode).getSummaryNode(), c) + or c.isSingleton(any(Content::FieldContent fc)) and n = any(PostUpdateNode pun | storeStep(_, c, pun)).getPreUpdateNode() or @@ -807,6 +950,8 @@ predicate clearsContent(Node n, ContentSet c) { * at node `n`. */ predicate expectsContent(Node n, ContentSet c) { + FlowSummaryImpl::Private::Steps::summaryExpectsContent(n.(FlowSummaryNode).getSummaryNode(), c) + or n = TPreReturnNodeImpl(_, true) and c.isKnownOrUnknownElement(any(Content::KnownElementContent ec | exists(ec.getIndex().asInt()))) or @@ -871,6 +1016,16 @@ private module PostUpdateNodes { override string toStringImpl() { result = "[post] " + e.toString() } } + + private class SummaryPostUpdateNode extends FlowSummaryNode, PostUpdateNodeImpl { + private FlowSummaryNode pre; + + SummaryPostUpdateNode() { + FlowSummaryImpl::Private::summaryPostUpdateNode(this.getSummaryNode(), pre.getSummaryNode()) + } + + override Node getPreUpdateNode() { result = pre } + } } private import PostUpdateNodes @@ -976,6 +1131,22 @@ private class ProcessPropertyByNameNode extends TProcessPropertyByNameNode, Node } } +class ScriptBlockNode extends TScriptBlockNode, NodeImpl { + private ScriptBlock scriptBlock; + + ScriptBlockNode() { this = TScriptBlockNode(scriptBlock) } + + ScriptBlock getScriptBlock() { result = scriptBlock } + + override CfgScope getCfgScope() { result = scriptBlock } + + override Location getLocationImpl() { result = scriptBlock.getLocation() } + + override string toStringImpl() { result = scriptBlock.toString() } + + override predicate nodeIsHidden() { any() } +} + /** A 
node that performs a type cast. */ class CastNode extends Node { CastNode() { none() } @@ -1028,9 +1199,13 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { /** Extra data-flow steps needed for lambda flow analysis. */ predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } -predicate knownSourceModel(Node source, string model) { none() } +predicate knownSourceModel(Node source, string model) { + source = ModelOutput::getASourceNode(_, model).asSource() +} -predicate knownSinkModel(Node sink, string model) { none() } +predicate knownSinkModel(Node sink, string model) { + sink = ModelOutput::getASinkNode(_, model).asSink() +} class DataFlowSecondLevelScope = Unit; @@ -1042,7 +1217,10 @@ class DataFlowSecondLevelScope = Unit; * by default as a heuristic. */ predicate allowParameterReturnInSelf(ParameterNodeImpl p) { - none() // TODO + exists(DataFlowCallable c, ParameterPosition pos | + p.isParameterOf(c, pos) and + FlowSummaryImpl::Private::summaryAllowParameterReturnInSelf(c.asLibraryCallable(), pos) + ) } /** An approximated `Content`. 
*/ diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll index 7c2e37eb6eac..3978e8085c5b 100644 --- a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/DataFlowPublic.qll @@ -1,6 +1,8 @@ private import powershell private import DataFlowDispatch private import DataFlowPrivate +private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl +private import semmle.code.powershell.ApiGraphs private import semmle.code.powershell.Cfg /** @@ -13,6 +15,8 @@ class Node extends TNode { CfgNodes::StmtCfgNode asStmt() { result = this.(StmtNode).getStmtNode() } + ScriptBlock asCallable() { result = this.(CallableNode).asCallableAstNode() } + /** Gets the parameter corresponding to this node, if any. */ Parameter asParameter() { result = this.(ParameterNode).getParameter() } @@ -27,6 +31,12 @@ class Node extends TNode { */ Node getAPredecessor() { localFlowStep(result, this) } + /** + * Gets a local source node from which data may flow to this node in zero or + * more local data-flow steps. + */ + LocalSourceNode getALocalSource() { result.flowsTo(this) } + /** * Gets a data flow node to which data may flow from this node in one local step. */ @@ -86,11 +96,81 @@ class ParameterNode extends Node { final Parameter getParameter() { result = getParameter(this) } } +/** + * A data flow node corresponding to a method, block, or lambda expression. + */ +class CallableNode extends Node instanceof ScriptBlockNode { + private ParameterPosition getParameterPosition(ParameterNodeImpl node) { + exists(DataFlowCallable c | + c.asCfgScope() = this.asCallableAstNode() and + result = getParameterPosition(node, c) + ) + } + + /** Gets the underlying AST node as a `Callable`. 
*/ + ScriptBlock asCallableAstNode() { result = super.getScriptBlock() } + + /** Gets the `n`th positional parameter. */ + ParameterNode getParameter(int n) { + this.getParameterPosition(result).isPositional(n, emptyNamedSet()) + } + + /** Gets the number of positional parameters of this callable. */ + final int getNumberOfParameters() { result = count(this.getParameter(_)) } + + /** Gets the keyword parameter of the given name. */ + ParameterNode getKeywordParameter(string name) { + this.getParameterPosition(result).isKeyword(name) + } + + /** + * Gets a data flow node whose value is about to be returned by this callable. + */ + Node getAReturnNode() { result = getAReturnNode(this.asCallableAstNode()) } +} + /** * A data-flow node that is a source of local flow. */ class LocalSourceNode extends Node { LocalSourceNode() { isLocalSourceNode(this) } + + /** Starts tracking this node forward using API graphs. */ + pragma[inline] + API::Node track() { result = API::Internal::getNodeForForwardTracking(this) } + + /** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */ + pragma[inline] + predicate flowsTo(Node nodeTo) { flowsTo(this, nodeTo) } + + /** + * Gets a node that this node may flow to using one heap and/or interprocedural step. + * + * See `TypeTracker` for more details about how to use this. + */ + pragma[inline] + LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) } + + /** + * Gets a node that may flow into this one using one heap and/or interprocedural step. + * + * See `TypeBackTracker` for more details about how to use this. + */ + pragma[inline] + LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t = t2.step(result, this) } + + /** + * Gets a node to which data may flow from this node in zero or + * more local data-flow steps. + */ + pragma[inline] + Node getALocalUse() { flowsTo(this, result) } + + /** Gets a method call where this node flows to the receiver. 
*/ + CallNode getAMethodCall() { Cached::hasMethodCall(this, result, _) } + + /** Gets a call to a method named `name`, where this node flows to the receiver. */ + CallNode getAMethodCall(string name) { Cached::hasMethodCall(this, result, name) } } /** @@ -115,9 +195,18 @@ class PostUpdateNode extends Node { cached private module Cached { + cached + predicate hasMethodCall(LocalSourceNode source, CallNode call, string name) { + source.flowsTo(call.getQualifier()) and + call.getName() = name + } + cached CfgScope getCfgScope(NodeImpl node) { result = node.getCfgScope() } + cached + ReturnNode getAReturnNode(ScriptBlock scriptBlock) { getCfgScope(result) = scriptBlock } + cached Parameter getParameter(ParameterNodeImpl param) { result = param.getParameter() } @@ -126,6 +215,11 @@ private module Cached { param.isParameterOf(c, result) } + cached + ParameterPosition getSourceParameterPosition(ParameterNodeImpl param, ScriptBlock c) { + param.isSourceParameterOf(c, result) + } + cached Node getPreUpdateNode(PostUpdateNodeImpl node) { result = node.getPreUpdateNode() } @@ -210,6 +304,23 @@ module Content { override string toString() { result = name } } + + /** Gets the element content corresponding to constant value `cv`. */ + ElementContent getElementContent(ConstantValue cv) { + result = TKnownElementContent(cv) + or + not exists(TKnownElementContent(cv)) and + result = TUnknownElementContent() + } + + /** + * Gets the constant value of `e`, which corresponds to a valid known + * element index. Unlike calling simply `e.getConstantValue()`, this + * excludes negative array indices. 
+ */ + ConstantValue getKnownElementIndex(Expr e) { + result = getElementContent(e.getValue()).(KnownElementContent).getIndex() + } } /** @@ -321,6 +432,8 @@ class ObjectCreationNode extends Node { } final CfgNodes::ObjectCreationCfgNode getObjectCreationNode() { result = objectCreation } + + string getConstructedTypeName() { result = this.getObjectCreationNode().getConstructedTypeName() } } /** A call, viewed as a node in a data flow graph. */ @@ -330,4 +443,10 @@ class CallNode extends AstNode { CallNode() { call = this.getCfgNode() } CfgNodes::CallCfgNode getCallNode() { result = call } + + string getName() { result = call.getName() } + + Node getQualifier() { result.asExpr() = call.getQualifier() } + + int getNumberOfArguments() { result = call.getNumberOfArguments() } } diff --git a/powershell/ql/lib/semmle/code/powershell/dataflow/internal/FlowSummaryImpl.qll b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/FlowSummaryImpl.qll new file mode 100644 index 000000000000..e35e33c4b5e4 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/dataflow/internal/FlowSummaryImpl.qll @@ -0,0 +1,232 @@ +/** + * Provides classes and predicates for defining flow summaries. 
+ */ + +private import codeql.dataflow.internal.FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax as AccessPath +private import powershell +private import semmle.code.powershell.dataflow.internal.DataFlowImplSpecific as DataFlowImplSpecific +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public + +module Input implements InputSig { + class SummarizedCallableBase = string; + + ArgumentPosition callbackSelfParameterPosition() { none() } + + ReturnKind getStandardReturnValueKind() { result instanceof NormalReturnKind } + + string encodeParameterPosition(ParameterPosition pos) { + exists(int i | + pos.isPositional(i, emptyNamedSet()) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = "-" + name + ) + or + pos.isThis() and + result = "this" + } + + string encodeArgumentPosition(ArgumentPosition pos) { + pos.isThis() and result = "this" + or + exists(int i | + pos.isPositional(i, emptyNamedSet()) and + result = i.toString() + ) + or + exists(string name | + pos.isKeyword(name) and + result = "-" + name + ) + } + + string encodeContent(ContentSet cs, string arg) { + exists(Content c | cs = TSingletonContent(c) | + c = TFieldContent(arg) and result = "Field" + or + exists(ConstantValue cv | + c = TKnownElementContent(cv) and + result = "Element" and + arg = cv.serialize() + "!" + ) + or + c = TUnknownElementContent() and result = "Element" and arg = "?" 
+ ) + or + cs = TAnyElementContent() and result = "Element" and arg = "any" + or + exists(Content::KnownElementContent kec | + cs = TKnownOrUnknownElementContent(kec) and + result = "Element" and + arg = kec.getIndex().serialize() + ) + } + + string encodeReturn(ReturnKind rk, string arg) { + not rk = Input::getStandardReturnValueKind() and + result = "ReturnValue" and + arg = rk.toString() + } + + string encodeWithoutContent(ContentSet c, string arg) { + result = "Without" + encodeContent(c, arg) + } + + string encodeWithContent(ContentSet c, string arg) { result = "With" + encodeContent(c, arg) } + + bindingset[token] + ParameterPosition decodeUnknownParameterPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Argument[x..y]` ranges + token.getName() = "Argument" and + result.isPositional(AccessPath::parseInt(token.getAnArgument()), emptyNamedSet()) + } + + bindingset[token] + ArgumentPosition decodeUnknownArgumentPosition(AccessPath::AccessPathTokenBase token) { + // needed to support `Parameter[x..y]` ranges + token.getName() = "Parameter" and + result.isPositional(AccessPath::parseInt(token.getAnArgument()), emptyNamedSet()) + } + + bindingset[token] + ContentSet decodeUnknownContent(AccessPath::AccessPathTokenBase token) { + token.getName() = "Element" and + result = TSingletonContent(TUnknownElementContent()) + } + + bindingset[token] + ContentSet decodeUnknownWithContent(AccessPath::AccessPathTokenBase token) { + token.getName() = "WithElement" and + result = TAnyElementContent() + } +} + +private import Make as Impl + +private module StepsInput implements Impl::Private::StepsInputSig { + DataFlowCall getACall(Public::SummarizedCallable sc) { + result.asCall().getAstNode() = sc.(LibraryCallable).getACall() + or + result.asCall().getAstNode() = sc.(LibraryCallable).getACallSimple() + } +} + +module Private { + import Impl::Private + + module Steps = Impl::Private::Steps; + + /** + * Provides predicates for constructing summary 
components. + */ + module SummaryComponent { + private import Impl::Private::SummaryComponent as SC + + predicate parameter = SC::parameter/1; + + predicate argument = SC::argument/1; + + predicate content = SC::content/1; + + predicate withoutContent = SC::withoutContent/1; + + predicate withContent = SC::withContent/1; + + /** Gets a summary component that represents a receiver. */ + SummaryComponent receiver() { result = argument(any(ParameterPosition pos | pos.isThis())) } + + /** Gets a summary component that represents an element in a collection at an unknown index. */ + SummaryComponent elementUnknown() { + result = SC::content(TSingletonContent(TUnknownElementContent())) + } + + /** Gets a summary component that represents an element in a collection at a known index. */ + SummaryComponent elementKnown(ConstantValue cv) { + result = SC::content(TSingletonContent(Content::getElementContent(cv))) + } + + /** + * Gets a summary component that represents an element in a collection at a specific + * known index `cv`, or an unknown index. + */ + SummaryComponent elementKnownOrUnknown(ConstantValue cv) { + result = SC::content(TKnownOrUnknownElementContent(TKnownElementContent(cv))) + or + not exists(TKnownElementContent(cv)) and + result = elementUnknown() + } + + /** + * Gets a summary component that represents an element in a collection at either an unknown + * index or known index. This has the same semantics as + * + * ```ql + * elementKnown(_) or elementUnknown() + * ``` + * + * but is more efficient, because it is represented by a single value. + */ + SummaryComponent elementAny() { result = SC::content(TAnyElementContent()) } + + /** Gets a summary component that represents the return value of a call. */ + SummaryComponent return() { result = SC::return(any(NormalReturnKind rk)) } + } + + /** + * Provides predicates for constructing stacks of summary components.
+ */ + module SummaryComponentStack { + private import Impl::Private::SummaryComponentStack as SCS + + predicate singleton = SCS::singleton/1; + + predicate push = SCS::push/2; + + predicate argument = SCS::argument/1; + + /** Gets a singleton stack representing a receiver. */ + SummaryComponentStack receiver() { result = singleton(SummaryComponent::receiver()) } + + /** Gets a singleton stack representing the return value of a call. */ + SummaryComponentStack return() { result = singleton(SummaryComponent::return()) } + } +} + +module Public = Impl::Public; + +module ParsePositions { + private import Private + + private predicate isParamBody(string body) { + body = any(AccessPathToken tok).getAnArgument("Parameter") + } + + private predicate isArgBody(string body) { + body = any(AccessPathToken tok).getAnArgument("Argument") + } + + predicate isParsedParameterPosition(string c, int i) { + isParamBody(c) and + i = AccessPath::parseInt(c) + } + + predicate isParsedArgumentPosition(string c, int i) { + isArgBody(c) and + i = AccessPath::parseInt(c) + } + + predicate isParsedKeywordParameterPosition(string c, string paramName) { + isParamBody(c) and + c = paramName + ":" + } + + predicate isParsedKeywordArgumentPosition(string c, string paramName) { + isArgBody(c) and + c = paramName + ":" + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftPowershellUtility/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftPowershellUtility/model.yml new file mode 100644 index 000000000000..3db1b7e4d586 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftPowershellUtility/model.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["Microsoft.PowerShell.Utility", "Method[Read-Host].ReturnValue", "stdin"] diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32Registry/model.yml 
b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32Registry/model.yml new file mode 100644 index 000000000000..567209acd712 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32Registry/model.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["Microsoft.Win32.Registry", "Method[GetValue].ReturnValue", "windows-registry"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32RegistryKey/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32RegistryKey/model.yml new file mode 100644 index 000000000000..af7e5e3608d9 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/MicrosoftWin32RegistryKey/model.yml @@ -0,0 +1,8 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["Microsoft.Win32.RegistryKey", "Instance.Method[GetValue].ReturnValue", "windows-registry"] + - ["Microsoft.Win32.RegistryKey", "Instance.Method[GetValueNames].ReturnValue", "windows-registry"] + - ["Microsoft.Win32.RegistryKey", "Instance.Method[GetSubKeyNames].ReturnValue", "windows-registry"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemConsole/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemConsole/model.yml new file mode 100644 index 000000000000..afc66e43f9f2 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemConsole/model.yml @@ -0,0 +1,8 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.Console", "Method[Read].ReturnValue", "stdin"] + - ["System.Console", "Method[ReadKey].ReturnValue", "stdin"] + - ["System.Console", "Method[ReadLine].ReturnValue", "stdin"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemEnvironment/model.yml
b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemEnvironment/model.yml new file mode 100644 index 000000000000..4d5a13178aa5 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemEnvironment/model.yml @@ -0,0 +1,9 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.Environment", "Method[ExpandEnvironmentVariables].ReturnValue", "environment"] + - ["System.Environment", "Method[GetCommandLineArgs].ReturnValue", "commandargs"] + - ["System.Environment", "Method[GetEnvironmentVariable].ReturnValue", "environment"] + - ["System.Environment", "Method[GetEnvironmentVariables].ReturnValue", "environment"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFile/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFile/model.yml new file mode 100644 index 000000000000..01a5ee388706 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFile/model.yml @@ -0,0 +1,21 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.IO.File", "Method[AppendText].ReturnValue", "file-write"] + - ["System.IO.File", "Method[Create].ReturnValue", "file-write"] + - ["System.IO.File", "Method[CreateText].ReturnValue", "file-write"] + - ["System.IO.File", "Method[Open].ReturnValue", "file-write"] + - ["System.IO.File", "Method[Open].ReturnValue", "file"] + - ["System.IO.File", "Method[OpenRead].ReturnValue", "file"] + - ["System.IO.File", "Method[OpenText].ReturnValue", "file"] + - ["System.IO.File", "Method[OpenWrite].ReturnValue", "file-write"] + - ["System.IO.File", "Method[ReadAllBytes].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadAllBytesAsync].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadAllLines].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadAllLinesAsync].ReturnValue", "file"] + - ["System.IO.File", 
"Method[ReadAllText].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadAllTextAsync].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadLines].ReturnValue", "file"] + - ["System.IO.File", "Method[ReadLinesAsync].ReturnValue", "file"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileInfo/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileInfo/model.yml new file mode 100644 index 000000000000..c10519e9ef76 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileInfo/model.yml @@ -0,0 +1,13 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.IO.FileInfo", "Method[AppendText].ReturnValue", "file-write"] + - ["System.IO.FileInfo", "Method[Create].ReturnValue", "file-write"] + - ["System.IO.FileInfo", "Method[CreateText].ReturnValue", "file-write"] + - ["System.IO.FileInfo", "Method[Open].ReturnValue", "file-write"] + - ["System.IO.FileInfo", "Method[Open].ReturnValue", "file"] + - ["System.IO.FileInfo", "Method[OpenRead].ReturnValue", "file"] + - ["System.IO.FileInfo", "Method[OpenText].ReturnValue", "file"] + - ["System.IO.FileInfo", "Method[OpenWrite].ReturnValue", "file-write"] diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileStream/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileStream/model.yml new file mode 100644 index 000000000000..7d1fc1d93b20 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOFileStream/model.yml @@ -0,0 +1,7 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.IO.FileStream", "Instance", "file"] + - ["System.IO.FileStream", "Instance", "file-write"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOStreamWriter/model.yml 
b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOStreamWriter/model.yml new file mode 100644 index 000000000000..20524b44d613 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemIOStreamWriter/model.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.IO.StreamWriter", "Instance", "file-write"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/SystemNetSockets/model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemNetSockets/model.yml new file mode 100644 index 000000000000..196d6838ce28 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/SystemNetSockets/model.yml @@ -0,0 +1,9 @@ +extensions: + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: + - ["System.Net.Sockets.TcpClient", "Instance.Method[GetStream].ReturnValue", "remote"] + - ["System.Net.Sockets.UdpClient", "Instance.Method[EndReceive].ReturnValue", "remote"] + - ["System.Net.Sockets.UdpClient", "Instance.Method[Receive].ReturnValue", "remote"] + - ["System.Net.Sockets.UdpClient", "Instance.Method[ReceiveAsync].ReturnValue", "remote"] \ No newline at end of file diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/ModelsAsData.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/ModelsAsData.qll new file mode 100644 index 000000000000..ddacb7b2d258 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/ModelsAsData.qll @@ -0,0 +1,51 @@ +/** + * Provides classes for contributing a model, or using the interpreted results + * of a model represented as data.
+ */ + +private import powershell +private import semmle.code.powershell.ApiGraphs +private import internal.ApiGraphModels as Shared +private import internal.ApiGraphModelsSpecific as Specific +import Shared::ModelInput as ModelInput +import Shared::ModelOutput as ModelOutput +private import semmle.code.powershell.dataflow.flowsources.FlowSources +private import semmle.code.powershell.dataflow.FlowSummary + +/** + * A remote flow source originating from a CSV source row. + */ +private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range { + RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").asSource() } + + override string getSourceType() { result = "Remote flow (from model)" } +} + +private class SummarizedCallableFromModel extends SummarizedCallable { + string type; + string path; + + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _, _) and + this = type + ";" + path + } + + override Call getACall() { + exists(API::MethodAccessNode base | + ModelOutput::resolvedSummaryBase(type, path, base) and + result = base.asCall().asExpr().getExpr() + ) + } + + override predicate propagatesFlow( + string input, string output, boolean preservesValue, string model + ) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind, model) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/empty.model.yml b/powershell/ql/lib/semmle/code/powershell/frameworks/data/empty.model.yml new file mode 100644 index 000000000000..63cc261eb306 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/empty.model.yml @@ -0,0 +1,32 @@ +extensions: + # Make sure that the extensible model predicates have at least one definition + # to avoid errors about undefined extensionals. 
+ - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sourceModel + data: [] + + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: sinkModel + data: [] + + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: summaryModel + data: [] + + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: neutralModel + data: [] + + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: typeModel + data: [] + + - addsTo: + pack: microsoft-sdl/powershell-all + extensible: typeVariableModel + data: [] diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll new file mode 100644 index 000000000000..aec65d1819cf --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModels.qll @@ -0,0 +1,634 @@ +/** + * INTERNAL use only. This is an experimental API subject to change without notice. + * + * Provides classes and predicates for dealing with flow models specified in extensible predicates. + * + * The extensible predicates have the following columns: + * - Sources: + * `type, path, kind` + * - Sinks: + * `type, path, kind` + * - Summaries: + * `type, path, input, output, kind` + * - Types: + * `type1, type2, path` + * + * The interpretation of a row is similar to API-graphs with a left-to-right + * reading. + * 1. The `type` column selects all instances of a named type. The syntax of this column is language-specific. + * The language defines some type names that the analysis knows how to identify without models. + * It can also be a synthetic type name defined by a type definition (see type definitions below). + * 2. The `path` column is a `.`-separated list of "access path tokens" to resolve, starting at the node selected by `type`. + * + * Every language supports the following tokens: + * - Argument[n]: the n-th argument to a call. 
May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * Additionally, `N-1` refers to the last argument, `N-2` refers to the second-last, and so on. + * - Parameter[n]: the n-th parameter of a callback. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * - ReturnValue: the value returned by a function call + * - WithArity[n]: match a call with the given arity. May be a range of form `x..y` (inclusive) and/or a comma-separated list. + * + * The following tokens are common and should be implemented for languages where it makes sense: + * - Member[x]: a member named `x`; exactly what a "member" is depends on the language. May be a comma-separated list of names. + * - Instance: an instance of a class + * - Subclass: a subclass of a class + * - ArrayElement: an element of an array + * - Element: an element of a collection-like object + * - MapKey: a key in a map-like object + * - MapValue: a value in a map-like object + * - Awaited: the value from a resolved promise/future-like object + * + * For the time being, please consult `ApiGraphModelsSpecific.qll` to see which language-specific tokens are currently supported. + * + * 3. The `input` and `output` columns specify how data enters and leaves the element selected by the + * first `(type, path)` tuple. Both strings are `.`-separated access paths + * of the same syntax as the `path` column. + * 4. The `kind` column is a tag that can be referenced from QL to determine to + * which classes the interpreted elements should be added. For example, for + * sources `"remote"` indicates a default remote flow source, and for summaries + * `"taint"` indicates a default additional taint step and `"value"` indicates a + * globally applicable value-preserving step. + * + * ### Types + * + * A type row of form `type1; type2; path` indicates that `type2; path` + * should be seen as an instance of the type `type1`.
+ * + * A type may refer to a static type or a synthetic type name used internally in the model. + * Synthetic type names can be used to reuse intermediate sub-paths, when there are multiple ways to access the same + * element. + * See `ModelsAsData.qll` for the language-specific interpretation of type names. + * + * By convention, if one wants to avoid clashes with static types, the type name + * should be prefixed with a tilde character (`~`). For example, `~Bar` can be used to indicate that + * the type is not intended to match a static type. + */ + +private import codeql.util.Unit +private import ApiGraphModelsSpecific as Specific + +private module API = Specific::API; + +private module DataFlow = Specific::DataFlow; + +private import semmle.code.powershell.controlflow.CfgNodes +private import ApiGraphModelsExtensions as Extensions +private import codeql.dataflow.internal.AccessPathSyntax + +/** Module containing hooks for providing input data to be interpreted as a model. */ +module ModelInput { + /** + * A unit class for adding additional type model rows from CodeQL models. + */ + class TypeModel extends Unit { + /** + * Holds if any of the other predicates in this class might have a result + * for the given `type`. + * + * The implementation of this predicate should not depend on `DataFlow::Node`. + */ + bindingset[type] + predicate isTypeUsed(string type) { none() } + + /** + * Gets a data-flow node that is a source of the given `type`. + * + * Note that `type` should also be included in `isTypeUsed`. + * + * This must not depend on API graphs, but ensures that an API node is generated for + * the source. + */ + DataFlow::Node getASource(string type) { none() } + + /** + * Gets a data-flow node that is a sink of the given `type`, + * usually because it is an argument passed to a parameter of that type. + * + * Note that `type` should also be included in `isTypeUsed`. 
+ * + * This must not depend on API graphs, but ensures that an API node is generated for + * the sink. + */ + DataFlow::Node getASink(string type) { none() } + + /** + * Gets an API node that is a source or sink of the given `type`. + * + * Note that `type` should also be included in `isTypeUsed`. + * + * Unlike `getASource` and `getASink`, this may depend on API graphs. + */ + API::Node getAnApiNode(string type) { none() } + } +} + +private import ModelInput + +/** + * An empty class, except in specific tests. + * + * If this is non-empty, all models are parsed even if the type name is not + * considered relevant for the current database. + */ +abstract class TestAllModels extends Unit { } + +/** Holds if a source model exists for the given parameters. */ +predicate sourceModel(string type, string path, string kind, string model) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sourceModel(type, path, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if a sink model exists for the given parameters. */ +private predicate sinkModel(string type, string path, string kind, string model) { + exists(QlBuiltins::ExtensionId madId | + Extensions::sinkModel(type, path, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if a summary model `row` exists for the given parameters. */ +private predicate summaryModel( + string type, string path, string input, string output, string kind, string model +) { + exists(QlBuiltins::ExtensionId madId | + Extensions::summaryModel(type, path, input, output, kind, madId) and + model = "MaD:" + madId.toString() + ) +} + +/** Holds if `(type2, path)` should be seen as an instance of `type1`. */ +predicate typeModel(string type1, string type2, string path) { + Extensions::typeModel(type1, type2, path) +} + +/** Holds if a type variable model exists for the given parameters. 
*/ +private predicate typeVariableModel(string name, string path) { + Extensions::typeVariableModel(name, path) +} + +/** + * Holds if the given extension tuple `madId` should pretty-print as `model`. + * + * This predicate should only be used in tests. + */ +predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { + exists(string type, string path, string kind | + Extensions::sourceModel(type, path, kind, madId) and + model = "Source: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string kind | + Extensions::sinkModel(type, path, kind, madId) and + model = "Sink: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string input, string output, string kind | + Extensions::summaryModel(type, path, input, output, kind, madId) and + model = "Summary: " + type + "; " + path + "; " + input + "; " + output + "; " + kind + ) +} + +/** + * Holds if rows involving `type` might be relevant for the analysis of this database. + */ +predicate isRelevantType(string type) { + ( + sourceModel(type, _, _, _) or + sinkModel(type, _, _, _) or + summaryModel(type, _, _, _, _, _) or + typeModel(_, type, _) + ) and + ( + Specific::isTypeUsed(type) + or + any(TypeModel model).isTypeUsed(type) + or + exists(TestAllModels t) + ) + or + exists(string other | isRelevantType(other) | + typeModel(type, other, _) + or + Specific::hasImplicitTypeModel(type, other) + ) +} + +/** + * Holds if `type,path` is used in some row. + */ +pragma[nomagic] +predicate isRelevantFullPath(string type, string path) { + isRelevantType(type) and + ( + sourceModel(type, path, _, _) or + sinkModel(type, path, _, _) or + summaryModel(type, path, _, _, _, _) or + typeModel(_, type, path) + ) +} + +/** A string from a row that should be parsed as an access path. 
*/ +private predicate accessPathRange(string s) { + isRelevantFullPath(_, s) + or + exists(string type | isRelevantType(type) | + summaryModel(type, _, s, _, _, _) or + summaryModel(type, _, _, s, _, _) + ) + or + typeVariableModel(_, s) +} + +import AccessPath<accessPathRange/1> + +/** + * Gets a successor of `node` in the API graph. + */ +bindingset[token] +API::Node getSuccessorFromNode(API::Node node, AccessPathTokenBase token) { + // API graphs use the same label for arguments and parameters. An edge originating from a + // use-node represents an argument, and an edge originating from a def-node represents a parameter. + // We just map both to the same thing. + token.getName() = ["Argument", "Parameter"] and + result = node.getParameter(parseIntUnbounded(token.getAnArgument())) + or + token.getName() = "ReturnValue" and + result = node.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromNode(node, token) +} + +/** + * Gets an API-graph successor for the given invocation. + */ +bindingset[token] +API::Node getSuccessorFromInvoke(Specific::InvokeNode invoke, AccessPathTokenBase token) { + token.getName() = "Argument" and + result = invoke.getParameter(parseIntWithArity(token.getAnArgument(), invoke.getNumArgument())) + or + token.getName() = "ReturnValue" and + result = invoke.getReturn() + or + // Language-specific tokens + result = Specific::getExtraSuccessorFromInvoke(invoke, token) +} + +/** + * Holds if `invoke` invokes a call-site filter given by `token`. 
+ */ +bindingset[token] +private predicate invocationMatchesCallSiteFilter( + Specific::InvokeNode invoke, AccessPathTokenBase token +) { + token.getName() = "WithArity" and + invoke.getNumArgument() = parseIntUnbounded(token.getAnArgument()) + or + Specific::invocationMatchesExtraCallSiteFilter(invoke, token) +} + +private class TypeModelUseEntry extends API::EntryPoint { + private string type; + + TypeModelUseEntry() { + exists(any(TypeModel tm).getASource(type)) and + this = "TypeModelUseEntry;" + type + } + + override DataFlow::LocalSourceNode getASource() { result = any(TypeModel tm).getASource(type) } + + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } +} + +private class TypeModelDefEntry extends API::EntryPoint { + private string type; + + TypeModelDefEntry() { + exists(any(TypeModel tm).getASink(type)) and + this = "TypeModelDefEntry;" + type + } + + override DataFlow::Node getASink() { result = any(TypeModel tm).getASink(type) } + + API::Node getNodeForType(string type_) { type = type_ and result = this.getANode() } +} + +/** + * Gets an API node identified by the given `type`. + */ +pragma[nomagic] +private API::Node getNodeFromType(string type) { + exists(string type2, AccessPath path2 | + typeModel(type, type2, path2) and + result = getNodeFromPath(type2, path2) + ) + or + result = any(TypeModelUseEntry e).getNodeForType(type) + or + result = any(TypeModelDefEntry e).getNodeForType(type) + or + result = any(TypeModel t).getAnApiNode(type) + or + result = Specific::getExtraNodeFromType(type) +} + +/** + * Gets the API node identified by the first `n` tokens of `path` in the given `(type, path)` tuple. 
+ */ +pragma[nomagic] +API::Node getNodeFromPath(string type, AccessPath path, int n) { + isRelevantFullPath(type, path) and + ( + n = 0 and + result = getNodeFromType(type) + or + result = Specific::getExtraNodeFromPath(type, path, n) + ) + or + result = getSuccessorFromNode(getNodeFromPath(type, path, n - 1), path.getToken(n - 1)) + or + // Similar to the other recursive case, but where the path may have stepped through one or more call-site filters + result = getSuccessorFromInvoke(getInvocationFromPath(type, path, n - 1), path.getToken(n - 1)) + or + // Apply a subpath + result = getNodeFromSubPath(getNodeFromPath(type, path, n - 1), getSubPathAt(path, n - 1)) + or + // Apply a type step + typeStep(getNodeFromPath(type, path, n), result) + or + // Apply a fuzzy step (without advancing 'n') + path.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromPath(type, path, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + path.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromPath(type, path, n - 1) +} + +/** + * Gets a subpath for the `TypeVar` token found at the `n`th token of `path`. + */ +pragma[nomagic] +private AccessPath getSubPathAt(AccessPath path, int n) { + exists(string typeVarName | + path.getToken(n).getAnArgument("TypeVar") = typeVarName and + typeVariableModel(typeVarName, result) + ) +} + +/** + * Gets a node that is found by evaluating the first `n` tokens of `subPath` starting at `base`. 
+ */ +pragma[nomagic] +private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath, int n) { + exists(AccessPath path, int k | + base = [getNodeFromPath(_, path, k), getNodeFromSubPath(_, path, k)] and + subPath = getSubPathAt(path, k) and + result = base and + n = 0 + ) + or + exists(string type, AccessPath basePath | + typeStepModel(type, basePath, subPath) and + base = getNodeFromPath(type, basePath) and + result = base and + n = 0 + ) + or + result = getSuccessorFromNode(getNodeFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1)) + or + result = + getSuccessorFromInvoke(getInvocationFromSubPath(base, subPath, n - 1), subPath.getToken(n - 1)) + or + result = + getNodeFromSubPath(getNodeFromSubPath(base, subPath, n - 1), getSubPathAt(subPath, n - 1)) + or + typeStep(getNodeFromSubPath(base, subPath, n), result) and + // Only apply type-steps strictly between the steps on the sub path, not before and after. + // Steps before/after lead to unnecessary transitive edges, which the user of the sub-path + // will themselves find by following type-steps. + n > 0 and + n < subPath.getNumToken() + or + // Apply a fuzzy step (without advancing 'n') + subPath.getToken(n).getName() = "Fuzzy" and + result = Specific::getAFuzzySuccessor(getNodeFromSubPath(base, subPath, n)) + or + // Skip a fuzzy step (advance 'n' without changing the current node) + subPath.getToken(n - 1).getName() = "Fuzzy" and + result = getNodeFromSubPath(base, subPath, n - 1) +} + +/** + * Gets a call site that is found by evaluating the first `n` tokens of `subPath` starting at `base`. + */ +private Specific::InvokeNode getInvocationFromSubPath(API::Node base, AccessPath subPath, int n) { + result = Specific::getAnInvocationOf(getNodeFromSubPath(base, subPath, n)) + or + result = getInvocationFromSubPath(base, subPath, n - 1) and + invocationMatchesCallSiteFilter(result, subPath.getToken(n - 1)) +} + +/** + * Gets a node that is found by evaluating `subPath` starting at `base`. 
+ */ +pragma[nomagic] +private API::Node getNodeFromSubPath(API::Node base, AccessPath subPath) { + result = getNodeFromSubPath(base, subPath, subPath.getNumToken()) +} + +/** Gets the node identified by the given `(type, path)` tuple. */ +private API::Node getNodeFromPath(string type, AccessPath path) { + result = getNodeFromPath(type, path, path.getNumToken()) +} + +pragma[nomagic] +private predicate typeStepModel(string type, AccessPath basePath, AccessPath output) { + summaryModel(type, basePath, "", output, "type", _) +} + +pragma[nomagic] +private predicate typeStep(API::Node pred, API::Node succ) { + exists(string type, AccessPath basePath, AccessPath output | + typeStepModel(type, basePath, output) and + pred = getNodeFromPath(type, basePath) and + succ = getNodeFromSubPath(pred, output) + ) +} + +/** + * Gets an invocation identified by the given `(type, path)` tuple. + * + * Unlike `getNodeFromPath`, the `path` may end with one or more call-site filters. + */ +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path, int n) { + result = Specific::getAnInvocationOf(getNodeFromPath(type, path, n)) + or + result = getInvocationFromPath(type, path, n - 1) and + invocationMatchesCallSiteFilter(result, path.getToken(n - 1)) +} + +/** Gets an invocation identified by the given `(type, path)` tuple. */ +private Specific::InvokeNode getInvocationFromPath(string type, AccessPath path) { + result = getInvocationFromPath(type, path, path.getNumToken()) +} + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + */ +bindingset[name] +private predicate isValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Argument", "Parameter", "ReturnValue", "WithArity", "TypeVar", "Fuzzy"] + or + Specific::isExtraValidTokenNameInIdentifyingAccessPath(name) +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occurring + * in an identifying access path. 
+ */ +bindingset[name] +private predicate isValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = ["ReturnValue", "Fuzzy"] + or + Specific::isExtraValidNoArgumentTokenInIdentifyingAccessPath(name) +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. + */ +bindingset[name, argument] +private predicate isValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Argument", "Parameter"] and + argument.regexpMatch("(N-|-)?\\d+(\\.\\.((N-|-)?\\d+)?)?") + or + name = "WithArity" and + argument.regexpMatch("\\d+(\\.\\.(\\d+)?)?") + or + name = "TypeVar" and + exists(argument) + or + Specific::isExtraValidTokenArgumentInIdentifyingAccessPath(name, argument) +} + +/** + * Module providing access to the imported models in terms of API graph nodes. + */ +module ModelOutput { + cached + private module Cached { + /** + * Holds if a source model contributed `source` with the given `kind`. + */ + cached + API::Node getASourceNode(string kind, string model) { + exists(string type, string path | + sourceModel(type, path, kind, model) and + result = getNodeFromPath(type, path) + ) + } + + /** + * Holds if a sink model contributed `sink` with the given `kind`. + */ + cached + API::Node getASinkNode(string kind, string model) { + exists(string type, string path | + sinkModel(type, path, kind, model) and + result = getNodeFromPath(type, path) + ) + } + + /** + * Holds if a relevant summary exists for these parameters. + */ + cached + predicate relevantSummaryModel( + string type, string path, string input, string output, string kind, string model + ) { + isRelevantType(type) and + summaryModel(type, path, input, output, kind, model) + } + + /** + * Holds if a `baseNode` is an invocation identified by the `type,path` part of a summary row. 
+ */ + cached + predicate resolvedSummaryBase(string type, string path, Specific::InvokeNode baseNode) { + summaryModel(type, path, _, _, _, _) and + baseNode = getInvocationFromPath(type, path) + } + + /** + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + + /** + * Holds if `node` is seen as an instance of `type` due to a type definition + * contributed by a model. + */ + cached + API::Node getATypeNode(string type) { result = getNodeFromType(type) } + } + + import Cached + import Specific::ModelOutputSpecific + private import codeql.mad.ModelValidation as SharedModelVal + + /** + * Gets an API node that some source model identifies as a source of the given `kind`. + */ + API::Node getASourceNode(string kind) { result = getASourceNode(kind, _) } + + /** + * Gets an API node that some sink model identifies as a sink of the given `kind`. + */ + API::Node getASinkNode(string kind) { result = getASinkNode(kind, _) } + + private module KindValConfig implements SharedModelVal::KindValidationConfigSig { + predicate summaryKind(string kind) { summaryModel(_, _, _, _, kind, _) } + + predicate sinkKind(string kind) { sinkModel(_, _, kind, _) } + + predicate sourceKind(string kind) { sourceModel(_, _, kind, _) } + } + + private module KindVal = SharedModelVal::KindValidation<KindValConfig>; + + /** + * Gets an error message relating to an invalid CSV row in a model. 
+ */ + string getAWarning() { + // Check names and arguments of access path tokens + exists(AccessPath path, AccessPathToken token | + (isRelevantFullPath(_, path) or typeVariableModel(_, path)) and + token = path.getToken(_) + | + not isValidTokenNameInIdentifyingAccessPath(token.getName()) and + result = "Invalid token name '" + token.getName() + "' in access path: " + path + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + exists(string argument | + argument = token.getAnArgument() and + not isValidTokenArgumentInIdentifyingAccessPath(token.getName(), argument) and + result = + "Invalid argument '" + argument + "' in token '" + token + "' in access path: " + path + ) + or + isValidTokenNameInIdentifyingAccessPath(token.getName()) and + token.getNumArgument() = 0 and + not isValidNoArgumentTokenInIdentifyingAccessPath(token.getName()) and + result = "Invalid token '" + token + "' is missing its arguments, in access path: " + path + ) + or + // Check for invalid model kinds + result = KindVal::getInvalidModelKind() + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsExtensions.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsExtensions.qll new file mode 100644 index 000000000000..b86d7de457ee --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsExtensions.qll @@ -0,0 +1,69 @@ +/** + * Defines extensible predicates for contributing library models from data extensions. + */ + +/** + * Holds if the value at `(type, path)` should be seen as a flow + * source of the given `kind`. + * + * The kind `remote` represents a general remote flow source. + */ +extensible predicate sourceModel( + string type, string path, string kind, QlBuiltins::ExtensionId madId +); + +/** + * Holds if the value at `(type, path)` should be seen as a sink + * of the given `kind`. 
+ */ +extensible predicate sinkModel(string type, string path, string kind, QlBuiltins::ExtensionId madId); + +/** + * Holds if in calls to `(type, path)`, the value referred to by `input` + * can flow to the value referred to by `output`. + * + * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps, + * respectively. + */ +extensible predicate summaryModel( + string type, string path, string input, string output, string kind, QlBuiltins::ExtensionId madId +); + +/** + * Holds if calls to `(type, path)` should be considered neutral. The meaning of this depends on the `kind`. + * If `kind` is `summary`, the call does not propagate data flow. If `kind` is `source`, the call is not a source. + * If `kind` is `sink`, the call is not a sink. + */ +extensible predicate neutralModel(string type, string path, string kind); + +/** + * Holds if `(type2, path)` should be seen as an instance of `type1`. + */ +extensible predicate typeModel(string type1, string type2, string path); + +/** + * Holds if `path` can be substituted for a token `TypeVar[name]`. + */ +extensible predicate typeVariableModel(string name, string path); + +/** + * Holds if the given extension tuple `madId` should pretty-print as `model`. + * + * This predicate should only be used in tests. 
+ */ +predicate interpretModelForTest(QlBuiltins::ExtensionId madId, string model) { + exists(string type, string path, string kind | + sourceModel(type, path, kind, madId) and + model = "Source: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string kind | + sinkModel(type, path, kind, madId) and + model = "Sink: " + type + "; " + path + "; " + kind + ) + or + exists(string type, string path, string input, string output, string kind | + summaryModel(type, path, input, output, kind, madId) and + model = "Summary: " + type + "; " + path + "; " + input + "; " + output + "; " + kind + ) +} diff --git a/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll new file mode 100644 index 000000000000..d4f61ab4d4ac --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -0,0 +1,201 @@ +/** + * Contains the language-specific part of the models-as-data implementation found in `ApiGraphModels.qll`. 
+ * + * It must export the following members: + * ```ql + * class Unit // a unit type + * class InvokeNode // a type representing an invocation connected to the API graph + * module API // the API graph module + * predicate isTypeUsed(string rawType) + * API::Node getExtraNodeFromPath(string type, AccessPath path, int n) + * API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) + * API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) + * predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) + * InvokeNode getAnInvocationOf(API::Node node) + * predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) + * predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) + * predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) + * ``` + */ + +private import powershell +private import ApiGraphModels +private import semmle.code.powershell.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import codeql.dataflow.internal.AccessPathSyntax +// Re-export libraries needed by ApiGraphModels.qll +import semmle.code.powershell.ApiGraphs +import semmle.code.powershell.dataflow.DataFlow::DataFlow as DataFlow +private import FlowSummaryImpl::Public +private import semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch + +bindingset[rawType] +predicate isTypeUsed(string rawType) { any() } + +bindingset[rawType] +private predicate parseType(string rawType, string mod, string type) { + exists(string regexp | + regexp = "(.+)\\.([^\\.]+)" and + mod = rawType.regexpCapture(regexp, 1) and + type = rawType.regexpCapture(regexp, 2) + ) +} + +private predicate parseRelevantType(string rawType, string consts, string suffix) { + isRelevantType(rawType) and + parseType(rawType, consts, suffix) +} + +/** + * Holds if `type` can be obtained from an instance of `otherType` due to + * language semantics 
modeled by `getExtraNodeFromType`. + */ +bindingset[otherType] +predicate hasImplicitTypeModel(string type, string otherType) { none() } + +/** Gets a Powershell-specific interpretation of the `(type, path)` tuple after resolving the first `n` access path tokens. */ +bindingset[type, path] +API::Node getExtraNodeFromPath(string type, AccessPath path, int n) { + // A row of form `any;Method[foo]` should match any method named `foo`. + type = "any" and + n = 1 and + exists(string methodName, DataFlow::CallNode call | + methodMatchedByName(path, methodName) and + call.getName() = methodName and + result.(API::MethodAccessNode).asCall() = call + ) +} + +/** Gets a Powershell-specific interpretation of the given `type`. */ +API::Node getExtraNodeFromType(string qualifiedType) { + qualifiedType = "" and + result = API::root() + or + // TODO: How to distinguish between these two cases? And do we need to? + exists(string mod, string type | parseRelevantType(qualifiedType, mod, type) | + result = API::mod(qualifiedType) + or + result = API::mod(mod).getType(type) + ) +} + +/** + * Holds if `path` occurs in a CSV row with type `any`, meaning it can start + * matching anywhere, and the path begins with `Method[methodName]`. + */ +private predicate methodMatchedByName(AccessPath path, string methodName) { + isRelevantFullPath("any", path) and + exists(AccessPathToken token | + token = path.getToken(0) and + token.getName() = "Method" and + methodName = token.getAnArgument() + ) +} + +/** + * Gets a Powershell-specific API graph successor of `node` reachable by resolving `token`. 
+ */ +bindingset[token] +API::Node getExtraSuccessorFromNode(API::Node node, AccessPathTokenBase token) { + token.getName() = "Member" and + result = node.getMember(token.getAnArgument()) + or + token.getName() = "Method" and + result = node.getMethod(token.getAnArgument()) + or + token.getName() = "Instance" and + result = node.getInstance() + or + token.getName() = "Parameter" and + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + token.getAnArgument() = FlowSummaryImpl::Input::encodeArgumentPosition(argPos) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getParameterAtPosition(paramPos) + ) + or + exists(DataFlow::ContentSet contents | + token.getName() = FlowSummaryImpl::Input::encodeContent(contents, token.getAnArgument()) and + result = node.getContents(contents) + ) +} + +/** + * Gets a Powershell-specific API graph successor of `node` reachable by resolving `token`. + */ +bindingset[token] +API::Node getExtraSuccessorFromInvoke(InvokeNode node, AccessPathTokenBase token) { + token.getName() = "Argument" and + exists(DataFlowDispatch::ArgumentPosition argPos, DataFlowDispatch::ParameterPosition paramPos | + token.getAnArgument() = FlowSummaryImpl::Input::encodeParameterPosition(paramPos) and + DataFlowDispatch::parameterMatch(paramPos, argPos) and + result = node.getArgumentAtPosition(argPos) + ) +} + +pragma[inline] +API::Node getAFuzzySuccessor(API::Node node) { + result = node.getMethod(_) + or + result = + node.getArgumentAtPosition(any(DataFlowDispatch::ArgumentPosition apos | not apos.isThis())) + or + result = + node.getParameterAtPosition(any(DataFlowDispatch::ParameterPosition ppos | not ppos.isThis())) + or + result = node.getReturn() + or + result = node.getAnElement() + or + result = node.getInstance() +} + +/** + * Holds if `invoke` matches the Powershell-specific call site filter in `token`. 
+ */ +bindingset[token] +predicate invocationMatchesExtraCallSiteFilter(InvokeNode invoke, AccessPathTokenBase token) { + none() +} + +/** An API graph node representing a method call. */ +class InvokeNode extends API::MethodAccessNode { + /** Gets the number of arguments to the call. */ + int getNumArgument() { result = this.asCall().getNumberOfArguments() } +} + +/** Gets the `InvokeNode` corresponding to a specific invocation of `node`. */ +InvokeNode getAnInvocationOf(API::Node node) { result = node } + +/** + * Holds if `name` is a valid name for an access path token in the identifying access path. + */ +bindingset[name] +predicate isExtraValidTokenNameInIdentifyingAccessPath(string name) { + name = ["Member", "Method", "Instance", "WithBlock", "WithoutBlock", "Element", "Field"] +} + +/** + * Holds if `name` is a valid name for an access path token with no arguments, occurring + * in an identifying access path. + */ +predicate isExtraValidNoArgumentTokenInIdentifyingAccessPath(string name) { + name = ["Instance", "WithBlock", "WithoutBlock"] +} + +/** + * Holds if `argument` is a valid argument to an access path token with the given `name`, occurring + * in an identifying access path. 
+ */ +bindingset[name, argument] +predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string argument) { + name = ["Member", "Method", "Element", "Field"] and + exists(argument) + or + name = ["Argument", "Parameter"] and + ( + argument = ["self", "lambda-self", "block", "any", "any-named"] + or + argument.regexpMatch("\\w+:") // keyword argument + ) +} + +module ModelOutputSpecific { } diff --git a/powershell/ql/lib/semmle/code/powershell/typetracking/ApiGraphShared.qll b/powershell/ql/lib/semmle/code/powershell/typetracking/ApiGraphShared.qll new file mode 100644 index 000000000000..8efac32b7624 --- /dev/null +++ b/powershell/ql/lib/semmle/code/powershell/typetracking/ApiGraphShared.qll @@ -0,0 +1,328 @@ +/** + * Parts of API graphs that can be shared with other dynamic languages. + * + * Depends on TypeTrackerSpecific for the corresponding language. + */ + +private import codeql.util.Location +private import semmle.code.powershell.dataflow.DataFlow +private import semmle.code.powershell.typetracking.internal.TypeTrackingImpl + +/** + * The signature to use when instantiating `ApiGraphShared`. + * + * The implementor should define a newtype with at least three branches as follows: + * ```ql + * newtype TApiNode = + * MkForwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + * MkBackwardNode(LocalSourceNode node, TypeTracker t) { isReachable(node, t) } or + * MkSinkNode(Node node) { ... } or + * ... + * ``` + * + * The three branches should be exposed through `getForwardNode`, `getBackwardNode`, and `getSinkNode`, respectively. + */ +signature module ApiGraphSharedSig { + /** A node in the API graph. */ + class ApiNode { + /** Gets a string representation of this API node. */ + string toString(); + + /** Gets the location associated with this API node, if any. */ + Location getLocation(); + } + + /** + * Gets the forward node with the given type-tracking state. 
+ * + * This node will have outgoing epsilon edges to its type-tracking successors. + */ + ApiNode getForwardNode(DataFlow::LocalSourceNode node, TypeTracker t); + + /** + * Gets the backward node with the given type-tracking state. + * + * This node will have outgoing epsilon edges to its type-tracking predecessors. + */ + ApiNode getBackwardNode(DataFlow::LocalSourceNode node, TypeTracker t); + + /** + * Gets the sink node corresponding to `node`. + * + * Since sinks are not generally `LocalSourceNode`s, such nodes are materialised separately in order for + * the API graph to include representatives for sinks. Note that there is no corresponding case for "source" + * nodes as these are represented as forward nodes with initial-state type-trackers. + * + * Sink nodes have outgoing epsilon edges to the backward nodes corresponding to their local sources. + */ + ApiNode getSinkNode(DataFlow::Node node); + + /** + * Holds if a language-specific epsilon edge `pred -> succ` should be generated. + */ + predicate specificEpsilonEdge(ApiNode pred, ApiNode succ); +} + +/** + * Parts of API graphs that can be shared between language implementations. + */ +module ApiGraphShared<ApiGraphSharedSig S> { + private import S + + /** Gets a local source of `node`. */ + bindingset[node] + pragma[inline_late] + DataFlow::LocalSourceNode getALocalSourceStrict(DataFlow::Node node) { + result = node.getALocalSource() + } + + cached + private module Cached { + /** + * Holds if there is an epsilon edge `pred -> succ`. + * + * That relation is reflexive, so `fastTC` produces the equivalent of a reflexive, transitive closure. 
+ */ + pragma[noopt] + cached + predicate epsilonEdge(ApiNode pred, ApiNode succ) { + exists( + StepSummary summary, DataFlow::LocalSourceNode predNode, TypeTracker predState, + DataFlow::LocalSourceNode succNode, TypeTracker succState + | + step(predNode, succNode, summary) + | + pred = getForwardNode(predNode, predState) and + succState = append(predState, summary) and + succ = getForwardNode(succNode, succState) + or + succ = getBackwardNode(predNode, predState) and // swap order for backward flow + succState = append(predState, summary) and + pred = getBackwardNode(succNode, succState) // swap order for backward flow + ) + or + exists(DataFlow::Node sink, DataFlow::LocalSourceNode localSource | + pred = getSinkNode(sink) and // sink node -> backward start node of each of its local sources + localSource = getALocalSourceStrict(sink) and + succ = getBackwardStartNode(localSource) + ) + or + specificEpsilonEdge(pred, succ) // language-specific edges supplied through the signature + or + succ instanceof ApiNode and + succ = pred // reflexive edge: every API node is its own epsilon successor + } + + /** + * Holds if `pred` can reach `succ` by zero or more epsilon edges. + */ + cached + predicate epsilonStar(ApiNode pred, ApiNode succ) = fastTC(epsilonEdge/2)(pred, succ) + + /** Gets the API node to use when starting forward flow from `source`. */ + cached + ApiNode forwardStartNode(DataFlow::LocalSourceNode source) { + result = getForwardNode(source, noContentTypeTracker(false)) + } + + /** Gets the API node to use when starting backward flow from `sink`. */ + cached + ApiNode backwardStartNode(DataFlow::LocalSourceNode sink) { + // There is backward flow A->B iff there is forward flow B->A. + // The starting point of backward flow corresponds to the end of a forward flow, and vice versa. + result = getBackwardNode(sink, noContentTypeTracker(_)) + } + + /** Gets `node` as a data flow source. */ + cached + DataFlow::LocalSourceNode asSourceCached(ApiNode node) { node = forwardEndNode(result) } + + /** Gets `node` as a data flow sink. 
*/ + cached + DataFlow::Node asSinkCached(ApiNode node) { node = getSinkNode(result) } + } + + private import Cached + + /** Gets an API node corresponding to the end of forward-tracking to `localSource`. Uses the same type-tracker states as `backwardStartNode`. */ + pragma[nomagic] + private ApiNode forwardEndNode(DataFlow::LocalSourceNode localSource) { + result = getForwardNode(localSource, noContentTypeTracker(_)) + } + + /** Gets an API node corresponding to the end of backtracking to `localSource`. Uses the same type-tracker state as `forwardStartNode`. */ + pragma[nomagic] + private ApiNode backwardEndNode(DataFlow::LocalSourceNode localSource) { + result = getBackwardNode(localSource, noContentTypeTracker(false)) + } + + /** Gets a node reachable from `node` by zero or more epsilon edges, including `node` itself. */ + bindingset[node] + pragma[inline_late] + ApiNode getAnEpsilonSuccessorInline(ApiNode node) { epsilonStar(node, result) } + + /** Gets `node` as a data flow sink. */ + bindingset[node] + pragma[inline_late] + DataFlow::Node asSinkInline(ApiNode node) { result = asSinkCached(node) } + + /** Gets `node` as a data flow source. */ + bindingset[node] + pragma[inline_late] + DataFlow::LocalSourceNode asSourceInline(ApiNode node) { result = asSourceCached(node) } + + /** Gets a value reachable from `source`. */ + bindingset[source] + pragma[inline_late] + DataFlow::Node getAValueReachableFromSourceInline(ApiNode source) { + exists(DataFlow::LocalSourceNode src | + src = asSourceInline(getAnEpsilonSuccessorInline(source)) and + src.flowsTo(pragma[only_bind_into](result)) + ) + } + + /** Gets a value that can reach `sink`. */ + bindingset[sink] + pragma[inline_late] + DataFlow::Node getAValueReachingSinkInline(ApiNode sink) { + backwardStartNode(result) = getAnEpsilonSuccessorInline(sink) + } + + /** + * Gets the starting point for forward-tracking at `node`. + * + * Should be used to obtain the successor of an edge when constructing labelled edges. 
+ */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardStartNode(DataFlow::Node node) { result = forwardStartNode(node) } + + /** + * Gets the starting point of backtracking from `node`. + * + * Should be used to obtain the successor of an edge when constructing labelled edges. + */ + bindingset[node] + pragma[inline_late] + ApiNode getBackwardStartNode(DataFlow::Node node) { result = backwardStartNode(node) } + + /** + * Gets a possible ending point of forward-tracking at `node`. + * + * Should be used to obtain the predecessor of an edge when constructing labelled edges. + * + * This is not backed by a `cached` predicate, and should only be used for materialising `cached` + * predicates in the API graph implementation - it should not be called in later stages. + */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardEndNode(DataFlow::Node node) { result = forwardEndNode(node) } + + /** + * Gets a possible ending point of backtracking to `node`. + * + * Should be used to obtain the predecessor of an edge when constructing labelled edges. + * + * This is not backed by a `cached` predicate, and should only be used for materialising `cached` + * predicates in the API graph implementation - it should not be called in later stages. + */ + bindingset[node] + pragma[inline_late] + ApiNode getBackwardEndNode(DataFlow::Node node) { result = backwardEndNode(node) } + + /** + * Gets a possible ending point of forward or backward tracking at `node`. + * + * Should be used to obtain the predecessor of an edge generated from store or load edges. + */ + bindingset[node] + pragma[inline_late] + ApiNode getForwardOrBackwardEndNode(DataFlow::Node node) { + result = getForwardEndNode(node) or result = getBackwardEndNode(node) + } + + /** Gets an API node for tracking forward starting at `node`. 
This is the implementation of `DataFlow::LocalSourceNode.track()`. */ + bindingset[node] + pragma[inline_late] + ApiNode getNodeForForwardTracking(DataFlow::Node node) { result = forwardStartNode(node) } + + /** Gets an API node for backtracking starting at `node`. The implementation of `DataFlow::Node.backtrack()`. */ + bindingset[node] + pragma[inline_late] + ApiNode getNodeForBacktracking(DataFlow::Node node) { + result = getBackwardStartNode(getALocalSourceStrict(node)) + } + + /** Parts of the shared module to be re-exported by the user-facing `API` module. */ + module Public { + /** + * The signature to use when instantiating the `ExplainFlow` module. + */ + signature module ExplainFlowSig { + /** Holds if `node` should be a source. */ + predicate isSource(ApiNode node); + + /** Holds if `node` should be a sink. */ + default predicate isSink(ApiNode node) { any() } + + /** Holds if `node` should be skipped in the generated paths. */ + default predicate isHidden(ApiNode node) { none() } + } + + /** + * Module to help debug and visualize the data flows underlying API graphs. + * + * This module exports the query predicates for a path-problem query, and should be imported + * into the top-level of such a query. + * + * The module argument should specify source and sink API nodes, and the resulting query + * will show paths of epsilon edges that go from a source to a sink. Only epsilon edges are visualized. + * + * To condense the output a bit, paths in which the source and sink are the same node are omitted. + */ + module ExplainFlow<ExplainFlowSig T> { + private import T + + private ApiNode relevantNode() { + isSink(result) and + result = getAnEpsilonSuccessorInline(any(ApiNode node | isSource(node))) + or + epsilonEdge(result, relevantNode()) + } + + /** Holds if `node` is part of the graph to visualize. 
*/ + query predicate nodes(ApiNode node) { node = relevantNode() and not isHidden(node) } + + private predicate edgeToHiddenNode(ApiNode pred, ApiNode succ) { + epsilonEdge(pred, succ) and + isHidden(succ) and + pred = relevantNode() and + succ = relevantNode() + } + + /** Holds if `pred -> succ` is an edge in the graph to visualize. Any chain of hidden nodes between `pred` and `succ` is collapsed into this single edge. */ + query predicate edges(ApiNode pred, ApiNode succ) { + nodes(pred) and + nodes(succ) and + exists(ApiNode mid | + edgeToHiddenNode*(pred, mid) and + epsilonEdge(mid, succ) + ) + } + + /** Holds for each source/sink pair to visualize in the graph. Pairs in which the source and sink are the same node are excluded. */ + query predicate problems( + ApiNode location, ApiNode sourceNode, ApiNode sinkNode, string message + ) { + nodes(sourceNode) and + nodes(sinkNode) and + isSource(sourceNode) and + isSink(sinkNode) and + sinkNode = getAnEpsilonSuccessorInline(sourceNode) and + sourceNode != sinkNode and + location = sinkNode and + message = "Node flows here" + } + } + } +} diff --git a/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll b/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll index e2d21739a57d..8908c99491c9 100644 --- a/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll +++ b/powershell/ql/lib/semmle/code/powershell/typetracking/internal/TypeTrackingImpl.qll @@ -5,20 +5,36 @@ private import semmle.code.powershell.controlflow.Cfg as Cfg private import Cfg::CfgNodes private import codeql.typetracking.internal.SummaryTypeTracker as SummaryTypeTracker private import semmle.code.powershell.dataflow.DataFlow +private import semmle.code.powershell.dataflow.FlowSummary as FlowSummary private import semmle.code.powershell.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon private import semmle.code.powershell.dataflow.internal.DataFlowPublic as DataFlowPublic private import semmle.code.powershell.dataflow.internal.DataFlowPrivate as DataFlowPrivate private import 
semmle.code.powershell.dataflow.internal.DataFlowDispatch as DataFlowDispatch +private import semmle.code.powershell.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl private import codeql.util.Unit +pragma[noinline] +private predicate sourceArgumentPositionMatch( + CallCfgNode call, DataFlowPrivate::ArgumentNode arg, DataFlowDispatch::ParameterPosition ppos +) { + exists(DataFlowDispatch::ArgumentPosition apos | + arg.sourceArgumentOf(call, apos) and + DataFlowDispatch::parameterMatch(ppos, apos) + ) +} + pragma[noinline] private predicate argumentPositionMatch( DataFlowDispatch::DataFlowCall call, DataFlowPrivate::ArgumentNode arg, DataFlowDispatch::ParameterPosition ppos ) { + sourceArgumentPositionMatch(call.asCall(), arg, ppos) + or exists(DataFlowDispatch::ArgumentPosition apos | + DataFlowDispatch::parameterMatch(ppos, apos) and arg.argumentOf(call, apos) and - DataFlowDispatch::parameterMatch(ppos, apos) + call.getEnclosingCallable().asLibraryCallable() instanceof + DataFlowDispatch::LibraryCallableToIncludeInTypeTracking ) } @@ -29,6 +45,12 @@ private predicate viableParam( ) { exists(DataFlowDispatch::DataFlowCallable callable | DataFlowDispatch::getTarget(call) = callable.asCfgScope() + or + call.asCall().getAstNode() = + callable + .asLibraryCallable() + .(DataFlowDispatch::LibraryCallableToIncludeInTypeTracking) + .getACallSimple() | p.isParameterOf(callable, ppos) ) @@ -71,43 +93,64 @@ private module SummaryTypeTrackerInput implements SummaryTypeTracker::Input { } // Summaries and their stacks - class SummaryComponent extends Unit { - SummaryComponent() { none() } - } + class SummaryComponent = FlowSummaryImpl::Private::SummaryComponent; - class SummaryComponentStack extends Unit { - SummaryComponent head() { none() } - } + class SummaryComponentStack = FlowSummaryImpl::Private::SummaryComponentStack; - SummaryComponentStack singleton(SummaryComponent component) { none() } + predicate singleton = 
FlowSummaryImpl::Private::SummaryComponentStack::singleton/1; - SummaryComponentStack push(SummaryComponent head, SummaryComponentStack tail) { none() } + predicate push = FlowSummaryImpl::Private::SummaryComponentStack::push/2; - SummaryComponent return() { none() } + // Relating content to summaries + predicate content = FlowSummaryImpl::Private::SummaryComponent::content/1; - SummaryComponent content(Content contents) { none() } + predicate withoutContent = FlowSummaryImpl::Private::SummaryComponent::withoutContent/1; - SummaryComponent withoutContent(Content contents) { none() } + predicate withContent = FlowSummaryImpl::Private::SummaryComponent::withContent/1; - SummaryComponent withContent(Content contents) { none() } + predicate return = FlowSummaryImpl::Private::SummaryComponent::return/0; - class SummarizedCallable extends Unit { - SummarizedCallable() { none() } + // Callables + class SummarizedCallable instanceof FlowSummaryImpl::Private::SummarizedCallableImpl { + string toString() { result = super.toString() } predicate propagatesFlow( SummaryComponentStack input, SummaryComponentStack output, boolean preservesValue ) { - none() + super.propagatesFlow(input, output, preservesValue, _) } } - Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { none() } + // Relating nodes to summaries + Node argumentOf(Node call, SummaryComponent arg, boolean isPostUpdate) { + exists(DataFlowDispatch::ParameterPosition pos, DataFlowPrivate::ArgumentNode n | + arg = FlowSummaryImpl::Private::SummaryComponent::argument(pos) and + sourceArgumentPositionMatch(call.asExpr(), n, pos) + | + isPostUpdate = false and result = n + or + isPostUpdate = true and result.(DataFlowPublic::PostUpdateNode).getPreUpdateNode() = n + ) + } - Node parameterOf(Node callable, SummaryComponent param) { none() } + Node parameterOf(Node callable, SummaryComponent param) { + exists(DataFlowDispatch::ArgumentPosition apos, DataFlowDispatch::ParameterPosition ppos | + param = 
FlowSummaryImpl::Private::SummaryComponent::parameter(apos) and + DataFlowDispatch::parameterMatch(ppos, apos) and + result.(DataFlowPrivate::ParameterNodeImpl).isSourceParameterOf(callable.asCallable(), ppos) + ) + } - Node returnOf(Node callable, SummaryComponent return) { none() } + Node returnOf(Node callable, SummaryComponent return) { + return = FlowSummaryImpl::Private::SummaryComponent::return() and + result.(DataFlowPrivate::ReturnNode).(DataFlowPrivate::NodeImpl).getCfgScope() = + callable.asCallable() + } - Node callTo(SummarizedCallable callable) { none() } + // Relating callables to nodes + Node callTo(SummarizedCallable callable) { + result.asExpr().getExpr() = callable.(FlowSummary::SummarizedCallable).getACallSimple() + } } private module TypeTrackerSummaryFlow = SummaryTypeTracker::SummaryFlow; @@ -131,7 +174,8 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { /** Gets the content of a type-tracker that matches this filter. */ Content getAMatchingContent() { - none() // TODO + this = MkElementFilter() and + result.getAReadContent() instanceof DataFlow::Content::ElementContent } }