Absolute path | Base name |
---|---|
"/tmp/tst.go" | "tst.go" |
"C:/Program Files (x86)" | "Program Files (x86)" |
"/" | "" |
"C:/" | "" |
"D:/" | "" |
"//FileServer/" | "" |
Absolute path | Extension |
---|---|
"/tmp/tst.go" | "go" |
"/tmp/.classpath" | "classpath" |
"/bin/bash" | not defined |
"/tmp/tst2." | "" |
"/tmp/x.tar.gz" | "gz" |
Absolute path | Stem |
---|---|
"/tmp/tst.go" | "tst" |
"/tmp/.classpath" | "" |
"/bin/bash" | "bash" |
"/tmp/tst2." | "tst2" |
"/tmp/x.tar.gz" | "x.tar" |
{}
`. + */ +class HtmlEscaping extends Escaping { + HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() } +} + +/** Provides classes for modeling HTTP-related APIs. */ +module HTTP { + /** Provides classes for modeling HTTP servers. */ + module Server { + /** + * A data-flow node that sets up a route on a server. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RouteSetup::Range` instead. + */ + class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range { + /** Gets the URL pattern for this route, if it can be statically determined. */ + string getUrlPattern() { result = super.getUrlPattern() } + + /** + * Gets a function that will handle incoming requests for this route, if any. + * + * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`. + */ + Method getARequestHandler() { result = super.getARequestHandler() } + + /** + * Gets a parameter that will receive parts of the url when handling incoming + * requests for this route, if any. These automatically become a `RemoteFlowSource`. + */ + Parameter getARoutedParameter() { result = super.getARoutedParameter() } + + /** Gets a string that identifies the framework used for this route setup. */ + string getFramework() { result = super.getFramework() } + } + + /** Provides a class for modeling new HTTP routing APIs. */ + module RouteSetup { + /** + * A data-flow node that sets up a route on a server. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RouteSetup` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument used to set the URL pattern. */ + abstract DataFlow::Node getUrlPatternArg(); + + /** Gets the URL pattern for this route, if it can be statically determined. 
*/ + string getUrlPattern() { + exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode | + this.getUrlPatternArg().getALocalSource() = DataFlow::exprNode(strNode) and + result = strNode.getExpr().getValueText() + ) + } + + /** + * Gets a function that will handle incoming requests for this route, if any. + * + * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Method`. + */ + abstract Method getARequestHandler(); + + /** + * Gets a parameter that will receive parts of the url when handling incoming + * requests for this route, if any. These automatically become a `RemoteFlowSource`. + */ + abstract Parameter getARoutedParameter(); + + /** Gets a string that identifies the framework used for this route setup. */ + abstract string getFramework(); + } + } + + /** + * A function that will handle incoming HTTP requests. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `RequestHandler::Range` instead. + */ + class RequestHandler extends Method instanceof RequestHandler::Range { + /** + * Gets a parameter that could receive parts of the url when handling incoming + * requests, if any. These automatically become a `RemoteFlowSource`. + */ + Parameter getARoutedParameter() { result = super.getARoutedParameter() } + + /** Gets a string that identifies the framework used for this request handler. */ + string getFramework() { result = super.getFramework() } + } + + /** Provides a class for modeling new HTTP request handlers. */ + module RequestHandler { + /** + * A function that will handle incoming HTTP requests. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `RequestHandler` instead. + * + * Only extend this class if you can't provide a `RouteSetup`, since we handle that case automatically. 
+ */ + abstract class Range extends Method { + /** + * Gets a parameter that could receive parts of the url when handling incoming + * requests, if any. These automatically become a `RemoteFlowSource`. + */ + abstract Parameter getARoutedParameter(); + + /** Gets a string that identifies the framework used for this request handler. */ + abstract string getFramework(); + } + } + + private class RequestHandlerFromRouteSetup extends RequestHandler::Range { + RouteSetup rs; + + RequestHandlerFromRouteSetup() { this = rs.getARequestHandler() } + + override Parameter getARoutedParameter() { + result = rs.getARoutedParameter() and + result = this.getAParameter() + } + + override string getFramework() { result = rs.getFramework() } + } + + /** A parameter that will receive parts of the url when handling an incoming request. */ + private class RoutedParameter extends RemoteFlowSource::Range, DataFlow::ParameterNode { + RequestHandler handler; + + RoutedParameter() { this.getParameter() = handler.getARoutedParameter() } + + override string getSourceType() { result = handler.getFramework() + " RoutedParameter" } + } + + /** + * A data-flow node that creates a HTTP response on a server. + * + * Note: we don't require that this response must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HttpResponse::Range` instead. + */ + class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range { + /** Gets the data-flow node that specifies the body of this HTTP response. */ + DataFlow::Node getBody() { result = super.getBody() } + + /** Gets the mimetype of this HTTP response, if it can be statically determined. */ + string getMimetype() { result = super.getMimetype() } + } + + /** Provides a class for modeling new HTTP response APIs. */ + module HttpResponse { + /** + * A data-flow node that creates a HTTP response on a server. 
+ * + * Note: we don't require that this response must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HttpResponse` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the data-flow node that specifies the body of this HTTP response. */ + abstract DataFlow::Node getBody(); + + /** Gets the data-flow node that specifies the content-type/mimetype of this HTTP response, if any. */ + abstract DataFlow::Node getMimetypeOrContentTypeArg(); + + /** Gets the default mimetype that should be used if `getMimetypeOrContentTypeArg` has no results. */ + abstract string getMimetypeDefault(); + + /** Gets the mimetype of this HTTP response, if it can be statically determined. */ + string getMimetype() { + exists(CfgNodes::ExprNodes::StringlikeLiteralCfgNode strNode | + this.getMimetypeOrContentTypeArg().getALocalSource() = DataFlow::exprNode(strNode) and + result = strNode.getExpr().getValueText().splitAt(";", 0) + ) + or + not exists(this.getMimetypeOrContentTypeArg()) and + result = this.getMimetypeDefault() + } + } + } + + /** + * A data-flow node that creates a HTTP redirect response on a server. + * + * Note: we don't require that this redirect must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `HttpRedirectResponse::Range` instead. + */ + class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range { + /** Gets the data-flow node that specifies the location of this HTTP redirect response. */ + DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() } + } + + /** Provides a class for modeling new HTTP redirect response APIs. 
*/ + module HttpRedirectResponse { + /** + * A data-flow node that creates a HTTP redirect response on a server. + * + * Note: we don't require that this redirect must be sent to a client (a kind of + * "if a tree falls in a forest and nobody hears it" situation). + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `HttpRedirectResponse` instead. + */ + abstract class Range extends HTTP::Server::HttpResponse::Range { + /** Gets the data-flow node that specifies the location of this HTTP redirect response. */ + abstract DataFlow::Node getRedirectLocation(); + } + } + } + + /** Provides classes for modeling HTTP clients. */ + module Client { + /** + * A method call that makes an outgoing HTTP request. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `Request::Range` instead. + */ + class Request extends MethodCall instanceof Request::Range { + /** Gets a node which returns the body of the response */ + DataFlow::Node getResponseBody() { result = super.getResponseBody() } + + /** Gets a string that identifies the framework used for this request. */ + string getFramework() { result = super.getFramework() } + } + + /** Provides a class for modeling new HTTP requests. */ + module Request { + /** + * A method call that makes an outgoing HTTP request. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `Request` instead. + */ + abstract class Range extends MethodCall { + /** Gets a node which returns the body of the response */ + abstract DataFlow::Node getResponseBody(); + + /** Gets a string that identifies the framework used for this request. 
*/ + abstract string getFramework(); + } + } + + /** The response body from an outgoing HTTP request, considered as a remote flow source */ + private class RequestResponseBody extends RemoteFlowSource::Range, DataFlow::Node { + Request request; + + RequestResponseBody() { this = request.getResponseBody() } + + override string getSourceType() { result = request.getFramework() } + } + } +} + +/** + * A data flow node that executes an operating system command, + * for instance by spawning a new process. + */ +class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range { + /** Holds if a shell interprets `arg`. */ + predicate isShellInterpreted(DataFlow::Node arg) { super.isShellInterpreted(arg) } + + /** Gets an argument to this execution that specifies the command or an argument to it. */ + DataFlow::Node getAnArgument() { result = super.getAnArgument() } +} + +/** Provides a class for modeling new operating system command APIs. */ +module SystemCommandExecution { + /** + * A data flow node that executes an operating system command, for instance by spawning a new + * process. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `SystemCommandExecution` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets an argument to this execution that specifies the command or an argument to it. */ + abstract DataFlow::Node getAnArgument(); + + /** Holds if a shell interprets `arg`. */ + predicate isShellInterpreted(DataFlow::Node arg) { none() } + } +} + +/** + * A data-flow node that dynamically executes Ruby code. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `CodeExecution::Range` instead. + */ +class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range { + /** Gets the argument that specifies the code to be executed. 
*/ + DataFlow::Node getCode() { result = super.getCode() } +} + +/** Provides a class for modeling new dynamic code execution APIs. */ +module CodeExecution { + /** + * A data-flow node that dynamically executes Ruby code. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `CodeExecution` instead. + */ + abstract class Range extends DataFlow::Node { + /** Gets the argument that specifies the code to be executed. */ + abstract DataFlow::Node getCode(); + } +} diff --git a/ruby/ql/lib/codeql/ruby/DataFlow.qll b/ruby/ql/lib/codeql/ruby/DataFlow.qll new file mode 100644 index 000000000000..e7645ce0c109 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/DataFlow.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) data flow analyses. + */ +module DataFlow { + import codeql.ruby.dataflow.internal.DataFlowImpl +} diff --git a/ruby/ql/lib/codeql/ruby/Diagnostics.qll b/ruby/ql/lib/codeql/ruby/Diagnostics.qll new file mode 100644 index 000000000000..b8995c01bc27 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/Diagnostics.qll @@ -0,0 +1,52 @@ +private import codeql.Locations + +/** A diagnostic emitted during extraction, such as a parse error */ +class Diagnostic extends @diagnostic { + int severity; + string tag; + string message; + string fullMessage; + Location location; + + Diagnostic() { diagnostics(this, severity, tag, message, fullMessage, location) } + + /** + * Gets the numerical severity level associated with this diagnostic. + */ + int getSeverity() { result = severity } + + /** Gets a string representation of the severity of this diagnostic. */ + string getSeverityText() { + severity = 10 and result = "Debug" + or + severity = 20 and result = "Info" + or + severity = 30 and result = "Warning" + or + severity = 40 and result = "Error" + } + + /** Gets the error code associated with this diagnostic, e.g. parse_error. 
*/ + string getTag() { result = tag } + + /** + * Gets the error message text associated with this diagnostic. + */ + string getMessage() { result = message } + + /** + * Gets the full error message text associated with this diagnostic. + */ + string getFullMessage() { result = fullMessage } + + /** Gets the source location of this diagnostic. */ + Location getLocation() { result = location } + + /** Gets a textual representation of this diagnostic. */ + string toString() { result = this.getMessage() } +} + +/** A diagnostic relating to a particular error in extracting a file. */ +class ExtractionError extends Diagnostic, @diagnostic_error { + ExtractionError() { this.getTag() = "parse_error" } +} diff --git a/ruby/ql/lib/codeql/ruby/Frameworks.qll b/ruby/ql/lib/codeql/ruby/Frameworks.qll new file mode 100644 index 000000000000..8ce52df8458a --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/Frameworks.qll @@ -0,0 +1,10 @@ +/** + * Helper file that imports all framework modeling. + */ + +private import codeql.ruby.frameworks.ActionController +private import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.frameworks.ActionView +private import codeql.ruby.frameworks.StandardLibrary +private import codeql.ruby.frameworks.Files +private import codeql.ruby.frameworks.HTTPClients diff --git a/ruby/ql/lib/codeql/ruby/TaintTracking.qll b/ruby/ql/lib/codeql/ruby/TaintTracking.qll new file mode 100755 index 000000000000..e443b2942731 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/TaintTracking.qll @@ -0,0 +1,7 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. 
+ */ +module TaintTracking { + import codeql.ruby.dataflow.internal.tainttracking1.TaintTrackingImpl +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Call.qll b/ruby/ql/lib/codeql/ruby/ast/Call.qll new file mode 100644 index 000000000000..8f98cf0574ce --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Call.qll @@ -0,0 +1,199 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Call +private import internal.TreeSitter +private import codeql.ruby.dataflow.internal.DataFlowDispatch +private import codeql.ruby.dataflow.internal.DataFlowImplCommon + +/** + * A call. + */ +class Call extends Expr instanceof CallImpl { + override string getAPrimaryQlClass() { result = "Call" } + + /** + * Gets the `n`th argument of this method call. In the following example, the + * result for n=0 is the `IntegerLiteral` 0, while for n=1 the result is a + * `Pair` (whose `getKey` returns the `SymbolLiteral` for `bar`, and + * `getValue` returns the `IntegerLiteral` 1). Keyword arguments like this + * can be accessed more naturally using the + * `getKeywordArgument(string keyword)` predicate. + * ```rb + * foo(0, bar: 1) + * yield 0, bar: 1 + * ``` + */ + final Expr getArgument(int n) { result = super.getArgumentImpl(n) } + + /** + * Gets an argument of this method call. + */ + final Expr getAnArgument() { result = this.getArgument(_) } + + /** + * Gets the value of the keyword argument whose key is `keyword`, if any. For + * example, the result for `getKeywordArgument("qux")` in the following + * example is the `IntegerLiteral` 123. + * ```rb + * foo :bar "baz", qux: 123 + * ``` + */ + final Expr getKeywordArgument(string keyword) { + exists(Pair p | + p = this.getAnArgument() and + p.getKey().(SymbolLiteral).getValueText() = keyword and + result = p.getValue() + ) + } + + /** + * Gets the number of arguments of this method call. 
+ */ + final int getNumberOfArguments() { result = super.getNumberOfArgumentsImpl() } + + /** Gets a potential target of this call, if any. */ + final Callable getATarget() { + exists(DataFlowCall c | this = c.asCall().getExpr() | + TCfgScope(result) = [viableCallable(c), viableCallableLambda(c, _)] + ) + } + + override AstNode getAChild(string pred) { + result = Expr.super.getAChild(pred) + or + pred = "getArgument" and result = this.getArgument(_) + } +} + +/** + * A method call. + */ +class MethodCall extends Call instanceof MethodCallImpl { + override string getAPrimaryQlClass() { result = "MethodCall" } + + /** + * Gets the receiver of this call, if any. For example: + * + * ```rb + * foo.bar + * Baz::qux + * corge() + * ``` + * + * The result for the call to `bar` is the `Expr` for `foo`; the result for + * the call to `qux` is the `Expr` for `Baz`; for the call to `corge` there + * is no result. + */ + final Expr getReceiver() { result = super.getReceiverImpl() } + + /** + * Gets the name of the method being called. For example, in: + * + * ```rb + * foo.bar x, y + * ``` + * + * the result is `"bar"`. + */ + final string getMethodName() { result = super.getMethodNameImpl() } + + /** + * Gets the block of this method call, if any. + * ```rb + * foo.each { |x| puts x } + * ``` + */ + final Block getBlock() { result = super.getBlockImpl() } + + override string toString() { result = "call to " + this.getMethodName() } + + override AstNode getAChild(string pred) { + result = Call.super.getAChild(pred) + or + pred = "getReceiver" and result = this.getReceiver() + or + pred = "getBlock" and result = this.getBlock() + } +} + +/** + * A call to a setter method. 
+ * ```rb + * self.foo = 10 + * a[0] = 10 + * ``` + */ +class SetterMethodCall extends MethodCall, TMethodCallSynth { + SetterMethodCall() { this = TMethodCallSynth(_, _, _, true, _) } + + final override string getAPrimaryQlClass() { result = "SetterMethodCall" } +} + +/** + * An element reference; a call to the `[]` method. + * ```rb + * a[0] + * ``` + */ +class ElementReference extends MethodCall instanceof ElementReferenceImpl { + final override string getAPrimaryQlClass() { result = "ElementReference" } + + final override string toString() { result = "...[...]" } +} + +/** + * A call to `yield`. + * ```rb + * yield x, y + * ``` + */ +class YieldCall extends Call instanceof YieldCallImpl { + final override string getAPrimaryQlClass() { result = "YieldCall" } + + final override string toString() { result = "yield ..." } +} + +/** + * A call to `super`. + * ```rb + * class Foo < Bar + * def baz + * super + * end + * end + * ``` + */ +class SuperCall extends MethodCall instanceof SuperCallImpl { + final override string getAPrimaryQlClass() { result = "SuperCall" } +} + +/** + * A block argument in a method call. + * ```rb + * foo(&block) + * ``` + */ +class BlockArgument extends Expr, TBlockArgument { + private Ruby::BlockArgument g; + + BlockArgument() { this = TBlockArgument(g) } + + final override string getAPrimaryQlClass() { result = "BlockArgument" } + + /** + * Gets the underlying expression representing the block. In the following + * example, the result is the `Expr` for `bar`: + * ```rb + * foo(&bar) + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "&..." 
} + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + } +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Constant.qll b/ruby/ql/lib/codeql/ruby/ast/Constant.qll new file mode 100644 index 000000000000..268ed20f1516 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Constant.qll @@ -0,0 +1,179 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.Module +private import internal.Variable +private import internal.TreeSitter + +/** An access to a constant. */ +class ConstantAccess extends Expr, TConstantAccess { + /** Gets the name of the constant being accessed. */ + string getName() { none() } + + /** Holds if the name of the constant being accessed is `name`. */ + final predicate hasName(string name) { this.getName() = name } + + /** + * Gets the expression used in the access's scope resolution operation, if + * any. In the following example, the result is the `Call` expression for + * `foo()`. + * + * ```rb + * foo()::MESSAGE + * ``` + * + * However, there is no result for the following example, since there is no + * scope resolution operation. 
+ * + * ```rb + * MESSAGE + * ``` + */ + Expr getScopeExpr() { none() } + + /** + * Holds if the access uses the scope resolution operator to refer to the + * global scope, as in this example: + * + * ```rb + * ::MESSAGE + * ``` + */ + predicate hasGlobalScope() { none() } + + override string toString() { result = this.getName() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getScopeExpr" and result = this.getScopeExpr() + } +} + +private class TokenConstantAccess extends ConstantAccess, TTokenConstantAccess { + private Ruby::Constant g; + + TokenConstantAccess() { this = TTokenConstantAccess(g) } + + final override string getName() { result = g.getValue() } +} + +private class ScopeResolutionConstantAccess extends ConstantAccess, TScopeResolutionConstantAccess { + private Ruby::ScopeResolution g; + private Ruby::Constant constant; + + ScopeResolutionConstantAccess() { this = TScopeResolutionConstantAccess(g, constant) } + + final override string getName() { result = constant.getValue() } + + final override Expr getScopeExpr() { toGenerated(result) = g.getScope() } + + final override predicate hasGlobalScope() { not exists(g.getScope()) } +} + +private class ConstantReadAccessSynth extends ConstantAccess, TConstantReadAccessSynth { + private string value; + + ConstantReadAccessSynth() { this = TConstantReadAccessSynth(_, _, value) } + + final override string getName() { + if this.hasGlobalScope() then result = value.suffix(2) else result = value + } + + final override Expr getScopeExpr() { synthChild(this, 0, result) } + + final override predicate hasGlobalScope() { value.matches("::%") } +} + +/** + * A use (read) of a constant. 
+ * + * For example, the right-hand side of the assignment in: + * + * ```rb + * x = Foo + * ``` + * + * Or the superclass `Bar` in this example: + * + * ```rb + * class Foo < Bar + * end + * ``` + */ +class ConstantReadAccess extends ConstantAccess { + ConstantReadAccess() { + not this instanceof ConstantWriteAccess + or + // `X` in `X ||= 10` is considered both a read and a write + this = any(AssignOperation a).getLeftOperand() + or + this instanceof TConstantReadAccessSynth + } + + /** + * Gets the value being read, if any. For example, in + * + * ```rb + * module M + * CONST = "const" + * end + * + * puts M::CONST + * ``` + * + * the value being read at `M::CONST` is `"const"`. + */ + Expr getValue() { + not exists(this.getScopeExpr()) and + result = lookupConst(this.getEnclosingModule+().getModule(), this.getName()) and + // For now, we restrict the scope of top-level declarations to their file. + // This may remove some plausible targets, but also removes a lot of + // implausible targets + if result.getEnclosingModule() instanceof Toplevel + then result.getFile() = this.getFile() + else any() + or + this.hasGlobalScope() and + result = lookupConst(TResolved("Object"), this.getName()) + or + result = lookupConst(resolveScopeExpr(this.getScopeExpr()), this.getName()) + } + + final override string getAPrimaryQlClass() { result = "ConstantReadAccess" } +} + +/** + * A definition of a constant. 
+ * + * Examples: + * + * ```rb + * Foo = 1 # defines constant Foo as an integer + * M::Foo = 1 # defines constant Foo as an integer in module M + * + * class Bar; end # defines constant Bar as a class + * class M::Bar; end # defines constant Bar as a class in module M + * + * module Baz; end # defines constant Baz as a module + * module M::Baz; end # defines constant Baz as a module in module M + * ``` + */ +class ConstantWriteAccess extends ConstantAccess { + ConstantWriteAccess() { + explicitAssignmentNode(toGenerated(this), _) or this instanceof TNamespace + } + + override string getAPrimaryQlClass() { result = "ConstantWriteAccess" } +} + +/** + * A definition of a constant via assignment. For example, the left-hand + * operand in the following example: + * + * ```rb + * MAX_SIZE = 100 + * ``` + */ +class ConstantAssignment extends ConstantWriteAccess, LhsExpr { + override string getAPrimaryQlClass() { result = "ConstantAssignment" } +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Control.qll b/ruby/ql/lib/codeql/ruby/ast/Control.qll new file mode 100644 index 000000000000..33f52c024133 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Control.qll @@ -0,0 +1,611 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.TreeSitter + +/** + * A control expression that can be any of the following: + * - `case` + * - `if`/`unless` (including expression-modifier variants) + * - ternary-if (`?:`) + * - `while`/`until` (including expression-modifier variants) + * - `for` + */ +class ControlExpr extends Expr, TControlExpr { } + +/** + * A conditional expression: `if`/`unless` (including expression-modifier + * variants), and ternary-if (`?:`) expressions. + */ +class ConditionalExpr extends ControlExpr, TConditionalExpr { + /** + * Gets the condition expression. 
For example, the result is `foo` in the + * following: + * ```rb + * if foo + * bar = 1 + * end + * ``` + */ + Expr getCondition() { none() } + + /** + * Gets the branch of this conditional expression that is taken when the + * condition evaluates to `cond`, if any. + */ + Stmt getBranch(boolean cond) { none() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getCondition" and result = this.getCondition() + or + pred = "getBranch" and result = this.getBranch(_) + } +} + +/** + * An `if` or `elsif` expression. + * ```rb + * if x + * a += 1 + * elsif y + * a += 2 + * end + * ``` + */ +class IfExpr extends ConditionalExpr, TIfExpr { + final override string getAPrimaryQlClass() { result = "IfExpr" } + + /** Holds if this is an `elsif` expression. */ + predicate isElsif() { none() } + + /** Gets the 'then' branch of this `if`/`elsif` expression. */ + Stmt getThen() { none() } + + /** + * Gets the `elsif`/`else` branch of this `if`/`elsif` expression, if any. In + * the following example, the result is a `StmtSequence` containing `b`. + * ```rb + * if foo + * a + * else + * b + * end + * ``` + * But there is no result for the following: + * ```rb + * if foo + * a + * end + * ``` + * There can be at most one result, since `elsif` branches nest. In the + * following example, `ifExpr.getElse()` returns an `ElsifExpr`, and the + * `else` branch is nested inside that. To get the `StmtSequence` for the + * `else` branch, i.e. the one containing `c`, use + * `getElse().(ElsifExpr).getElse()`. 
+ * ```rb + * if foo + * a + * elsif bar + * b + * else + * c + * end + * ``` + */ + Stmt getElse() { none() } + + final override Stmt getBranch(boolean cond) { + cond = true and result = this.getThen() + or + cond = false and result = this.getElse() + } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +private class If extends IfExpr, TIf { + private Ruby::If g; + + If() { this = TIf(g) } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getThen() { toGenerated(result) = g.getConsequence() } + + final override Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override string toString() { result = "if ..." } +} + +private class Elsif extends IfExpr, TElsif { + private Ruby::Elsif g; + + Elsif() { this = TElsif(g) } + + final override predicate isElsif() { any() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getThen() { toGenerated(result) = g.getConsequence() } + + final override Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override string toString() { result = "elsif ..." } +} + +/** + * An `unless` expression. + * ```rb + * unless x == 0 + * y /= x + * end + * ``` + */ +class UnlessExpr extends ConditionalExpr, TUnlessExpr { + private Ruby::Unless g; + + UnlessExpr() { this = TUnlessExpr(g) } + + final override string getAPrimaryQlClass() { result = "UnlessExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Gets the 'then' branch of this `unless` expression. In the following + * example, the result is the `StmtSequence` containing `foo`. 
+ * ```rb + * unless a == b then + * foo + * else + * bar + * end + * ``` + */ + final Stmt getThen() { toGenerated(result) = g.getConsequence() } + + /** + * Gets the 'else' branch of this `unless` expression. In the following + * example, the result is the `StmtSequence` containing `bar`. + * ```rb + * unless a == b then + * foo + * else + * bar + * end + * ``` + */ + final Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override Expr getBranch(boolean cond) { + cond = false and result = getThen() + or + cond = true and result = getElse() + } + + final override string toString() { result = "unless ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +/** + * An expression modified using `if`. + * ```rb + * foo if bar + * ``` + */ +class IfModifierExpr extends ConditionalExpr, TIfModifierExpr { + private Ruby::IfModifier g; + + IfModifierExpr() { this = TIfModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "IfModifierExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getBranch(boolean cond) { cond = true and result = this.getBody() } + + /** + * Gets the statement that is conditionally evaluated. In the following + * example, the result is the `Expr` for `foo`. + * ```rb + * foo if bar + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "... if ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * An expression modified using `unless`. 
+ * ```rb + * y /= x unless x == 0 + * ``` + */ +class UnlessModifierExpr extends ConditionalExpr, TUnlessModifierExpr { + private Ruby::UnlessModifier g; + + UnlessModifierExpr() { this = TUnlessModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "UnlessModifierExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + final override Stmt getBranch(boolean cond) { cond = false and result = this.getBody() } + + /** + * Gets the statement that is conditionally evaluated. In the following + * example, the result is the `Expr` for `foo`. + * ```rb + * foo unless bar + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "... unless ..." } + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * A conditional expression using the ternary (`?:`) operator. + * ```rb + * (a > b) ? a : b + * ``` + */ +class TernaryIfExpr extends ConditionalExpr, TTernaryIfExpr { + private Ruby::Conditional g; + + TernaryIfExpr() { this = TTernaryIfExpr(g) } + + final override string getAPrimaryQlClass() { result = "TernaryIfExpr" } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** Gets the 'then' branch of this ternary if expression. */ + final Stmt getThen() { toGenerated(result) = g.getConsequence() } + + /** Gets the 'else' branch of this ternary if expression. */ + final Stmt getElse() { toGenerated(result) = g.getAlternative() } + + final override Stmt getBranch(boolean cond) { + cond = true and result = getThen() + or + cond = false and result = getElse() + } + + final override string toString() { result = "... ? ... : ..." 
} + + override AstNode getAChild(string pred) { + result = ConditionalExpr.super.getAChild(pred) + or + pred = "getThen" and result = this.getThen() + or + pred = "getElse" and result = this.getElse() + } +} + +/** + * A `case` expression. + * ```rb + * case foo + * when 0 + * puts 'zero' + * else + * puts 'other' + * end + * ``` + */ +class CaseExpr extends ControlExpr, TCaseExpr { + private Ruby::Case g; + + CaseExpr() { this = TCaseExpr(g) } + + final override string getAPrimaryQlClass() { result = "CaseExpr" } + + /** + * Gets the expression being compared, if any. For example, `foo` in the following example: + * ```rb + * case foo + * when 0 + * puts 'zero' + * when 1 + * puts 'one' + * end + * ``` + * There is no result for the following example: + * ```rb + * case + * when a then 0 + * when b then 1 + * else 2 + * end + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue() } + + /** + * Gets the `n`th branch of this case expression, either a `WhenExpr` or a + * `StmtSequence`. + */ + final Expr getBranch(int n) { toGenerated(result) = g.getChild(n) } + + /** + * Gets a branch of this case expression, either a `WhenExpr` or a + * `StmtSequence`. + */ + final Expr getABranch() { result = this.getBranch(_) } + + /** Gets a `when` branch of this case expression. */ + final WhenExpr getAWhenBranch() { result = getABranch() } + + /** Gets the `else` branch of this case expression, if any. */ + final StmtSequence getElseBranch() { result = getABranch() } + + /** + * Gets the number of branches of this case expression. + */ + final int getNumberOfBranches() { result = count(this.getBranch(_)) } + + final override string toString() { result = "case ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getValue" and result = this.getValue() + or + pred = "getBranch" and result = this.getBranch(_) + } +} + +/** + * A `when` branch of a `case` expression. 
+ * ```rb + * case + * when a > b then x + * end + * ``` + */ +class WhenExpr extends Expr, TWhenExpr { + private Ruby::When g; + + WhenExpr() { this = TWhenExpr(g) } + + final override string getAPrimaryQlClass() { result = "WhenExpr" } + + /** Gets the body of this case-when expression. */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + /** + * Gets the `n`th pattern (or condition) in this case-when expression. In the + * following example, the 0th pattern is `x`, the 1st pattern is `y`, and the + * 2nd pattern is `z`. + * ```rb + * case foo + * when x, y, z + * puts 'x/y/z' + * end + * ``` + */ + final Expr getPattern(int n) { toGenerated(result) = g.getPattern(n).getChild() } + + /** + * Gets a pattern (or condition) in this case-when expression. + */ + final Expr getAPattern() { result = this.getPattern(_) } + + /** + * Gets the number of patterns in this case-when expression. + */ + final int getNumberOfPatterns() { result = count(this.getPattern(_)) } + + final override string toString() { result = "when ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + or + pred = "getPattern" and result = this.getPattern(_) + } +} + +/** + * A loop. That is, a `for` loop, a `while` or `until` loop, or their + * expression-modifier variants. + */ +class Loop extends ControlExpr, TLoop { + /** Gets the body of this loop. */ + Stmt getBody() { none() } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * A loop using a condition expression. That is, a `while` or `until` loop, or + * their expression-modifier variants. + */ +class ConditionalLoop extends Loop, TConditionalLoop { + /** Gets the condition expression of this loop. 
*/ + Expr getCondition() { none() } + + override AstNode getAChild(string pred) { + result = Loop.super.getAChild(pred) + or + pred = "getCondition" and result = this.getCondition() + } + + /** Holds if the loop body is entered when the condition is `condValue`. */ + predicate entersLoopWhenConditionIs(boolean condValue) { none() } +} + +/** + * A `while` loop. + * ```rb + * while a < b + * p a + * a += 2 + * end + * ``` + */ +class WhileExpr extends ConditionalLoop, TWhileExpr { + private Ruby::While g; + + WhileExpr() { this = TWhileExpr(g) } + + final override string getAPrimaryQlClass() { result = "WhileExpr" } + + /** Gets the body of this `while` loop. */ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `while` loops, this holds when `condValue` is true. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true } + + final override string toString() { result = "while ..." } +} + +/** + * An `until` loop. + * ```rb + * until a >= b + * p a + * a += 1 + * end + * ``` + */ +class UntilExpr extends ConditionalLoop, TUntilExpr { + private Ruby::Until g; + + UntilExpr() { this = TUntilExpr(g) } + + final override string getAPrimaryQlClass() { result = "UntilExpr" } + + /** Gets the body of this `until` loop. */ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `until` loops, this holds when `condValue` is false. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false } + + final override string toString() { result = "until ..." } +} + +/** + * An expression looped using the `while` modifier. 
+ * ```rb + * foo while bar + * ``` + */ +class WhileModifierExpr extends ConditionalLoop, TWhileModifierExpr { + private Ruby::WhileModifier g; + + WhileModifierExpr() { this = TWhileModifierExpr(g) } + + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `while`-modifier loops, this holds when `condValue` is true. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = true } + + final override string getAPrimaryQlClass() { result = "WhileModifierExpr" } + + final override string toString() { result = "... while ..." } +} + +/** + * An expression looped using the `until` modifier. + * ```rb + * foo until bar + * ``` + */ +class UntilModifierExpr extends ConditionalLoop, TUntilModifierExpr { + private Ruby::UntilModifier g; + + UntilModifierExpr() { this = TUntilModifierExpr(g) } + + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + final override Expr getCondition() { toGenerated(result) = g.getCondition() } + + /** + * Holds if the loop body is entered when the condition is `condValue`. For + * `until`-modifier loops, this holds when `condValue` is false. + */ + final override predicate entersLoopWhenConditionIs(boolean condValue) { condValue = false } + + final override string getAPrimaryQlClass() { result = "UntilModifierExpr" } + + final override string toString() { result = "... until ..." } +} + +/** + * A `for` loop. + * ```rb + * for val in 1..n + * sum += val + * end + * ``` + */ +class ForExpr extends Loop, TForExpr { + private Ruby::For g; + + ForExpr() { this = TForExpr(g) } + + final override string getAPrimaryQlClass() { result = "ForExpr" } + + /** Gets the body of this `for` loop. 
*/ + final override Stmt getBody() { toGenerated(result) = g.getBody() } + + /** Gets the pattern representing the iteration argument. */ + final Pattern getPattern() { toGenerated(result) = g.getPattern() } + + /** + * Gets the value being iterated over. In the following example, the result + * is the expression `1..10`: + * ```rb + * for n in 1..10 do + * puts n + * end + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue().getChild() } + + final override string toString() { result = "for ... in ..." } + + override AstNode getAChild(string pred) { + result = Loop.super.getAChild(pred) + or + pred = "getPattern" and result = this.getPattern() + or + pred = "getValue" and result = this.getValue() + } +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Erb.qll b/ruby/ql/lib/codeql/ruby/ast/Erb.qll new file mode 100644 index 000000000000..0c10d764590d --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Erb.qll @@ -0,0 +1,267 @@ +private import codeql.Locations +private import codeql.ruby.AST +private import internal.Erb +private import internal.TreeSitter + +/** + * A node in the ERB abstract syntax tree. This class is the base class for all + * ERB elements. + */ +class ErbAstNode extends TAstNode { + /** Gets a textual representation of this node. */ + cached + string toString() { none() } + + /** Gets the location of this node. */ + Location getLocation() { result = getLocation(this) } + + /** + * Gets the name of a primary CodeQL class to which this node belongs. + * + * This predicate always has a result. If no primary class can be + * determined, the result is `"???"`. If multiple primary classes match, + * this predicate can have multiple results. + */ + string getAPrimaryQlClass() { result = "???" } +} + +/** + * An ERB template. This can contain multiple directives to be executed when + * the template is compiled. 
+ */ +class ErbTemplate extends TTemplate, ErbAstNode { + private Erb::Template g; + + ErbTemplate() { this = TTemplate(g) } + + override string toString() { result = "erb template" } + + final override string getAPrimaryQlClass() { result = "ErbTemplate" } + + ErbAstNode getAChildNode() { toGenerated(result) = g.getChild(_) } +} + +// Truncate the token string value to 32 char max +bindingset[val] +private string displayToken(string val) { + val.length() <= 32 and result = val + or + val.length() > 32 and result = val.prefix(29) + "..." +} + +/** + * An ERB token. This could be embedded code, a comment, or arbitrary text. + */ +class ErbToken extends TTokenNode, ErbAstNode { + override string toString() { result = displayToken(this.getValue()) } + + /** Gets the string value of this token. */ + string getValue() { exists(Erb::Token g | this = fromGenerated(g) | result = g.getValue()) } + + override string getAPrimaryQlClass() { result = "ErbToken" } +} + +/** + * An ERB token appearing within a comment directive. + */ +class ErbComment extends ErbToken { + private Erb::Comment g; + + ErbComment() { this = TComment(g) } + + override string getValue() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "ErbComment" } +} + +/** + * An ERB token appearing within a code directive. This will typically be + * interpreted as Ruby code or a GraphQL query, depending on context. 
+ */ +class ErbCode extends ErbToken { + private Erb::Code g; + + ErbCode() { this = TCode(g) } + + override string getValue() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "ErbCode" } +} + +/** + * Holds if the position (`line`, `col`) lies within `loc`, where both the + * start and the end position of `loc` count as inside. Only line and column + * numbers are compared; the file of `loc` is not considered. + */ +bindingset[line, col] +private predicate locationIncludesPosition(Location loc, int line, int col) { + // position between start and end line, exclusive + line > loc.getStartLine() and + line < loc.getEndLine() + or + // position on start line, multi line location + line = loc.getStartLine() and + not loc.getStartLine() = loc.getEndLine() and + col >= loc.getStartColumn() + or + // position on end line, multi line location + line = loc.getEndLine() and + not loc.getStartLine() = loc.getEndLine() and + col <= loc.getEndColumn() + or + // single line location, position between start and end column + line = loc.getStartLine() and + loc.getStartLine() = loc.getEndLine() and + col >= loc.getStartColumn() and + col <= loc.getEndColumn() +} + +/** + * A directive in an ERB template. + */ +class ErbDirective extends TDirectiveNode, ErbAstNode { + /** + * Holds if `loc` is in the same file as this directive and the start + * position of `loc` lies within the location of this directive. + */ + private predicate containsStartOf(Location loc) { + loc.getFile() = this.getLocation().getFile() and + locationIncludesPosition(this.getLocation(), loc.getStartLine(), loc.getStartColumn()) + } + + /** Holds if `s` is not a `Toplevel` and starts within this directive. */ + private predicate containsStmtStart(Stmt s) { + this.containsStartOf(s.getLocation()) and + // `Toplevel` statements are not contained within individual directives, + // though their start location may appear within a directive location + not s instanceof Toplevel + } + + /** + * Gets a statement that starts in this directive and is not a child of any + * other statement starting in this directive. + */ + Stmt getAChildStmt() { + this.containsStmtStart(result) and + not this.containsStmtStart(result.getParent()) + } + + /** + * Gets the last child statement in this directive. + * See `getAChildStmt` for more details. 
+ */ + Stmt getTerminalStmt() { + result = this.getAChildStmt() and + forall(Stmt s | s = this.getAChildStmt() and not s = result | + s.getLocation().strictlyBefore(result.getLocation()) + ) + } + + /** Gets the child token of this directive. */ + ErbToken getToken() { + exists(Erb::Directive g | this = fromGenerated(g) | toGenerated(result) = g.getChild()) + } + + override string toString() { result = "erb directive" } + + override string getAPrimaryQlClass() { result = "ErbDirective" } +} + +/** + * A comment directive in an ERB template. + * ```erb + * <%#= 2 + 2 %> + * <%# for x in xs do %> + * ``` + */ +class ErbCommentDirective extends ErbDirective { + private Erb::CommentDirective g; + + ErbCommentDirective() { this = TCommentDirective(g) } + + override ErbComment getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%#" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbCommentDirective" } +} + +/** + * A GraphQL directive in an ERB template. + * ```erb + * <%graphql + * fragment Foo on Bar { + * some { + * queryText + * moreProperties + * } + * } + * %> + * ``` + */ +class ErbGraphqlDirective extends ErbDirective { + private Erb::GraphqlDirective g; + + ErbGraphqlDirective() { this = TGraphqlDirective(g) } + + override ErbCode getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%graphql" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbGraphqlDirective" } +} + +/** + * An output directive in an ERB template. 
+ * ```erb + * <%= book.title %> + * <%= + * books.size + * %> + * ``` + */ +class ErbOutputDirective extends ErbDirective { + private Erb::OutputDirective g; + + ErbOutputDirective() { this = TOutputDirective(g) } + + override ErbCode getToken() { toGenerated(result) = g.getChild() } + + final override string toString() { result = "<%=" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbOutputDirective" } +} + +/** + * An execution directive in an ERB template. + * This code will be executed as Ruby, but not rendered. + * ```erb + * <% books = author.books + * for book in books do %> + * ``` + */ +class ErbExecutionDirective extends ErbDirective { + private Erb::Directive g; + + ErbExecutionDirective() { this = TDirective(g) } + + final override string toString() { result = "<%" + this.getToken().toString() + "%>" } + + final override string getAPrimaryQlClass() { result = "ErbExecutionDirective" } +} + +/** + * A `File` containing an Embedded Ruby template. + * This is typically a file containing snippets of Ruby code that can be + * evaluated to create a compiled version of the file. + */ +class ErbFile extends File { + private ErbTemplate template; + + ErbFile() { this = template.getLocation().getFile() } + + /** + * Holds if the file represents a partial to be rendered in the context of + * another template, that is, if the stem of its name starts with an + * underscore. + */ + predicate isPartial() { this.getStem().charAt(0) = "_" } + + /** + * Gets the erb template contained within this file. + */ + ErbTemplate getTemplate() { result = template } +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Expr.qll b/ruby/ql/lib/codeql/ruby/ast/Expr.qll new file mode 100644 index 000000000000..6a2a85ffee90 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Expr.qll @@ -0,0 +1,450 @@ +private import codeql.ruby.AST +private import internal.AST +private import internal.TreeSitter + +/** + * An expression. 
+ * + * This is the root QL class for all expressions. + */ +class Expr extends Stmt, TExpr { } + +/** + * A reference to the current object. For example: + * - `self == other` + * - `self.method_name` + * - `def self.method_name ... end` + * + * This also includes implicit references to the current object in method + * calls. For example, the method call `foo(123)` has an implicit `self` + * receiver, and is equivalent to the explicit `self.foo(123)`. + */ +class Self extends Expr, TSelf { + final override string getAPrimaryQlClass() { result = "Self" } + + final override string toString() { result = "self" } +} + +/** + * A sequence of expressions in the right-hand side of an assignment or + * a `return`, `break` or `next` statement. + * ```rb + * x = 1, *items, 3, *more + * return 1, 2 + * next *list + * break **map + * return 1, 2, *items, k: 5, **map + * ``` + */ +class ArgumentList extends Expr, TArgumentList { + private Ruby::AstNode g; + + ArgumentList() { this = TArgumentList(g) } + + /** Gets the `i`th element in this argument list. */ + Expr getElement(int i) { + toGenerated(result) in [ + g.(Ruby::ArgumentList).getChild(i), g.(Ruby::RightAssignmentList).getChild(i) + ] + } + + final override string getAPrimaryQlClass() { result = "ArgumentList" } + + final override string toString() { result = "..., ..." } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getElement" and result = this.getElement(_) + } +} + +/** A sequence of expressions. */ +class StmtSequence extends Expr, TStmtSequence { + override string getAPrimaryQlClass() { result = "StmtSequence" } + + /** Gets the `n`th statement in this sequence. */ + Stmt getStmt(int n) { none() } + + /** Gets a statement in this sequence. */ + final Stmt getAStmt() { result = this.getStmt(_) } + + /** Gets the last statement in this sequence, if any. 
*/ + final Stmt getLastStmt() { result = this.getStmt(this.getNumberOfStatements() - 1) } + + /** Gets the number of statements in this sequence. */ + final int getNumberOfStatements() { result = count(this.getAStmt()) } + + /** Holds if this sequence has no statements. */ + final predicate isEmpty() { this.getNumberOfStatements() = 0 } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getStmt" and result = this.getStmt(_) + } +} + +private class StmtSequenceSynth extends StmtSequence, TStmtSequenceSynth { + final override Stmt getStmt(int n) { synthChild(this, n, result) } + + final override string toString() { result = "..." } +} + +private class Then extends StmtSequence, TThen { + private Ruby::Then g; + + Then() { this = TThen(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "then ..." } +} + +private class Else extends StmtSequence, TElse { + private Ruby::Else g; + + Else() { this = TElse(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "else ..." } +} + +private class Do extends StmtSequence, TDo { + private Ruby::Do g; + + Do() { this = TDo(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "do ..." } +} + +private class Ensure extends StmtSequence, TEnsure { + private Ruby::Ensure g; + + Ensure() { this = TEnsure(g) } + + override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string toString() { result = "ensure ..." } +} + +/** + * A sequence of statements representing the body of a method, class, module, + * or do-block. That is, any body that may also include rescue/ensure/else + * statements. 
+ */ +class BodyStmt extends StmtSequence, TBodyStmt { + // Not defined by dispatch, as it should not be exposed + private Ruby::AstNode getChild(int i) { + result = any(Ruby::Method g | this = TMethod(g)).getChild(i) + or + result = any(Ruby::SingletonMethod g | this = TSingletonMethod(g)).getChild(i) + or + exists(Ruby::Lambda g | this = TLambda(g) | + result = g.getBody().(Ruby::DoBlock).getChild(i) or + result = g.getBody().(Ruby::Block).getChild(i) + ) + or + result = any(Ruby::DoBlock g | this = TDoBlock(g)).getChild(i) + or + result = any(Ruby::Program g | this = TToplevel(g)).getChild(i) and + not result instanceof Ruby::BeginBlock + or + result = any(Ruby::Class g | this = TClassDeclaration(g)).getChild(i) + or + result = any(Ruby::SingletonClass g | this = TSingletonClass(g)).getChild(i) + or + result = any(Ruby::Module g | this = TModuleDeclaration(g)).getChild(i) + or + result = any(Ruby::Begin g | this = TBeginExpr(g)).getChild(i) + } + + final override Stmt getStmt(int n) { + result = + rank[n + 1](AstNode node, int i | + toGenerated(node) = this.getChild(i) and + not node instanceof Else and + not node instanceof RescueClause and + not node instanceof Ensure + | + node order by i + ) + } + + /** Gets the `n`th rescue clause in this block. */ + final RescueClause getRescue(int n) { + result = + rank[n + 1](RescueClause node, int i | toGenerated(node) = getChild(i) | node order by i) + } + + /** Gets a rescue clause in this block. */ + final RescueClause getARescue() { result = this.getRescue(_) } + + /** Gets the `else` clause in this block, if any. */ + final StmtSequence getElse() { result = unique(Else s | toGenerated(s) = getChild(_)) } + + /** Gets the `ensure` clause in this block, if any. 
*/ + final StmtSequence getEnsure() { result = unique(Ensure s | toGenerated(s) = getChild(_)) } + + final predicate hasEnsure() { exists(this.getEnsure()) } + + override AstNode getAChild(string pred) { + result = StmtSequence.super.getAChild(pred) + or + pred = "getRescue" and result = this.getRescue(_) + or + pred = "getElse" and result = this.getElse() + or + pred = "getEnsure" and result = this.getEnsure() + } +} + +/** + * A parenthesized expression sequence, typically containing a single expression: + * ```rb + * (x + 1) + * ``` + * However, they can also contain multiple expressions (the value of the parenthesized + * expression is the last expression): + * ```rb + * (foo; bar) + * ``` + * or even an empty sequence (value is `nil`): + * ```rb + * () + * ``` + */ +class ParenthesizedExpr extends StmtSequence, TParenthesizedExpr { + private Ruby::ParenthesizedStatements g; + + ParenthesizedExpr() { this = TParenthesizedExpr(g) } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string getAPrimaryQlClass() { result = "ParenthesizedExpr" } + + final override string toString() { result = "( ... )" } +} + +/** + * A pair expression. For example, in a hash: + * ```rb + * { foo: bar } + * ``` + * Or a keyword argument: + * ```rb + * baz(qux: 1) + * ``` + */ +class Pair extends Expr, TPair { + private Ruby::Pair g; + + Pair() { this = TPair(g) } + + final override string getAPrimaryQlClass() { result = "Pair" } + + /** + * Gets the key expression of this pair. For example, the `SymbolLiteral` + * representing the keyword `foo` in the following example: + * ```rb + * bar(foo: 123) + * ``` + * Or the `StringLiteral` for `'foo'` in the following hash pair: + * ```rb + * { 'foo' => 123 } + * ``` + */ + final Expr getKey() { toGenerated(result) = g.getKey() } + + /** + * Gets the value expression of this pair. 
For example, the `IntegerLiteral` + * 123 in the following hash pair: + * ```rb + * { 'foo' => 123 } + * ``` + */ + final Expr getValue() { toGenerated(result) = g.getValue() } + + final override string toString() { result = "Pair" } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getKey" and result = this.getKey() + or + pred = "getValue" and result = this.getValue() + } +} + +/** + * A rescue clause. For example: + * ```rb + * begin + * write_file + * rescue StandardError => msg + * puts msg + * end + * ``` + */ +class RescueClause extends Expr, TRescueClause { + private Ruby::Rescue g; + + RescueClause() { this = TRescueClause(g) } + + final override string getAPrimaryQlClass() { result = "RescueClause" } + + /** + * Gets the `n`th exception to match, if any. For example `FirstError` or `SecondError` in: + * ```rb + * begin + * do_something + * rescue FirstError, SecondError => e + * handle_error(e) + * end + * ``` + */ + final Expr getException(int n) { toGenerated(result) = g.getExceptions().getChild(n) } + + /** + * Gets an exception to match, if any. For example `FirstError` or `SecondError` in: + * ```rb + * begin + * do_something + * rescue FirstError, SecondError => e + * handle_error(e) + * end + * ``` + */ + final Expr getAnException() { result = this.getException(_) } + + /** + * Gets the variable to which to assign the matched exception, if any. + * For example `err` in: + * ```rb + * begin + * do_something + * rescue StandardError => err + * handle_error(err) + * end + * ``` + */ + final LhsExpr getVariableExpr() { toGenerated(result) = g.getVariable().getChild() } + + /** + * Gets the exception handler body. + */ + final StmtSequence getBody() { toGenerated(result) = g.getBody() } + + final override string toString() { result = "rescue ..." 
} + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getException" and result = this.getException(_) + or + pred = "getVariableExpr" and result = this.getVariableExpr() + or + pred = "getBody" and result = this.getBody() + } +} + +/** + * An expression with a `rescue` modifier. For example: + * ```rb + * contents = read_file rescue "" + * ``` + */ +class RescueModifierExpr extends Expr, TRescueModifierExpr { + private Ruby::RescueModifier g; + + RescueModifierExpr() { this = TRescueModifierExpr(g) } + + final override string getAPrimaryQlClass() { result = "RescueModifierExpr" } + + /** + * Gets the body of this `RescueModifierExpr`. + * ```rb + * body rescue handler + * ``` + */ + final Stmt getBody() { toGenerated(result) = g.getBody() } + + /** + * Gets the exception handler of this `RescueModifierExpr`. + * ```rb + * body rescue handler + * ``` + */ + final Stmt getHandler() { toGenerated(result) = g.getHandler() } + + final override string toString() { result = "... rescue ..." } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getBody" and result = this.getBody() + or + pred = "getHandler" and result = this.getHandler() + } +} + +/** + * A concatenation of string literals. + * + * ```rb + * "foo" "bar" "baz" + * ``` + */ +class StringConcatenation extends Expr, TStringConcatenation { + private Ruby::ChainedString g; + + StringConcatenation() { this = TStringConcatenation(g) } + + final override string getAPrimaryQlClass() { result = "StringConcatenation" } + + /** Gets the `n`th string literal in this concatenation. */ + final StringLiteral getString(int n) { toGenerated(result) = g.getChild(n) } + + /** Gets a string literal in this concatenation. */ + final StringLiteral getAString() { result = this.getString(_) } + + /** Gets the number of string literals in this concatenation. 
*/ + final int getNumberOfStrings() { result = count(this.getString(_)) } + + /** + * Gets the result of concatenating all the string literals, if and only if + * they do not contain any interpolations. + * + * For the following example, the result is `"foobar"`: + * + * ```rb + * "foo" 'bar' + * ``` + * + * And for the following example, where one of the string literals includes + * an interpolation, there is no result: + * + * ```rb + * "foo" "bar#{ n }" + * ``` + */ + final string getConcatenatedValueText() { + forall(StringLiteral c | c = this.getString(_) | exists(c.getValueText())) and + result = + concat(string valueText, int i | + valueText = this.getString(i).getValueText() + | + valueText order by i + ) + } + + final override string toString() { result = "\"...\" \"...\"" } + + override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getString" and result = this.getString(_) + } +} diff --git a/ruby/ql/lib/codeql/ruby/ast/Literal.qll b/ruby/ql/lib/codeql/ruby/ast/Literal.qll new file mode 100644 index 000000000000..c158faf2b4b2 --- /dev/null +++ b/ruby/ql/lib/codeql/ruby/ast/Literal.qll @@ -0,0 +1,885 @@ +private import codeql.ruby.AST +private import codeql.ruby.regexp.RegExpTreeView as RETV +private import internal.AST +private import internal.Scope +private import internal.TreeSitter + +/** + * A literal. + * + * This is the QL root class for all literals. + */ +class Literal extends Expr, TLiteral { + /** + * Gets the source text for this literal, if this is a simple literal. + * + * For complex literals, such as arrays, hashes, and strings with + * interpolations, this predicate has no result. + */ + string getValueText() { none() } +} + +/** + * A numeric literal, i.e. an integer, floating-point, rational, or complex + * value. + * + * ```rb + * 123 + * 0xff + * 3.14159 + * 1.0E2 + * 7r + * 1i + * ``` + */ +class NumericLiteral extends Literal, TNumericLiteral { } + +/** + * An integer literal. 
+ * + * ```rb + * 123 + * 0xff + * ``` + */ +class IntegerLiteral extends NumericLiteral, TIntegerLiteral { + /** Gets the numerical value of this integer literal. */ + int getValue() { none() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "IntegerLiteral" } +} + +/** An integer literal backed by a `Ruby::Integer` parse-tree node. */ +private class IntegerLiteralReal extends IntegerLiteral, TIntegerLiteralReal { + private Ruby::Integer g; + + IntegerLiteralReal() { this = TIntegerLiteralReal(g) } + + final override string getValueText() { result = g.getValue() } + + /** + * Gets the numerical value of this literal, computed from its source text. + * + * The radix is chosen from the (case-insensitive) prefix: `0b` is binary, + * `0x` is hexadecimal, `0d` is decimal, `0o` or a bare leading `0` is octal, + * and anything else is decimal. Underscore separators are ignored. + */ + final override int getValue() { + exists(string s, string values, string str | + s = this.getValueText().toLowerCase() and + ( + s.matches("0b%") and + values = "01" and + str = s.suffix(2) + or + s.matches("0x%") and + values = "0123456789abcdef" and + str = s.suffix(2) + or + // explicit decimal prefix, e.g. `0d170`; without this branch the literal + // would be treated as octal and the `d` silently dropped + s.matches("0d%") and + values = "0123456789" and + str = s.suffix(2) + or + // bare leading zero, e.g. `0252`, is octal + s.charAt(0) = "0" and + not s.charAt(1) = ["b", "x", "o", "d"] and + values = "01234567" and + str = s.suffix(1) + or + s.matches("0o%") and + values = "01234567" and + str = s.suffix(2) + or + s.charAt(0) != "0" and values = "0123456789" and str = s + ) + | + // positional evaluation: each digit value is scaled by the radix raised + // to the digit's distance from the last digit + result = + sum(int index, string c, int v, int exp | + c = str.replaceAll("_", "").charAt(index) and + v = values.indexOf(c.toLowerCase()) and + exp = str.replaceAll("_", "").length() - index - 1 + | + v * values.length().pow(exp) + ) + ) + } +} + +/** An integer literal whose value is stored in its `TIntegerLiteralSynth` branch. */ +private class IntegerLiteralSynth extends IntegerLiteral, TIntegerLiteralSynth { + private int value; + + IntegerLiteralSynth() { this = TIntegerLiteralSynth(_, _, value) } + + final override string getValueText() { result = value.toString() } + + final override int getValue() { result = value } +} + +/** + * A floating-point literal. 
+ * + * ```rb + * 1.3 + * 2.7e+5 + * ``` + */ +class FloatLiteral extends NumericLiteral, TFloatLiteral { + private Ruby::Float g; + + FloatLiteral() { this = TFloatLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "FloatLiteral" } +} + +/** + * A rational literal. + * + * ```rb + * 123r + * ``` + */ +class RationalLiteral extends NumericLiteral, TRationalLiteral { + private Ruby::Rational g; + + RationalLiteral() { this = TRationalLiteral(g) } + + final override string getValueText() { result = g.getChild().(Ruby::Token).getValue() + "r" } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "RationalLiteral" } +} + +/** + * A complex literal. + * + * ```rb + * 1i + * ``` + */ +class ComplexLiteral extends NumericLiteral, TComplexLiteral { + private Ruby::Complex g; + + ComplexLiteral() { this = TComplexLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "ComplexLiteral" } +} + +/** A `nil` literal. */ +class NilLiteral extends Literal, TNilLiteral { + private Ruby::Nil g; + + NilLiteral() { this = TNilLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = this.getValueText() } + + final override string getAPrimaryQlClass() { result = "NilLiteral" } +} + +/** + * A Boolean literal. + * ```rb + * true + * false + * TRUE + * FALSE + * ``` + */ +class BooleanLiteral extends Literal, TBooleanLiteral { + final override string getAPrimaryQlClass() { result = "BooleanLiteral" } + + final override string toString() { result = this.getValueText() } + + /** Holds if the Boolean literal is `true` or `TRUE`. 
*/ + predicate isTrue() { none() } + + /** Holds if the Boolean literal is `false` or `FALSE`. */ + predicate isFalse() { none() } +} + +private class TrueLiteral extends BooleanLiteral, TTrueLiteral { + private Ruby::True g; + + TrueLiteral() { this = TTrueLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override predicate isTrue() { any() } +} + +private class FalseLiteral extends BooleanLiteral, TFalseLiteral { + private Ruby::False g; + + FalseLiteral() { this = TFalseLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override predicate isFalse() { any() } +} + +/** + * The base class for a component of a string: `StringTextComponent`, + * `StringEscapeSequenceComponent`, or `StringInterpolationComponent`. + */ +class StringComponent extends AstNode, TStringComponent { + /** + * Gets the source text for this string component. Has no result if this is + * a `StringInterpolationComponent`. + */ + string getValueText() { none() } +} + +/** + * A component of a string (or string-like) literal that is simply text. + * + * For example, the following string literals all contain `StringTextComponent` + * components whose `getValueText()` returns `"foo"`: + * + * ```rb + * 'foo' + * "#{ bar() }foo" + * "foo#{ bar() } baz" + * ``` + */ +class StringTextComponent extends StringComponent, TStringTextComponent { + private Ruby::Token g; + + StringTextComponent() { this = TStringTextComponent(g) } + + final override string toString() { result = g.getValue() } + + final override string getValueText() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "StringTextComponent" } +} + +/** + * An escape sequence component of a string or string-like literal. 
+ */ +class StringEscapeSequenceComponent extends StringComponent, TStringEscapeSequenceComponent { + private Ruby::EscapeSequence g; + + StringEscapeSequenceComponent() { this = TStringEscapeSequenceComponent(g) } + + final override string toString() { result = g.getValue() } + + final override string getValueText() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "StringEscapeSequenceComponent" } +} + +/** + * An interpolation expression component of a string or string-like literal. + */ +class StringInterpolationComponent extends StringComponent, StmtSequence, + TStringInterpolationComponent { + private Ruby::Interpolation g; + + StringInterpolationComponent() { this = TStringInterpolationComponent(g) } + + final override string toString() { result = "#{...}" } + + final override Stmt getStmt(int n) { toGenerated(result) = g.getChild(n) } + + final override string getValueText() { none() } + + final override string getAPrimaryQlClass() { result = "StringInterpolationComponent" } +} + +/** + * A string, symbol, regexp, or subshell literal. + */ +class StringlikeLiteral extends Literal, TStringlikeLiteral { + /** + * Gets the `n`th component of this string or string-like literal. The result + * will be one of `StringTextComponent`, `StringInterpolationComponent`, and + * `StringEscapeSequenceComponent`. + * + * In the following example, the result for `n = 0` is the + * `StringTextComponent` for `foo_`, and the result for `n = 1` is the + * `StringInterpolationComponent` for `Time.now`. + * + * ```rb + * "foo_#{ Time.now }" + * ``` + */ + StringComponent getComponent(int n) { none() } + + /** + * Gets the number of components in this string or string-like literal. + * + * For the empty string `""`, the result is 0. + * + * For the string `"foo"`, the result is 1: there is a single + * `StringTextComponent`. 
+ * + * For the following example, the result is 3: there is a + * `StringTextComponent` for the substring `"foo_"`; a + * `StringEscapeSequenceComponent` for the escaped quote; and a + * `StringInterpolationComponent` for the interpolation. + * + * ```rb + * "foo\"#{bar}" + * ``` + */ + final int getNumberOfComponents() { result = count(this.getComponent(_)) } + + private string getStartDelimiter() { + this instanceof TStringLiteral and + result = "\"" + or + this instanceof TRegExpLiteral and + result = "/" + or + this instanceof TSimpleSymbolLiteral and + result = ":" + or + this instanceof TComplexSymbolLiteral and + result = ":\"" + or + this instanceof THashKeySymbolLiteral and + result = "" + or + this instanceof TSubshellLiteral and + result = "`" + or + this instanceof THereDoc and + result = "" + } + + private string getEndDelimiter() { + this instanceof TStringLiteral and + result = "\"" + or + this instanceof TRegExpLiteral and + result = "/" + or + this instanceof TSimpleSymbolLiteral and + result = "" + or + this instanceof TComplexSymbolLiteral and + result = "\"" + or + this instanceof THashKeySymbolLiteral and + result = "" + or + this instanceof TSubshellLiteral and + result = "`" + or + this instanceof THereDoc and + result = "" + } + + override string getValueText() { + // 0 components should result in the empty string + // if there are any interpolations, there should be no result + // otherwise, concatenate all the components + forall(StringComponent c | c = this.getComponent(_) | + not c instanceof StringInterpolationComponent + ) and + result = + concat(StringComponent c, int i | c = this.getComponent(i) | c.getValueText() order by i) + } + + override string toString() { + exists(string full, string summary | + full = + concat(StringComponent c, int i, string s | + c = this.getComponent(i) and + ( + s = toGenerated(c).(Ruby::Token).getValue() + or + not toGenerated(c) instanceof Ruby::Token and + s = "#{...}" + ) + | + s order by i + ) and + 
( + // summary should be 32 chars max (incl. ellipsis) + full.length() > 32 and summary = full.substring(0, 29) + "..." + or + full.length() <= 32 and summary = full + ) and + result = this.getStartDelimiter() + summary + this.getEndDelimiter() + ) + } + + final override AstNode getAChild(string pred) { + result = super.getAChild(pred) + or + pred = "getComponent" and result = this.getComponent(_) + } +} + +/** + * A string literal. + * + * ```rb + * 'hello' + * "hello, #{name}" + * ``` + */ +class StringLiteral extends StringlikeLiteral, TStringLiteral { + final override string getAPrimaryQlClass() { result = "StringLiteral" } +} + +private class RegularStringLiteral extends StringLiteral, TRegularStringLiteral { + private Ruby::String g; + + RegularStringLiteral() { this = TRegularStringLiteral(g) } + + final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) } +} + +private class BareStringLiteral extends StringLiteral, TBareStringLiteral { + private Ruby::BareString g; + + BareStringLiteral() { this = TBareStringLiteral(g) } + + final override StringComponent getComponent(int n) { toGenerated(result) = g.getChild(n) } +} + +/** + * A regular expression literal. + * + * ```rb + * /[a-z]+/ + * ``` + */ +class RegExpLiteral extends StringlikeLiteral, TRegExpLiteral { + private Ruby::Regex g; + + RegExpLiteral() { this = TRegExpLiteral(g) } + + final override string getAPrimaryQlClass() { result = "RegExpLiteral" } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } + + /** + * Gets the regexp flags as a string. + * + * ```rb + * /foo/ # => "" + * /foo/i # => "i" + * /foo/imxo # => "imxo" + */ + final string getFlagString() { + // For `/foo/i`, there should be an `/i` token in the database with `this` + // as its parents. Strip the delimiter, which can vary. 
+ result = + max(Ruby::Token t | t.getParent() = g | t.getValue().suffix(1) order by t.getParentIndex()) + } + + /** + * Holds if the regexp was specified using the `i` flag to indicate case + * insensitivity, as in the following example: + * + * ```rb + * /foo/i + * ``` + */ + final predicate hasCaseInsensitiveFlag() { this.getFlagString().charAt(_) = "i" } + + /** + * Holds if the regex was specified using the `m` flag to indicate multiline + * mode. For example: + * + * ```rb + * /foo/m + * ``` + */ + final predicate hasMultilineFlag() { this.getFlagString().charAt(_) = "m" } + + /** + * Holds if the regex was specified using the `x` flag to indicate + * 'free-spacing' mode (also known as 'extended' mode), meaning that + * whitespace and comments in the pattern are ignored. For example: + * + * ```rb + * %r{ + * [a-zA-Z_] # starts with a letter or underscore + * \w* # and then zero or more letters/digits/underscores + * }/x + * ``` + */ + final predicate hasFreeSpacingFlag() { this.getFlagString().charAt(_) = "x" } + + /** Returns the root node of the parse tree of this regular expression. */ + final RETV::RegExpTerm getParsed() { result = RETV::getParsedRegExp(this) } +} + +/** + * A symbol literal. + * + * ```rb + * :foo + * :"foo bar" + * :"foo bar #{baz}" + * ``` + */ +class SymbolLiteral extends StringlikeLiteral, TSymbolLiteral { + final override string getAPrimaryQlClass() { + not this instanceof MethodName and result = "SymbolLiteral" + } +} + +private class SimpleSymbolLiteral extends SymbolLiteral, TSimpleSymbolLiteral { + private Ruby::SimpleSymbol g; + + SimpleSymbolLiteral() { this = TSimpleSymbolLiteral(g) } + + // Tree-sitter gives us value text including the colon, which we skip. 
+ final override string getValueText() { result = g.getValue().suffix(1) } + + final override string toString() { result = g.getValue() } +} + +private class ComplexSymbolLiteral extends SymbolLiteral, TComplexSymbolLiteral { } + +private class DelimitedSymbolLiteral extends ComplexSymbolLiteral, TDelimitedSymbolLiteral { + private Ruby::DelimitedSymbol g; + + DelimitedSymbolLiteral() { this = TDelimitedSymbolLiteral(g) } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +private class BareSymbolLiteral extends ComplexSymbolLiteral, TBareSymbolLiteral { + private Ruby::BareSymbol g; + + BareSymbolLiteral() { this = TBareSymbolLiteral(g) } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +private class HashKeySymbolLiteral extends SymbolLiteral, THashKeySymbolLiteral { + private Ruby::HashKeySymbol g; + + HashKeySymbolLiteral() { this = THashKeySymbolLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = ":" + this.getValueText() } +} + +/** + * A subshell literal. + * + * ```rb + * `ls -l` + * %x(/bin/sh foo.sh) + * ``` + */ +class SubshellLiteral extends StringlikeLiteral, TSubshellLiteral { + private Ruby::Subshell g; + + SubshellLiteral() { this = TSubshellLiteral(g) } + + final override string getAPrimaryQlClass() { result = "SubshellLiteral" } + + final override StringComponent getComponent(int i) { toGenerated(result) = g.getChild(i) } +} + +/** + * A character literal. 
+ * + * ```rb + * ?a + * ?\u{61} + * ``` + */ +class CharacterLiteral extends Literal, TCharacterLiteral { + private Ruby::Character g; + + CharacterLiteral() { this = TCharacterLiteral(g) } + + final override string getValueText() { result = g.getValue() } + + final override string toString() { result = g.getValue() } + + final override string getAPrimaryQlClass() { result = "CharacterLiteral" } +} + +/** + * A "here document". For example: + * ```rb + * query = <Code that passes user input directly to
+Kernel.system
, Kernel.exec
, or some other library
+routine that executes a command, allows the user to execute malicious
+code.
If possible, use hard-coded string literals to specify the command to run +or library to load. Instead of passing the user input directly to the +process or library function, examine the user input and then choose +among hard-coded string literals.
+ +If the applicable libraries or commands cannot be determined at +compile time, then add code to verify that the user input string is +safe before using it.
+ +The following example shows code that takes a shell script that can be changed
+maliciously by a user, and passes it straight to Kernel.system
+without examining it first.
+ Directly writing user input (for example, an HTTP request parameter) to a webpage, + without properly sanitizing the input first, allows for a cross-site scripting + vulnerability. +
++ To guard against cross-site scripting, escape user input before writing it + to the page. Some frameworks, such as Rails, perform this escaping + implicitly and by default. +
+ +
+ Take care when using methods such as html_safe
or
+ raw
. They can be used to emit a string without escaping
+ it, and should only be used when the string has already been manually
+ escaped (for example, with the Rails html_escape
method), or when
+ the content is otherwise guaranteed to be safe (such as a hard-coded string).
+
+ The following example is safe because the
+ params[:user_name]
content within the output tags will be
+ HTML-escaped automatically before being emitted.
+
+ However, the following example is unsafe because user-controlled input is
+ emitted without escaping, since it is marked as html_safe
.
+
Hello <%= params[:user_name].html_safe %>!
diff --git a/ruby/ql/src/queries/security/cwe-079/examples/safe.html.erb b/ruby/ql/src/queries/security/cwe-079/examples/safe.html.erb new file mode 100644 index 000000000000..5e247b25b7eb --- /dev/null +++ b/ruby/ql/src/queries/security/cwe-079/examples/safe.html.erb @@ -0,0 +1 @@ +Hello <%= params[:user_name] %>!
diff --git a/ruby/ql/src/queries/security/cwe-089/SqlInjection.qhelp b/ruby/ql/src/queries/security/cwe-089/SqlInjection.qhelp new file mode 100644 index 000000000000..b477a49ca690 --- /dev/null +++ b/ruby/ql/src/queries/security/cwe-089/SqlInjection.qhelp @@ -0,0 +1,64 @@ + ++If a database query (such as a SQL or NoSQL query) is built from +user-provided data without sufficient sanitization, a malicious user +may be able to run malicious database queries. +
++Most database connector libraries offer a way of safely embedding +untrusted data into a query by means of query parameters or +prepared statements. +
+
+In the following Rails example, an ActionController
class
+has a text_bio
method to handle requests to fetch a biography
+for a specified user.
+
+The user is specified using a parameter, user_name
provided by
+the client. This value is accessible using the params
method.
+
+The method illustrates three different ways to construct and execute an SQL +query to find the user by name. +
+ +
+In the first case, the parameter user_name
is inserted into an
+SQL fragment using string interpolation. The parameter is user-supplied and
+is not sanitized. An attacker could use this to construct SQL queries that
+were not intended to be executed here.
+
+The second case uses string concatenation and is vulnerable in the same way +that the first case is. +
+ +
+In the third case, the name is passed in a hash instead.
+ActiveRecord
will construct a parameterized SQL query that is not
+vulnerable to SQL injection attacks.
+
+Directly evaluating user input (for example, an HTTP request parameter) as code without first
+sanitizing the input allows an attacker arbitrary code execution. This can occur when user
+input is passed to code that interprets it as an expression to be
+evaluated, using methods such as Kernel.eval
or Kernel.send
.
+
+Avoid including user input in any expression that may be dynamically evaluated. If user input must +be included, use context-specific escaping before including it. +It is important that the correct escaping is used for the type of evaluation that will occur. +
+
+The following example shows two functions setting a name from a request.
+The first function uses eval
to execute the set_name
method.
+This is dangerous as it can allow a malicious user to execute arbitrary code on the server.
+For example, the user could supply the value "' + exec('rm -rf') + '"
+to destroy the server's file system.
+The second function calls the set_name
method directly and is thus safe.
+
+
+ + Consider this use of a regular expression, which removes + all leading and trailing whitespace in a string: + +
+ +
+
+ The sub-expression "\s+$"
will match the
+ whitespace characters in text
from left to
+ right, but it can start matching anywhere within a
+ whitespace sequence. This is problematic for strings
+ that do not end with a whitespace
+ character. Such a string will force the regular
+ expression engine to process each whitespace sequence
+ once per whitespace character in the sequence.
+
+
+
+ This ultimately means that the time cost of trimming a
+ string is quadratic in the length of the string. So a
+ string like "a b"
will take milliseconds to
+ process, but a similar string with a million spaces
+ instead of just one will take several minutes.
+
+
+
+ Avoid this problem by rewriting the regular expression
+ to not contain the ambiguity about when to start
+ matching whitespace sequences. For instance, by using a
+ negative look-behind
+ (/^\s+|(?<!\s)\s+$/
), or just by using
+ the built-in strip method (text.strip!
).
+
+
+
+ Note that the sub-expression "^\s+"
is
+ not problematic as the ^
+ anchor restricts when that sub-expression can start
+ matching, and as the regular expression engine matches
+ from left to right.
+
+
+ + As a similar, but slightly subtler problem, consider the + regular expression that matches lines with numbers, possibly written + using scientific notation: +
+ +
+
+ The problem with this regular expression is in the
+ sub-expression \d+E?\d+
because the second
+ \d+
can start matching digits anywhere
+ after the first match of the first \d+
if
+ there is no E
in the input string.
+
+
+ + This is problematic for strings that do + not end with a digit. Such a string + will force the regular expression engine to process each + digit sequence once per digit in the sequence, again + leading to a quadratic time complexity. + +
+ +
+
+ To make the processing faster, the regular expression
+ should be rewritten such that the two \d+
+ sub-expressions do not have overlapping matches:
+ /^0\.\d+(E\d+)?$/
.
+
+
Consider this regular expression:
+
+ Its sub-expression "(__|.)+?"
can match the string
+ "__"
either by the first alternative "__"
to the
+ left of the "|"
operator, or by two repetitions of the second
+ alternative "."
to the right. Thus, a string consisting of an
+ odd number of underscores followed by some other character will cause the
+ regular expression engine to run for an exponential amount of time before
+ rejecting the input.
+
+ This problem can be avoided by rewriting the regular expression to remove + the ambiguity between the two branches of the alternative inside the + repetition: +
++ Some regular expressions take a long time to match certain input strings + to the point where the time it takes to match a string of length $n$ + is proportional to $n^k$ or even $2^n$. + Such regular expressions can negatively affect performance, or even allow + a malicious user to perform a Denial of Service ("DoS") attack by crafting + an expensive input string for the regular expression to match. +
++ The regular expression engine used by the Ruby interpreter (MRI) uses + backtracking non-deterministic finite automata to implement regular + expression matching. While this approach is space-efficient and allows + supporting advanced features like capture groups, it is not time-efficient + in general. The worst-case time complexity of such an automaton can be + polynomial or even exponential, meaning that for strings of a certain + shape, increasing the input length by ten characters may make the + automaton about 1000 times slower. +
+
+ Typically, a regular expression is affected by this problem if it contains
+ a repetition of the form r*
or r+
where the
+ sub-expression r
is ambiguous in the sense that it can match
+ some string in multiple ways. More information about the precise
+ circumstances can be found in the references.
+
+ Modify the regular expression to remove the ambiguity, or ensure that the + strings matched with the regular expression are short enough that the + time-complexity does not matter. +
++Deserializing untrusted data using any method that allows the construction of +arbitrary objects is easily exploitable and, in many cases, allows an attacker +to execute arbitrary code. +
+
+Avoid deserialization of untrusted data if possible. If the architecture permits
+it, use serialization formats that cannot represent arbitrary objects. For
+libraries that support it, such as the Ruby standard library's JSON
+module, ensure that the parser is configured to disable
+deserialization of arbitrary objects.
+
+The following example calls the Marshal.load
, JSON.load
, and
+YAML.load
methods on data from an HTTP request. Since these methods
+are capable of deserializing to arbitrary objects, this is inherently unsafe.
+
+Using JSON.parse
and YAML.safe_load
instead, as in the
+following example, removes the vulnerability. Note that there is no safe way to
+deserialize untrusted data using Marshal
.
+
+Directly incorporating user input into a URL redirect request without validating the input +can facilitate phishing attacks. In these attacks, unsuspecting users can be redirected to a +malicious site that looks very similar to the real site they intend to visit, but which is +controlled by the attacker. +
++To guard against untrusted URL redirection, it is advisable to avoid putting user input +directly into a redirect URL. Instead, maintain a list of authorized +redirects on the server; then choose from that list based on the user input provided. +
++The following example shows an HTTP request parameter being used directly in a URL redirect +without validating the input, which facilitates phishing attacks: +
+ ++One way to remedy the problem is to validate the user input against a known fixed string +before doing the redirection: +
+ ++When creating a file, POSIX systems allow permissions to be specified +for owner, group and others separately. Permissions should be kept as +strict as possible, preventing access to the file's contents by other users. +
+ ++Restrict the permissions of files so that only the owner is able to read or write to them. +
++Including unencrypted hard-coded inbound or outbound authentication credentials within source code +or configuration files is dangerous because the credentials may be easily discovered. +
++Source or configuration files containing hard-coded credentials may be visible to an attacker. For +example, the source code may be open source, or it may be leaked or accidentally revealed. +
++For inbound authentication, hard-coded credentials may allow unauthorized access to the system. This +is particularly problematic if the credential is hard-coded in the source code, because it cannot be +disabled easily. For outbound authentication, the hard-coded credentials may provide an attacker with +privileged information or unauthorized access to some other system. +
+ ++Remove hard-coded credentials, such as user names, passwords and certificates, from source code, +placing them in configuration files or other data stores if necessary. If possible, store +configuration files including credential data separately from the source code, in a secure location +with restricted access. +
+ ++For outbound authentication details, consider encrypting the credentials or the enclosing data +stores or configuration files, and using permissions to restrict access. +
+ +
+For inbound authentication details, consider hashing passwords using standard library functions
+where possible. For example, OpenSSL::KDF.pbkdf2_hmac
.
+
+The following examples show different types of inbound and outbound authentication. +
+ +
+In the first case, RackAppBad
, we accept a password from a remote user, and compare
+it against a plaintext string literal. If an attacker acquires the source code they can observe
+the password, and can log in to the system. Furthermore, if such an intrusion was discovered, the
+application would need to be rewritten and redeployed in order to change the password.
+
+In the second case, RackAppGood
, the password is compared to a hashed and salted
+password stored in a configuration file, using OpenSSL::KDF.pbkdf2_hmac
.
+In this case, access to the source code or the assembly would not reveal the password to an
+attacker. Even access to the configuration file containing the password hash and salt would be of
+little value to an attacker, as it is usually extremely difficult to reverse engineer the password
+from the hash and salt. In a real application care should be taken to make the string comparison
+of the hashed input against the hashed password take close to constant time, as this will make
+timing attacks more difficult.
+