Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/ql/integration-tests/hello-project/summary.expected
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 2 |
| Macro calls - total | 2 |
| Macro calls - unresolved | 0 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
7 changes: 7 additions & 0 deletions rust/ql/lib/codeql/rust/frameworks/regex.model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Models for the `regex` crate.
extensions:
- addsTo:
pack: codeql/rust-all
extensible: summaryModel
data:
- ["repo:https://github.com/rust-lang/regex:regex", "crate::escape", "Argument[0].Reference", "ReturnValue", "taint", "manual"]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This model is accurate, but I mostly added it to make sure that the barrier actually worked.

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Provides classes and predicates to reason about regular expression injection
* vulnerabilities.
*/

private import codeql.util.Unit
private import rust
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.controlflow.CfgNodes
private import codeql.rust.dataflow.FlowSink

/**
* A data flow sink for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionSink extends DataFlow::Node { }

/**
* A barrier for regular expression injection vulnerabilities.
*/
abstract class RegexInjectionBarrier extends DataFlow::Node { }

/** A sink for `a` in `Regex::new(a)` when `a` is not a literal. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any benefit to defining this sink in QL rather than models-as-data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to not consider cases where Regex::new is directly applied to a literal as sinks. I think most real-world uses of Regex::new is of that form, so it's simply as an optimization to not consider these clearly safe cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. Note that we do not currently do this in the cleartext logging query, which has far more sinks the vast majority of which just take a literal. If you think it makes a difference we might consider doing it there as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was told it could make a difference. But it probably doesn't matter much and let's double check before we spend time rewriting a bunch of stuff.

private class NewRegexInjectionSink extends RegexInjectionSink {
NewRegexInjectionSink() {
exists(CallExprCfgNode call, PathExpr path |
path = call.getFunction().getExpr() and
path.getResolvedCrateOrigin() = "repo:https://github.com/rust-lang/regex:regex" and
path.getResolvedPath() = "<crate::regex::string::Regex>::new" and
this.asExpr() = call.getArgument(0) and
not this.asExpr() instanceof LiteralExprCfgNode
)
}
}

private class MadRegexInjectionSink extends RegexInjectionSink {
MadRegexInjectionSink() { sinkNode(this, "regex-use") }
}

/**
* A unit class for adding additional flow steps.
*/
class RegexInjectionAdditionalFlowStep extends Unit {
/**
* Holds if the step from `node1` to `node2` should be considered a flow
* step for paths related to regular expression injection vulnerabilities.
*/
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
}

/**
* An escape barrier for regular expression injection vulnerabilities.
*/
private class RegexInjectionDefaultBarrier extends RegexInjectionBarrier {
RegexInjectionDefaultBarrier() {
// A barrier is any call to a function named `escape`, in particular this
// makes calls to `regex::escape` a barrier.
this.asExpr()
.getExpr()
.(CallExpr)
.getFunction()
.(PathExpr)
.getPath()
.getPart()
.getNameRef()
.getText() = "escape"
}
}
31 changes: 31 additions & 0 deletions rust/ql/lib/codeql/rust/security/regex/RegexInjectionQuery.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
* Provides a taint-tracking configuration for detecting regular expression
* injection vulnerabilities.
*/

private import rust
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.dataflow.TaintTracking
private import codeql.rust.dataflow.FlowSource
private import codeql.rust.Concepts
private import codeql.rust.security.regex.RegexInjectionExtensions

/**
* A taint configuration for detecting regular expression injection vulnerabilities.
*/
module RegexInjectionConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof ThreatModelSource }

predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }

predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier }

predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo)
}
}

/**
* Detect taint flow of tainted data that reaches a regular expression sink.
*/
module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>;
56 changes: 56 additions & 0 deletions rust/ql/src/queries/security/CWE-730/RegexInjection.qhelp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<!DOCTYPE qhelp PUBLIC
"-//Semmle//qhelp//EN"
"qhelp.dtd">
<qhelp>

<overview>
<p>
Constructing a regular expression with unsanitized user input can be dangerous.
A malicious user may be able to modify the meaning of the expression causing it
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor - separating the clauses with a comma

Suggested change
A malicious user may be able to modify the meaning of the expression causing it
A malicious user may be able to modify the meaning of the expression, causing it

to match unexpected strings and to construct large regular expressions by using
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can possibly simplify here

Suggested change
to match unexpected strings and to construct large regular expressions by using
to match unexpected strings and construct large regular expressions by using

counted repetitions.
</p>
</overview>

<recommendation>
<p>
Before embedding user input into a regular expression, escape the input string
using a function such as <a
href="https://docs.rs/regex/latest/regex/fn.escape.html">regex::escape</a> to
escape meta-characters that have special meaning.
</p>
<p>
If purposefully supporting user supplied regular expressions, then use <a
href="https://docs.rs/regex/latest/regex/struct.RegexBuilder.html#method.size_limit">RegexBuilder::size_limit</a>
to limit the pattern size such that it is no larger than necessary.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo / simplifying

Suggested change
to limit the pattern size such that it is no larger than necessary.
to limit the pattern size so that it is no larger than necessary.

</p>
</recommendation>

<example>
<p>
The following example construct a regular expressions from the user input
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo

Suggested change
The following example construct a regular expressions from the user input
The following example constructs a regular expressions from the user input

<code>key</code> without escaping it first.
</p>

<sample src="RegexInjectionBad.rs" />

<p>
The regular expression is intended to match strings starting with
<code>"property"</code> such as <code>"property:foo=bar"</code>. However, a
malicious user might inject the regular expression <code>".*^|key"</code> and
unexpectedly cause strings such as <code>"key=secret"</code> to match.
</p>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great example! I've added the ready-for-doc-review tag so this PR to attract a member of the docs team for further review of the .qhelp.

<p>
If user input is used to construct a regular expression it should be escaped
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a comma to separate the clauses

Suggested change
If user input is used to construct a regular expression it should be escaped
If user input is used to construct a regular expression, it should be escaped

first. This ensures that the user cannot insert characters that have special
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps?

Suggested change
first. This ensures that the user cannot insert characters that have special
first. This ensures that malicious users cannot insert characters that have special

meanings in regular expressions.
</p>
<sample src="RegexInjectionGood.rs" />
</example>

<references>
<li>
<code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Items in the References section need a full stop

Suggested change
<code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>
<code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>.

</li>
</references>
</qhelp>
20 changes: 20 additions & 0 deletions rust/ql/src/queries/security/CWE-730/RegexInjection.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
* @name Regular expression injection
* @description
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The description, which is mandatory, is missing. For guidance about writing query descriptions, see https://github.com/github/codeql/blob/main/docs/query-metadata-style-guide.md#query-descriptions-description.

* @kind path-problem
* @problem.severity error
* @security-severity 7.5
* @precision high
* @id rust/regex-injection
* @tags security
* external/cwe/cwe-730
* external/cwe/cwe-400
*/

private import codeql.rust.security.regex.RegexInjectionQuery
private import RegexInjectionFlow::PathGraph

from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode
where RegexInjectionFlow::flowPath(sourceNode, sinkNode)
select sinkNode.getNode(), sourceNode, sinkNode,
"This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value"
8 changes: 8 additions & 0 deletions rust/ql/src/queries/security/CWE-730/RegexInjectionBad.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use regex::Regex;

fn get_value<'h>(key: &str, property: &'h str) -> Option<&'h str> {
// BAD: User provided `key` is interpolated into the regular expression.
let pattern = format!(r"^property:{key}=(.*)$");
let re = Regex::new(&pattern).unwrap();
re.captures(property)?.get(1).map(|m| m.as_str())
}
9 changes: 9 additions & 0 deletions rust/ql/src/queries/security/CWE-730/RegexInjectionGood.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use regex::{escape, Regex};

fn get_value<'h>(key: &str, property: &'h str) -> option<&'h str> {
// GOOD: User input is escaped before being used in the regular expression.
let escaped_key = escape(key);
let pattern = format!(r"^property:{escaped_key}=(.*)$");
let re = regex::new(&pattern).unwrap();
re.captures(property)?.get(1).map(|m| m.as_str())
}
Original file line number Diff line number Diff line change
Expand Up @@ -2493,6 +2493,7 @@ readStep
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::sys_common::ignore_notfound | Err | file://:0:0:0:0 | [summary] read: Argument[0].Field[crate::result::Result::Err(0)] in lang:std::_::crate::sys_common::ignore_notfound |
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::thread::current::try_with_current | function return | file://:0:0:0:0 | [summary] read: Argument[0].ReturnValue in lang:std::_::crate::thread::current::try_with_current |
| file://:0:0:0:0 | [summary param] 0 in lang:std::_::crate::thread::with_current_name | function return | file://:0:0:0:0 | [summary] read: Argument[0].ReturnValue in lang:std::_::crate::thread::with_current_name |
| file://:0:0:0:0 | [summary param] 0 in repo:https://github.com/rust-lang/regex:regex::_::crate::escape | &ref | file://:0:0:0:0 | [summary] read: Argument[0].Reference in repo:https://github.com/rust-lang/regex:regex::_::crate::escape |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::<crate::vec::into_iter::IntoIter as crate::iter::traits::iterator::Iterator>::fold | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::<crate::vec::into_iter::IntoIter as crate::iter::traits::iterator::Iterator>::fold |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::crate::collections::btree::mem::replace | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::crate::collections::btree::mem::replace |
| file://:0:0:0:0 | [summary param] 1 in lang:alloc::_::crate::collections::btree::mem::take_mut | function return | file://:0:0:0:0 | [summary] read: Argument[1].ReturnValue in lang:alloc::_::crate::collections::btree::mem::take_mut |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
| Macro calls - resolved | 8 |
| Macro calls - total | 9 |
| Macro calls - unresolved | 1 |
| Taint edges - number of edges | 1670 |
| Taint edges - number of edges | 1671 |
| Taint reach - nodes tainted | 0 |
| Taint reach - per million nodes | 0 |
| Taint sinks - cryptographic operations | 0 |
Expand Down
28 changes: 28 additions & 0 deletions rust/ql/test/query-tests/security/CWE-730/RegexInjection.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#select
| main.rs:6:25:6:30 | &regex | main.rs:4:20:4:32 | ...::var | main.rs:6:25:6:30 | &regex | This regular expression is constructed from a $@. | main.rs:4:20:4:32 | ...::var | user-provided value |
edges
| main.rs:4:9:4:16 | username | main.rs:5:25:5:44 | MacroExpr | provenance | |
| main.rs:4:20:4:32 | ...::var | main.rs:4:20:4:40 | ...::var(...) [Ok] | provenance | Src:MaD:60 |
| main.rs:4:20:4:40 | ...::var(...) [Ok] | main.rs:4:20:4:66 | ... .unwrap_or(...) | provenance | MaD:1573 |
| main.rs:4:20:4:66 | ... .unwrap_or(...) | main.rs:4:9:4:16 | username | provenance | |
| main.rs:5:9:5:13 | regex | main.rs:6:26:6:30 | regex | provenance | |
| main.rs:5:17:5:45 | res | main.rs:5:25:5:44 | { ... } | provenance | |
| main.rs:5:25:5:44 | ...::format(...) | main.rs:5:17:5:45 | res | provenance | |
| main.rs:5:25:5:44 | ...::must_use(...) | main.rs:5:9:5:13 | regex | provenance | |
| main.rs:5:25:5:44 | MacroExpr | main.rs:5:25:5:44 | ...::format(...) | provenance | MaD:64 |
| main.rs:5:25:5:44 | { ... } | main.rs:5:25:5:44 | ...::must_use(...) | provenance | MaD:2996 |
| main.rs:6:26:6:30 | regex | main.rs:6:25:6:30 | &regex | provenance | |
nodes
| main.rs:4:9:4:16 | username | semmle.label | username |
| main.rs:4:20:4:32 | ...::var | semmle.label | ...::var |
| main.rs:4:20:4:40 | ...::var(...) [Ok] | semmle.label | ...::var(...) [Ok] |
| main.rs:4:20:4:66 | ... .unwrap_or(...) | semmle.label | ... .unwrap_or(...) |
| main.rs:5:9:5:13 | regex | semmle.label | regex |
| main.rs:5:17:5:45 | res | semmle.label | res |
| main.rs:5:25:5:44 | ...::format(...) | semmle.label | ...::format(...) |
| main.rs:5:25:5:44 | ...::must_use(...) | semmle.label | ...::must_use(...) |
| main.rs:5:25:5:44 | MacroExpr | semmle.label | MacroExpr |
| main.rs:5:25:5:44 | { ... } | semmle.label | { ... } |
| main.rs:6:25:6:30 | &regex | semmle.label | &regex |
| main.rs:6:26:6:30 | regex | semmle.label | regex |
subpaths
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
query: queries/security/CWE-730/RegexInjection.ql
postprocess: utils/test/InlineExpectationsTestQuery.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
| main.rs:6:25:6:30 | &regex |
| main.rs:14:25:14:30 | &regex |
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
private import codeql.rust.dataflow.DataFlow
private import codeql.rust.security.regex.RegexInjectionExtensions

query predicate regexInjectionSink(DataFlow::Node node) { node instanceof RegexInjectionSink }
36 changes: 36 additions & 0 deletions rust/ql/test/query-tests/security/CWE-730/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use regex::Regex;

fn simple_bad(hay: &str) -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string()); // $ Source=env
let regex = format!("foo{}bar", username);
let re = Regex::new(&regex).unwrap(); // $ Alert[rust/regex-injection]=env
Some(re.is_match(hay))
}

fn simple_good(hay: &str) -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let escaped = regex::escape(&username);
let regex = format!("foo{}bar", escaped);
let re = Regex::new(&regex).unwrap();
Some(re.is_match(hay))
}

fn not_a_sink_literal() -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let re = Regex::new("literal string").unwrap();
Some(re.is_match(&username))
}

fn not_a_sink_raw_literal() -> Option<bool> {
let username = std::env::var("USER").unwrap_or("".to_string());
let re = Regex::new(r"literal string").unwrap();
Some(re.is_match(&username))
}

fn main() {
let hay = "a string";
simple_bad(hay);
simple_good(hay);
not_a_sink_literal();
not_a_sink_raw_literal();
}
3 changes: 3 additions & 0 deletions rust/ql/test/query-tests/security/CWE-730/options.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
qltest_cargo_check: true
qltest_dependencies:
- regex = { version = "1.11.1" }