-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Rust: Add regular expression injection query #18946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
494f914
179ea04
344fea2
0e965f7
5c83644
b48fd99
1e0b78e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| # Models for the `regex` crate. | ||
| extensions: | ||
| - addsTo: | ||
| pack: codeql/rust-all | ||
| extensible: summaryModel | ||
| data: | ||
| - ["repo:https://github.com/rust-lang/regex:regex", "crate::escape", "Argument[0].Reference", "ReturnValue", "taint", "manual"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /** | ||
| * Provides classes and predicates to reason about regular expression injection | ||
| * vulnerabilities. | ||
| */ | ||
|
|
||
| private import codeql.util.Unit | ||
| private import rust | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.controlflow.CfgNodes | ||
| private import codeql.rust.dataflow.FlowSink | ||
|
|
||
| /** | ||
| * A data flow sink for regular expression injection vulnerabilities. | ||
| */ | ||
| abstract class RegexInjectionSink extends DataFlow::Node { } | ||
|
|
||
| /** | ||
| * A barrier for regular expression injection vulnerabilities. | ||
| */ | ||
| abstract class RegexInjectionBarrier extends DataFlow::Node { } | ||
|
|
||
| /** A sink for `a` in `Regex::new(a)` when `a` is not a literal. */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any benefit to defining this sink in QL rather than models-as-data?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is to not consider cases where
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Note that we do not currently do this in the cleartext logging query, which has far more sinks the vast majority of which just take a literal. If you think it makes a difference we might consider doing it there as well.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was told it could make a difference. But it probably doesn't matter much and let's double check before we spend time rewriting a bunch of stuff. |
||
| private class NewRegexInjectionSink extends RegexInjectionSink { | ||
| NewRegexInjectionSink() { | ||
| exists(CallExprCfgNode call, PathExpr path | | ||
| path = call.getFunction().getExpr() and | ||
| path.getResolvedCrateOrigin() = "repo:https://github.com/rust-lang/regex:regex" and | ||
| path.getResolvedPath() = "<crate::regex::string::Regex>::new" and | ||
| this.asExpr() = call.getArgument(0) and | ||
| not this.asExpr() instanceof LiteralExprCfgNode | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| private class MadRegexInjectionSink extends RegexInjectionSink { | ||
| MadRegexInjectionSink() { sinkNode(this, "regex-use") } | ||
| } | ||
|
|
||
| /** | ||
| * A unit class for adding additional flow steps. | ||
| */ | ||
| class RegexInjectionAdditionalFlowStep extends Unit { | ||
| /** | ||
| * Holds if the step from `node1` to `node2` should be considered a flow | ||
| * step for paths related to regular expression injection vulnerabilities. | ||
| */ | ||
| abstract predicate step(DataFlow::Node node1, DataFlow::Node node2); | ||
| } | ||
|
|
||
| /** | ||
| * An escape barrier for regular expression injection vulnerabilities. | ||
| */ | ||
| private class RegexInjectionDefaultBarrier extends RegexInjectionBarrier { | ||
| RegexInjectionDefaultBarrier() { | ||
| // A barrier is any call to a function named `escape`, in particular this | ||
| // makes calls to `regex::escape` a barrier. | ||
| this.asExpr() | ||
| .getExpr() | ||
| .(CallExpr) | ||
| .getFunction() | ||
| .(PathExpr) | ||
| .getPath() | ||
| .getPart() | ||
| .getNameRef() | ||
| .getText() = "escape" | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| /** | ||
| * Provides a taint-tracking configuration for detecting regular expression | ||
| * injection vulnerabilities. | ||
| */ | ||
geoffw0 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| private import rust | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.dataflow.TaintTracking | ||
| private import codeql.rust.dataflow.FlowSource | ||
| private import codeql.rust.Concepts | ||
| private import codeql.rust.security.regex.RegexInjectionExtensions | ||
|
|
||
| /** | ||
| * A taint configuration for detecting regular expression injection vulnerabilities. | ||
| */ | ||
| module RegexInjectionConfig implements DataFlow::ConfigSig { | ||
| predicate isSource(DataFlow::Node source) { source instanceof ThreatModelSource } | ||
geoffw0 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink } | ||
|
|
||
| predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier } | ||
|
|
||
| predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { | ||
| any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Detect taint flow of tainted data that reaches a regular expression sink. | ||
| */ | ||
| module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>; | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,56 @@ | ||||||
| <!DOCTYPE qhelp PUBLIC | ||||||
| "-//Semmle//qhelp//EN" | ||||||
| "qhelp.dtd"> | ||||||
| <qhelp> | ||||||
|
|
||||||
| <overview> | ||||||
| <p> | ||||||
| Constructing a regular expression with unsanitized user input can be dangerous. | ||||||
| A malicious user may be able to modify the meaning of the expression causing it | ||||||
|
||||||
| A malicious user may be able to modify the meaning of the expression causing it | |
| A malicious user may be able to modify the meaning of the expression, causing it |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can possibly simplify here
| to match unexpected strings and to construct large regular expressions by using | |
| to match unexpected strings and construct large regular expressions by using |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo / simplifying
| to limit the pattern size such that it is no larger than necessary. | |
| to limit the pattern size so that it is no larger than necessary. |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo
| The following example construct a regular expressions from the user input | |
| The following example constructs a regular expressions from the user input |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great example! I've added the ready-for-doc-review tag so this PR to attract a member of the docs team for further review of the .qhelp.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding a comma to separate the clauses
| If user input is used to construct a regular expression it should be escaped | |
| If user input is used to construct a regular expression, it should be escaped |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps?
| first. This ensures that the user cannot insert characters that have special | |
| first. This ensures that malicious users cannot insert characters that have special |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Items in the References section need a full stop
| <code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a> | |
| <code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| /** | ||
| * @name Regular expression injection | ||
| * @description | ||
|
||
| * @kind path-problem | ||
| * @problem.severity error | ||
| * @security-severity 7.5 | ||
| * @precision high | ||
| * @id rust/regex-injection | ||
| * @tags security | ||
| * external/cwe/cwe-730 | ||
| * external/cwe/cwe-400 | ||
geoffw0 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| */ | ||
|
|
||
| private import codeql.rust.security.regex.RegexInjectionQuery | ||
| private import RegexInjectionFlow::PathGraph | ||
|
|
||
| from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode | ||
| where RegexInjectionFlow::flowPath(sourceNode, sinkNode) | ||
| select sinkNode.getNode(), sourceNode, sinkNode, | ||
| "This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| use regex::Regex; | ||
|
|
||
| fn get_value<'h>(key: &str, property: &'h str) -> Option<&'h str> { | ||
| // BAD: User provided `key` is interpolated into the regular expression. | ||
| let pattern = format!(r"^property:{key}=(.*)$"); | ||
| let re = Regex::new(&pattern).unwrap(); | ||
| re.captures(property)?.get(1).map(|m| m.as_str()) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| use regex::{escape, Regex}; | ||
|
|
||
| fn get_value<'h>(key: &str, property: &'h str) -> option<&'h str> { | ||
| // GOOD: User input is escaped before being used in the regular expression. | ||
| let escaped_key = escape(key); | ||
| let pattern = format!(r"^property:{escaped_key}=(.*)$"); | ||
| let re = regex::new(&pattern).unwrap(); | ||
| re.captures(property)?.get(1).map(|m| m.as_str()) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| #select | ||
| | main.rs:6:25:6:30 | ®ex | main.rs:4:20:4:32 | ...::var | main.rs:6:25:6:30 | ®ex | This regular expression is constructed from a $@. | main.rs:4:20:4:32 | ...::var | user-provided value | | ||
| edges | ||
| | main.rs:4:9:4:16 | username | main.rs:5:25:5:44 | MacroExpr | provenance | | | ||
| | main.rs:4:20:4:32 | ...::var | main.rs:4:20:4:40 | ...::var(...) [Ok] | provenance | Src:MaD:60 | | ||
| | main.rs:4:20:4:40 | ...::var(...) [Ok] | main.rs:4:20:4:66 | ... .unwrap_or(...) | provenance | MaD:1573 | | ||
| | main.rs:4:20:4:66 | ... .unwrap_or(...) | main.rs:4:9:4:16 | username | provenance | | | ||
| | main.rs:5:9:5:13 | regex | main.rs:6:26:6:30 | regex | provenance | | | ||
| | main.rs:5:17:5:45 | res | main.rs:5:25:5:44 | { ... } | provenance | | | ||
| | main.rs:5:25:5:44 | ...::format(...) | main.rs:5:17:5:45 | res | provenance | | | ||
| | main.rs:5:25:5:44 | ...::must_use(...) | main.rs:5:9:5:13 | regex | provenance | | | ||
| | main.rs:5:25:5:44 | MacroExpr | main.rs:5:25:5:44 | ...::format(...) | provenance | MaD:64 | | ||
| | main.rs:5:25:5:44 | { ... } | main.rs:5:25:5:44 | ...::must_use(...) | provenance | MaD:2996 | | ||
| | main.rs:6:26:6:30 | regex | main.rs:6:25:6:30 | ®ex | provenance | | | ||
| nodes | ||
| | main.rs:4:9:4:16 | username | semmle.label | username | | ||
| | main.rs:4:20:4:32 | ...::var | semmle.label | ...::var | | ||
| | main.rs:4:20:4:40 | ...::var(...) [Ok] | semmle.label | ...::var(...) [Ok] | | ||
| | main.rs:4:20:4:66 | ... .unwrap_or(...) | semmle.label | ... .unwrap_or(...) | | ||
| | main.rs:5:9:5:13 | regex | semmle.label | regex | | ||
| | main.rs:5:17:5:45 | res | semmle.label | res | | ||
| | main.rs:5:25:5:44 | ...::format(...) | semmle.label | ...::format(...) | | ||
| | main.rs:5:25:5:44 | ...::must_use(...) | semmle.label | ...::must_use(...) | | ||
| | main.rs:5:25:5:44 | MacroExpr | semmle.label | MacroExpr | | ||
| | main.rs:5:25:5:44 | { ... } | semmle.label | { ... } | | ||
| | main.rs:6:25:6:30 | ®ex | semmle.label | ®ex | | ||
| | main.rs:6:26:6:30 | regex | semmle.label | regex | | ||
| subpaths |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| query: queries/security/CWE-730/RegexInjection.ql | ||
| postprocess: utils/test/InlineExpectationsTestQuery.ql |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| | main.rs:6:25:6:30 | ®ex | | ||
| | main.rs:14:25:14:30 | ®ex | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| private import codeql.rust.dataflow.DataFlow | ||
| private import codeql.rust.security.regex.RegexInjectionExtensions | ||
|
|
||
| query predicate regexInjectionSink(DataFlow::Node node) { node instanceof RegexInjectionSink } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| use regex::Regex; | ||
|
|
||
| fn simple_bad(hay: &str) -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); // $ Source=env | ||
| let regex = format!("foo{}bar", username); | ||
| let re = Regex::new(®ex).unwrap(); // $ Alert[rust/regex-injection]=env | ||
| Some(re.is_match(hay)) | ||
| } | ||
|
|
||
| fn simple_good(hay: &str) -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let escaped = regex::escape(&username); | ||
| let regex = format!("foo{}bar", escaped); | ||
| let re = Regex::new(®ex).unwrap(); | ||
| Some(re.is_match(hay)) | ||
| } | ||
|
|
||
| fn not_a_sink_literal() -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let re = Regex::new("literal string").unwrap(); | ||
| Some(re.is_match(&username)) | ||
| } | ||
|
|
||
| fn not_a_sink_raw_literal() -> Option<bool> { | ||
| let username = std::env::var("USER").unwrap_or("".to_string()); | ||
| let re = Regex::new(r"literal string").unwrap(); | ||
| Some(re.is_match(&username)) | ||
| } | ||
|
|
||
| fn main() { | ||
| let hay = "a string"; | ||
| simple_bad(hay); | ||
| simple_good(hay); | ||
| not_a_sink_literal(); | ||
| not_a_sink_raw_literal(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| qltest_cargo_check: true | ||
| qltest_dependencies: | ||
| - regex = { version = "1.11.1" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This model is accurate, but I mostly added it to make sure that the barrier actually worked.