-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathIncompleteHostnameRegExp.ql
More file actions
64 lines (54 loc) · 2.03 KB
/
IncompleteHostnameRegExp.ql
File metadata and controls
64 lines (54 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
/**
 * A taint-tracking configuration that follows string values containing an
 * incomplete hostname pattern to the places where they are interpreted as
 * regular expressions.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "IncompleteHostnameRegExpTracking" }

  /**
   * Holds if `source` is an expression whose string value satisfies
   * `isIncompleteHostNameRegExpPattern`.
   */
  override predicate isSource(DataFlow::Node source) {
    exists(string candidate |
      candidate = source.asExpr().getStringValue() and
      isIncompleteHostNameRegExpPattern(candidate, _)
    )
  }

  /** Holds if `sink` is a node for which `isInterpretedAsRegExp` holds. */
  override predicate isSink(DataFlow::Node sink) { isInterpretedAsRegExp(sink) }
}
/**
 * Holds if `pattern` is a regular expression pattern for URLs whose host is
 * matched by `hostPart`, and `pattern` contains an unescaped `.` in front of
 * that host, which allows it to match unexpected hosts.
 *
 * `hostPart` is the text captured by group 1 of the detection expression
 * built below; `pattern` must be bound by the caller.
 */
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
  exists(string detector |
    detector =
      // case-insensitive, arbitrary prefix
      "(?i).*" +
        // a single `.` that is not preceded by a backslash
        "(?<!\\\\)[.]" +
        // group 1: host characters (possibly mixed with regex metacharacters), then an
        // optionally escaped `.`, then one of the alternatives from `RegExpPatterns::commonTLD()`
        "([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" +
        // arbitrary suffix
        ".*"
  |
    hostPart = pattern.regexpCapture(detector, 1)
  )
}
from Expr e, string pattern, string hostPart
where
  isIncompleteHostNameRegExpPattern(pattern, hostPart) and
  (
    // `e` is a regular expression literal whose value is `pattern` ...
    pattern = e.(RegExpLiteral).getValue()
    or
    // ... or `e` may evaluate to the string `pattern` and the taint-tracking
    // configuration finds flow from `e` to some sink
    e.mayHaveStringValue(pattern) and
    any(Configuration cfg).hasFlow(e.flow(), _)
  ) and
  // ignore patterns that contain a non-capturing group `(?:...)` somewhere after the TLD
  not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
select e,
  "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected."