-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathIncompleteHostnameRegExp.ql
More file actions
57 lines (51 loc) · 2 KB
/
IncompleteHostnameRegExp.ql
File metadata and controls
57 lines (51 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
/**
 * A taint-tracking configuration whose sources are nodes with a string value
 * that matches `isIncompleteHostNameRegExpPattern`, and whose sinks are nodes
 * accepted by `isInterpretedAsRegExp`.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "IncompleteHostnameRegExpTracking" }

  /** Holds if some string value of `source` is an incomplete hostname pattern. */
  override predicate isSource(DataFlow::Node source) {
    exists(string candidate |
      candidate = source.getStringValue() and
      isIncompleteHostNameRegExpPattern(candidate, _)
    )
  }

  /** Holds if `sink` satisfies `isInterpretedAsRegExp`. */
  override predicate isSink(DataFlow::Node sink) {
    isInterpretedAsRegExp(sink)
  }
}
/**
 * Holds if `pattern` is a regular expression pattern for URLs whose host is matched by
 * `hostPart`, and `hostPart` is directly preceded by an unescaped `.`, a subtle mistake
 * that allows `pattern` to match unexpected hosts.
 *
 * `pattern` must be bound by the caller; `hostPart` is the text of capture group 1.
 */
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
  exists(string unescapedDot, string hostGroup |
    // a single `.` that is not preceded by a backslash
    unescapedDot = "(?<!\\\\)[.]" and
    // capture group 1: a run of subdomain characters (possibly mixed with regex
    // metacharacters), then an optionally escaped `.`, then a known TLD
    hostGroup = "([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" and
    // case-insensitive match against the whole pattern; arbitrary text may surround the host
    hostPart = pattern.regexpCapture("(?i).*" + unescapedDot + hostGroup + ".*", 1)
  )
}
from Expr e, string pattern, string hostPart
where
  isIncompleteHostNameRegExpPattern(pattern, hostPart) and
  (
    // `e` is a string-valued expression that the taint-tracking configuration
    // reports as flowing to a sink
    (
      exists(Configuration tracking | tracking.hasFlow(e.flow(), _)) and
      e.mayHaveStringValue(pattern)
    )
    or
    // `e` is a regular expression literal
    pattern = e.(RegExpLiteral).getValue()
  ) and
  // skip patterns in which a non-capturing group `(?:...)` appears somewhere after
  // a `.` followed by a known TLD
  not pattern.regexpMatch("(?i).*[.]" + RegExpPatterns::commonTLD() + ".*[(][?]:.*[)].*")
select e,
  "This regular expression has an unescaped '.' before '" + hostPart +
    "', so it might match more hosts than expected."