-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathIncompleteHostnameRegExp.ql
More file actions
60 lines (54 loc) · 2.18 KB
/
IncompleteHostnameRegExp.ql
File metadata and controls
60 lines (54 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
/**
 * A taint-tracking configuration whose sources are nodes with a string value
 * that is an incomplete hostname pattern, and whose sinks are nodes that are
 * interpreted as regular expressions.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "IncompleteHostnameRegExpTracking" }

  /**
   * Holds if the string value of `source` satisfies
   * `isIncompleteHostNameRegExpPattern` (for any host part).
   */
  override predicate isSource(DataFlow::Node source) {
    exists(string str |
      str = source.getStringValue() and
      isIncompleteHostNameRegExpPattern(str, _)
    )
  }

  /** Holds if `isInterpretedAsRegExp` holds for `sink`. */
  override predicate isSink(DataFlow::Node sink) {
    isInterpretedAsRegExp(sink)
  }
}
/**
 * Holds if `pattern` is a regular expression pattern for URLs whose host is
 * matched by `hostPart`, and `pattern` contains a subtle mistake (an unescaped
 * `.`) that allows it to match unexpected hosts.
 *
 * `hostPart` is the text captured directly after the unescaped `.`: a run of
 * subdomain/regex characters, a (possibly backslash-escaped) `.`, and one of
 * the alternatives produced by `RegExpPatterns::commonTLD()`.
 */
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
  exists(string unescapedDot, string hostAndTld |
    // a single `.` that is not preceded by a backslash
    unescapedDot = "(?<!\\\\)[.]" and
    // immediately followed by a sequence of subdomains, perhaps with some regex
    // characters mixed in, followed by a known TLD (this is capture group 1)
    hostAndTld = "([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" and
    hostPart = pattern.regexpCapture("(?i).*" + unescapedDot + hostAndTld + ".*", 1)
  )
}
from DataFlow::Node re, string pattern, string hostPart, string kind, DataFlow::Node aux
where
  // the pattern text has an unescaped `.` in front of `hostPart`
  isIncompleteHostNameRegExpPattern(pattern, hostPart) and
  (
    // either `re` is a regular expression literal with source text `pattern` ...
    pattern = re.asExpr().(RegExpLiteral).getValue() and
    kind = "regular expression" and
    aux = re
    or
    // ... or `re` may have the string value `pattern` and flows to `aux`
    // under the taint-tracking configuration
    kind = "string, which is used as a regular expression $@," and
    re.mayHaveStringValue(pattern) and
    any(Configuration cfg).hasFlow(re, aux)
  ) and
  // ignore patterns that have a non-capturing group `(?:...)` after the TLD
  not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
select re,
  "This " + kind + " has an unescaped '.' before '" + hostPart +
    "', so it might match more hosts than expected.", aux, "here"