-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathIncompleteHostnameRegExp.ql
More file actions
49 lines (46 loc) · 1.84 KB
/
IncompleteHostnameRegExp.ql
File metadata and controls
49 lines (46 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/**
* @name Incomplete regular expression for hostnames
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
* @kind problem
* @problem.severity warning
* @precision high
* @id js/incomplete-hostname-regexp
* @tags correctness
* security
* external/cwe/cwe-20
*/
import javascript
import semmle.javascript.CharacterEscapes
/**
 * Holds if the regular-expression source text `pattern` contains a `.` that is
 * not preceded by a backslash and that is directly followed by a host-like
 * fragment ending in a well-known top-level domain. `hostPart` is that
 * fragment, i.e. the text captured right after the unescaped `.`.
 *
 * Such a pattern is suspicious because the unescaped `.` matches any
 * character, so the expression accepts more hosts than the author likely
 * intended.
 */
bindingset[pattern]
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
  exists(string unescapedDot, string hostTail |
    // a literal `.` in the pattern text that is not preceded by a backslash
    unescapedDot = "(?<!\\\\)[.]" and
    // capture group 1: a run of host-name characters (letters, digits, `-`),
    // possibly interleaved with the regex metacharacters `(`, `)`, `:`, `|`, `?`,
    // then an optionally escaped `.` and whatever `RegExpPatterns::commonTLD()`
    // matches (presumably an alternation of common TLDs; defined outside this file)
    hostTail = "([():|?a-z0-9-]+(\\\\)?[.]" + RegExpPatterns::commonTLD() + ")" and
    // case-insensitive search anywhere inside `pattern`
    hostPart = pattern.regexpCapture("(?i).*" + unescapedDot + hostTail + ".*", 1)
  )
}
// Reports every regular-expression source whose pattern text contains an
// unescaped `.` in front of a host-like fragment (see
// `isIncompleteHostNameRegExpPattern`), together with a related location.
from
  RegExpPatternSource source, string pattern, string hostPart, string description,
  DataFlow::Node related
where
  pattern = source.getPattern() and
  isIncompleteHostNameRegExpPattern(pattern, hostPart) and
  // avoid double reporting: skip sources already covered by the
  // character-escape mistake check
  not CharacterEscapes::hasALikelyRegExpPatternMistake(source) and
  // skip patterns in which a `(?:...)` group appears somewhere after a `.`
  // followed by a common TLD
  not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*") and
  (
    // the source has a parse that is a different node than the source itself:
    // describe it as a string and link to where it is parsed
    source.getAParse() != source and
    description = "string, which is used as a regular expression $@," and
    related = source.getAParse()
    or
    // otherwise the source is reported as the regular expression itself
    not (source.getAParse() != source) and
    description = "regular expression" and
    related = source
  )
select source,
  "This " + description + " has an unescaped '.' before '" + hostPart +
    "', so it might match more hosts than expected.", related, "here"