-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathUnicodeDoS.ql
More file actions
122 lines (110 loc) · 4.56 KB
/
UnicodeDoS.ql
File metadata and controls
122 lines (110 loc) · 4.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/**
* @name Denial of Service using Unicode Characters
* @description A remote user-controlled data can reach a costly Unicode normalization with either form NFKC or NFKD. On Windows OS, with an attack such as the One Million Unicode Characters, this could lead to a denial of service. And, with the use of special Unicode characters, like U+2100 (℀) or U+2105 (℅), the payload size could be tripled.
* @kind path-problem
* @id py/unicode-dos
* @precision high
* @problem.severity error
* @tags security
* experimental
* external/cwe/cwe-770
*/
import python
import semmle.python.ApiGraphs
import semmle.python.Concepts
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.dataflow.new.RemoteFlowSources
// The Unicode compatibility normalization calls from unicodedata, unidecode, pyunormalize
// and textnorm modules. The use of argIdx is to constraint the argument being normalized.
class UnicodeCompatibilityNormalize extends API::CallNode {
int argIdx;
UnicodeCompatibilityNormalize() {
(
this = API::moduleImport("unicodedata").getMember("normalize").getACall() and
this.getParameter(0).getAValueReachingSink().asExpr().(StringLiteral).getText() in [
"NFKC", "NFKD"
]
or
this = API::moduleImport("pyunormalize").getMember("normalize").getACall() and
this.getParameter(0).getAValueReachingSink().asExpr().(StringLiteral).getText() in [
"NFKC", "NFKD"
]
) and
argIdx = 1
or
(
this = API::moduleImport("textnorm").getMember("normalize_unicode").getACall() and
this.getParameter(1).getAValueReachingSink().asExpr().(StringLiteral).getText() in [
"NFKC", "NFKD"
]
or
this = API::moduleImport("unidecode").getMember("unidecode").getACall()
or
this = API::moduleImport("pyunormalize").getMember(["NFKC", "NFKD"]).getACall()
) and
argIdx = 0
}
DataFlow::Node getPathArg() { result = this.getArg(argIdx) }
}
predicate underAValue(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
exists(CompareNode cn | cn = g |
exists(API::CallNode lenCall, Cmpop op, Node n |
lenCall = n.getALocalSource() and
(
// arg <= LIMIT OR arg < LIMIT
(op instanceof LtE or op instanceof Lt) and
branch = true and
cn.operands(n.asCfgNode(), op, _)
or
// LIMIT >= arg OR LIMIT > arg
(op instanceof GtE or op instanceof Gt) and
branch = true and
cn.operands(_, op, n.asCfgNode())
or
// not arg >= LIMIT OR not arg > LIMIT
(op instanceof GtE or op instanceof Gt) and
branch = false and
cn.operands(n.asCfgNode(), op, _)
or
// not LIMIT <= arg OR not LIMIT < arg
(op instanceof LtE or op instanceof Lt) and
branch = false and
cn.operands(_, op, n.asCfgNode())
)
|
lenCall = API::builtin("len").getACall() and
node = lenCall.getArg(0).asCfgNode()
) //and
//not cn.getLocation().getFile().inStdlib()
)
}
private module UnicodeDoSConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof RemoteFlowSource }
predicate isBarrier(DataFlow::Node sanitizer) {
// underAValue is a check to ensure that the length of the user-provided value is limited to a certain amount
sanitizer = DataFlow::BarrierGuard<underAValue/3>::getABarrierNode()
}
predicate isSink(DataFlow::Node sink) {
// Any call to the Unicode compatibility normalization is a costly operation
sink = any(UnicodeCompatibilityNormalize ucn).getPathArg()
or
// The call to secure_filename() from pallets/werkzeug uses the Unicode compatibility normalization
// under the hood, https://github.com/pallets/werkzeug/blob/d3dd65a27388fbd39d146caacf2563639ba622f0/src/werkzeug/utils.py#L218
sink = API::moduleImport("werkzeug").getMember("secure_filename").getACall().getArg(_)
or
sink =
API::moduleImport("werkzeug")
.getMember("utils")
.getMember("secure_filename")
.getACall()
.getArg(_)
}
predicate observeDiffInformedIncrementalMode() { any() }
}
module UnicodeDoSFlow = TaintTracking::Global<UnicodeDoSConfig>;
import UnicodeDoSFlow::PathGraph
from UnicodeDoSFlow::PathNode source, UnicodeDoSFlow::PathNode sink
where UnicodeDoSFlow::flowPath(source, sink)
select sink.getNode(), source, sink, "This $@ can reach a $@.", source.getNode(),
"user-provided value", sink.getNode(), "costly Unicode normalization operation"