-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathTarSlip.ql
More file actions
182 lines (150 loc) · 5.61 KB
/
TarSlip.ql
File metadata and controls
182 lines (150 loc) · 5.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/**
* @name Arbitrary file write during tarfile extraction
* @description Extracting files from a malicious tar archive without validating that the
* destination file path is within the destination directory can cause files outside
* the destination directory to be overwritten.
* @kind path-problem
* @id py/tarslip
* @problem.severity error
* @security-severity 7.5
* @precision medium
* @tags security
* external/cwe/cwe-022
*/
import python
import semmle.python.security.Paths
import semmle.python.dataflow.TaintTracking
import semmle.python.security.strings.Basic
/** A TaintKind to represent open tarfile objects. That is, the result of calling `tarfile.open(...)` */
class OpenTarFile extends TaintKind {
OpenTarFile() { this = "tarfile.open" }
override TaintKind getTaintOfMethodResult(string name) {
name = "getmember" and result instanceof TarFileInfo
or
name = "getmembers" and result.(SequenceKind).getItem() instanceof TarFileInfo
}
override ClassValue getType() { result = Value::named("tarfile.TarFile") }
override TaintKind getTaintForIteration() { result instanceof TarFileInfo }
}
/** The source of open tarfile objects. That is, any call to `tarfile.open(...)` */
class TarfileOpen extends TaintSource {
TarfileOpen() {
Value::named("tarfile.open").getACall() = this and
/*
* If argument refers to a string object, then it's a hardcoded path and
* this tarfile is safe.
*/
not this.(CallNode).getAnArg().pointsTo(any(StringValue str)) and
/* Ignore opens within the tarfile module itself */
not this.(ControlFlowNode).getLocation().getFile().getBaseName() = "tarfile.py"
}
override predicate isSourceOf(TaintKind kind) { kind instanceof OpenTarFile }
}
class TarFileInfo extends TaintKind {
TarFileInfo() { this = "tarfile.entry" }
override TaintKind getTaintOfMethodResult(string name) { name = "next" and result = this }
override TaintKind getTaintOfAttribute(string name) {
name = "name" and result instanceof TarFileInfo
}
}
/*
* For efficiency we don't want to track the flow of taint
* around the tarfile module.
*/
class ExcludeTarFilePy extends Sanitizer {
ExcludeTarFilePy() { this = "Tar sanitizer" }
override predicate sanitizingNode(TaintKind taint, ControlFlowNode node) {
node.getLocation().getFile().getBaseName() = "tarfile.py" and
(
taint instanceof OpenTarFile
or
taint instanceof TarFileInfo
or
taint.(SequenceKind).getItem() instanceof TarFileInfo
)
}
}
/* Any call to an extractall method */
class ExtractAllSink extends TaintSink {
ExtractAllSink() {
exists(CallNode call |
this = call.getFunction().(AttrNode).getObject("extractall") and
not exists(call.getAnArg())
)
}
override predicate sinks(TaintKind kind) { kind instanceof OpenTarFile }
}
/* Argument to extract method */
class ExtractSink extends TaintSink {
CallNode call;
ExtractSink() {
call.getFunction().(AttrNode).getName() = "extract" and
this = call.getArg(0)
}
override predicate sinks(TaintKind kind) { kind instanceof TarFileInfo }
}
/* Members argument to extract method */
class ExtractMembersSink extends TaintSink {
CallNode call;
ExtractMembersSink() {
call.getFunction().(AttrNode).getName() = "extractall" and
(this = call.getArg(0) or this = call.getArgByName("members"))
}
override predicate sinks(TaintKind kind) {
kind.(SequenceKind).getItem() instanceof TarFileInfo
or
kind instanceof OpenTarFile
}
}
class TarFileInfoSanitizer extends Sanitizer {
TarFileInfoSanitizer() { this = "TarInfo sanitizer" }
/* The test `if <path_sanitizing_test>:` clears taint on its `false` edge. */
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
taint instanceof TarFileInfo and
clears_taint_on_false_edge(test.getTest(), test.getSense())
}
private predicate clears_taint_on_false_edge(ControlFlowNode test, boolean sense) {
path_sanitizing_test(test) and
sense = false
or
// handle `not` (also nested)
test.(UnaryExprNode).getNode().getOp() instanceof Not and
clears_taint_on_false_edge(test.(UnaryExprNode).getOperand(), sense.booleanNot())
}
}
private predicate path_sanitizing_test(ControlFlowNode test) {
/* Assume that any test with "path" in it is a sanitizer */
test.getAChild+().(AttrNode).getName().matches("%path")
or
test.getAChild+().(NameNode).getId().matches("%path")
}
class TarSlipConfiguration extends TaintTracking::Configuration {
TarSlipConfiguration() { this = "TarSlip configuration" }
override predicate isSource(TaintTracking::Source source) { source instanceof TarfileOpen }
override predicate isSink(TaintTracking::Sink sink) {
sink instanceof ExtractSink or
sink instanceof ExtractAllSink or
sink instanceof ExtractMembersSink
}
override predicate isSanitizer(Sanitizer sanitizer) {
sanitizer instanceof TarFileInfoSanitizer
or
sanitizer instanceof ExcludeTarFilePy
}
override predicate isBarrier(DataFlow::Node node) {
// Avoid flow into the tarfile module
exists(ParameterDefinition def |
node.asVariable().getDefinition() = def
or
node.asCfgNode() = def.getDefiningNode()
|
def.getScope() = Value::named("tarfile.open").(CallableValue).getScope()
or
def.isSelf() and def.getScope().getEnclosingModule().getName() = "tarfile"
)
}
}
from TarSlipConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "Extraction of tarfile from $@", src.getSource(),
"a potentially untrusted source"