-
Notifications
You must be signed in to change notification settings - Fork 2k
Expand file tree
/
Copy pathExtractEndpointData.qll
More file actions
195 lines (178 loc) · 7.88 KB
/
ExtractEndpointData.qll
File metadata and controls
195 lines (178 loc) · 7.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*
* For internal use only.
*
* Library code for extracting the training and evaluation data that we use to train ML models
* for ML-powered queries.
*/
import javascript
import Exclusions as Exclusions
import evaluation.EndToEndEvaluation as EndToEndEvaluation
import experimental.adaptivethreatmodeling.ATMConfig
import experimental.adaptivethreatmodeling.CoreKnowledge as CoreKnowledge
import experimental.adaptivethreatmodeling.EndpointFeatures as EndpointFeatures
import experimental.adaptivethreatmodeling.EndpointScoring as EndpointScoring
import experimental.adaptivethreatmodeling.EndpointTypes
import experimental.adaptivethreatmodeling.FilteringReasons
import experimental.adaptivethreatmodeling.NosqlInjectionATM as NosqlInjectionATM
import experimental.adaptivethreatmodeling.SqlInjectionATM as SqlInjectionATM
import experimental.adaptivethreatmodeling.TaintedPathATM as TaintedPathATM
import experimental.adaptivethreatmodeling.XssATM as XssATM
import Labels
import NoFeaturizationRestrictionsConfig
import Queries
/**
 * Gets the ATM configuration object for the specified query.
 *
 * Each supported query class is paired with one ATM configuration class; a query that belongs
 * to none of the classes listed below has no result.
 */
AtmConfig getAtmCfg(Query query) {
  query instanceof XssQuery and
  result instanceof XssATM::DomBasedXssAtmConfig
  or
  query instanceof TaintedPathQuery and
  result instanceof TaintedPathATM::TaintedPathAtmConfig
  or
  query instanceof SqlInjectionQuery and
  result instanceof SqlInjectionATM::SqlInjectionAtmConfig
  or
  query instanceof NosqlInjectionQuery and
  result instanceof NosqlInjectionATM::NosqlInjectionAtmConfig
}
/**
 * DEPRECATED: Alias for `getAtmCfg`.
 *
 * Forwards to `getAtmCfg` and yields exactly the same configurations for `query`.
 */
deprecated ATMConfig getATMCfg(Query query) { getAtmCfg(query) = result }
/**
 * Gets the ATM data flow configuration for the specified query.
 *
 * The result is the `Configuration` class of the ATM module that matches the class of `query`.
 */
DataFlow::Configuration getDataFlowCfg(Query query) {
  query instanceof XssQuery and
  result instanceof XssATM::Configuration
  or
  query instanceof TaintedPathQuery and
  result instanceof TaintedPathATM::Configuration
  or
  query instanceof SqlInjectionQuery and
  result instanceof SqlInjectionATM::Configuration
  or
  query instanceof NosqlInjectionQuery and
  result instanceof NosqlInjectionATM::Configuration
}
/**
 * Gets a known sink for the specified query.
 *
 * A node qualifies when the ATM configuration of `query` reports it as a known sink and its
 * file has a relative path.
 */
private DataFlow::Node getASink(Query query) {
  // Restrict to the source code of the project being analyzed.
  exists(result.getFile().getRelativePath()) and
  exists(AtmConfig cfg | cfg = getAtmCfg(query) | cfg.isKnownSink(result))
}
/**
 * Gets a data flow node that is known not to be a sink, where `reason` explains why.
 *
 * The result does not depend on any particular query: a node is excluded as soon as it is a
 * known sink for at least one query.
 */
private DataFlow::Node getANotASink(NotASinkReason reason) {
  // Restrict to the source code of the project being analyzed.
  exists(result.getFile().getRelativePath()) and
  CoreKnowledge::isOtherModeledArgument(result, reason) and
  // An endpoint may carry both a `NotASinkReason` and a `LikelyNotASinkReason`. Such endpoints
  // count as `LikelyNotASink`, so they are removed from the `NotASink` set here.
  not exists(LikelyNotASinkReason likelyReason |
    CoreKnowledge::isOtherModeledArgument(result, likelyReason)
  ) and
  // A known sink of any query is never reported as `NotASink`.
  not exists(Query anyQuery | result = getASink(anyQuery))
}
/**
 * Gets a data flow node whose label is unknown for the specified query.
 *
 * In other words, this is an effective sink of the ATM configuration for `query` that is
 * neither a known sink for `query` nor a node that is known not to be a sink.
 *
 * NOTE(review): `LikelyNotASink` endpoints are not excluded explicitly here; presumably
 * `isEffectiveSink` already filters them out - confirm.
 */
private DataFlow::Node getAnUnknown(Query query) {
  getAtmCfg(query).isEffectiveSink(result) and
  // Defensive requirement: a known sink for this query must never be labeled as unknown.
  not result = getASink(query) and
  // Defensive requirement: an endpoint that is known not to be a sink must never be labeled as
  // unknown as well, otherwise it would receive both the `NotASink` and the `Unknown` label.
  not result = getANotASink(_) and
  // Only consider the source code for the project being analyzed.
  exists(result.getFile().getRelativePath())
}
/**
 * Gets the query-specific sink label for the given endpoint, if such a label exists.
 *
 * Known sinks and unknown endpoints are looked up for `query`; the `NotASink` lookup accepts
 * any reason and does not depend on `query`.
 */
private EndpointLabel getSinkLabelForEndpoint(DataFlow::Node endpoint, Query query) {
  result instanceof UnknownLabel and
  endpoint = getAnUnknown(query)
  or
  result instanceof NotASinkLabel and
  exists(NotASinkReason anyReason | endpoint = getANotASink(anyReason))
  or
  result instanceof SinkLabel and
  endpoint = getASink(query)
}
/**
 * Gets an endpoint that should be extracted, that is, a node that has at least one sink label
 * for `query`.
 */
DataFlow::Node getAnEndpoint(Query query) {
  exists(EndpointLabel label | label = getSinkLabelForEndpoint(result, query))
}
/**
 * Endpoints and associated metadata.
 *
 * Each tuple describes one metadata entry (`key`, `value`, `valueType`) of an extracted
 * `endpoint` for the query named `queryName`.
 *
 * Note that we draw a distinction between _features_, that are provided to the model at training
 * and query time, and _metadata_, that is only provided to the model at training time.
 *
 * Internal: See the design document for
 * [extensible extraction queries](https://docs.google.com/document/d/1g3ci2Nf1hGMG6ZUP0Y4PqCy_8elcoC_dhBvgTxdAWpg)
 * for technical information about the design of this predicate.
 */
predicate endpoints(
  DataFlow::Node endpoint, string queryName, string key, string value, string valueType
) {
  exists(Query query |
    // Metadata is only provided for labelled endpoints, since not all endpoints are extracted.
    queryName = query.getName() and
    endpoint = getAnEndpoint(query)
  |
    // Whether there is a taint flow path from a known source to the endpoint.
    valueType = "boolean" and
    key = "hasFlowFromSource" and
    (
      if FlowFromSource::hasFlowFromSource(endpoint, query)
      then value = "true"
      else value = "false"
    )
    or
    // Whether the endpoint is a constant expression. Constant expressions always evaluate to a
    // constant primitive value, so they can never appear in an alert, which makes them less
    // interesting training examples.
    valueType = "boolean" and
    key = "isConstantExpression" and
    (
      if endpoint.asExpr() instanceof ConstantExpr
      then value = "true"
      else value = "false"
    )
    or
    // Whether alerts involving the endpoint are excluded from the end-to-end evaluation.
    valueType = "boolean" and
    key = "isExcludedFromEndToEndEvaluation" and
    (
      if Exclusions::isFileExcluded(endpoint.getFile())
      then value = "true"
      else value = "false"
    )
    or
    // The label for this query, considering the endpoint as a sink.
    valueType = "string" and
    key = "sinkLabel" and
    value = getSinkLabelForEndpoint(endpoint, query).getEncoding()
    or
    // The reason, or reasons, why the endpoint was labeled NotASink for this query.
    valueType = "string" and
    key = "notASinkReason" and
    value = any(FilteringReason reason | endpoint = getANotASink(reason)).getDescription()
  )
}
/**
 * Token features of the extracted endpoints, padded with empty values.
 *
 * `EndpointFeatures::tokenFeatures` has no results when `featureName` is absent for the endpoint
 * `endpoint`. To preserve compatibility with the data pipeline, this relation instead reports
 * the empty string as `featureValue` for every supported feature name that is absent.
 */
predicate tokenFeatures(DataFlow::Node endpoint, string featureName, string featureValue) {
  // Only endpoints that have metadata are reported.
  endpoints(endpoint, _, _, _, _) and
  (
    EndpointFeatures::tokenFeatures(endpoint, featureName, featureValue)
    or
    // Performance note: this creates a Cartesian product between `endpoint` and `featureName`.
    featureValue = "" and
    featureName = EndpointFeatures::getASupportedFeatureName() and
    not EndpointFeatures::tokenFeatures(endpoint, featureName, _)
  )
}
/** Provides a predicate for checking whether an extracted endpoint is reached by flow. */
module FlowFromSource {
  /**
   * Holds if the flow-exploring configuration for query `q` has flow from some node to
   * `endpoint`.
   */
  predicate hasFlowFromSource(DataFlow::Node endpoint, Query q) {
    any(Configuration cfg | cfg.getQuery() = q).hasFlow(_, endpoint)
  }

  /**
   * A data flow configuration that replicates the data flow configuration for a specific query, but
   * replaces the set of sinks with the set of endpoints we're extracting.
   *
   * We use this to find out when there is flow to a particular endpoint from a known source.
   *
   * This configuration behaves in a very similar way to the `ForwardExploringConfiguration` class
   * from the CodeQL standard libraries for JavaScript.
   */
  private class Configuration extends DataFlow::Configuration {
    /** The query whose data flow configuration this object is. */
    Query targetQuery;

    Configuration() { getDataFlowCfg(targetQuery) = this }

    /** Gets the query this configuration belongs to. */
    Query getQuery() { targetQuery = result }

    /** The sinks are the endpoints we're extracting. */
    override predicate isSink(DataFlow::Node sink) { getAnEndpoint(targetQuery) = sink }

    /** The sinks are the endpoints we're extracting, whatever the flow label. */
    override predicate isSink(DataFlow::Node sink, DataFlow::FlowLabel lbl) {
      getAnEndpoint(targetQuery) = sink
    }
  }
}