/**
 * @name Arbitrary file write during tarfile extraction
 * @description Extracting files from a malicious tar archive without validating that the
 *              destination file path is within the destination directory can cause files outside
 *              the destination directory to be overwritten.
 * @kind path-problem
 * @id py/tarslip
 * @problem.severity error
 * @security-severity 7.5
 * @precision high
 * @tags security
 *       external/cwe/cwe-022
 */

import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import DataFlow::PathGraph
import semmle.python.ApiGraphs
import semmle.python.dataflow.new.internal.Attributes
import semmle.python.dataflow.new.BarrierGuards
import semmle.python.dataflow.new.RemoteFlowSources

/**
 * Gets an API node for one of the three supported ways of opening a tar archive:
 *  - `tarfile.open()`
 *  - `tarfile.TarFile()`
 *  - the `open` member of a subclass of `tarfile.TarFile`, e.g. `MKtarfile.Tarfile.open()`
 */
API::Node tarfileOpen() {
  result in [
      API::moduleImport("tarfile").getMember(["open", "TarFile"]),
      API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
    ]
}

/**
 * A call that opens a tar archive: either a direct call to one of the nodes in
 * `tarfileOpen()`, or a call to `contextlib.closing` whose first argument is such a call.
 */
class AllTarfileOpens extends API::CallNode {
  AllTarfileOpens() {
    this = tarfileOpen().getACall()
    or
    exists(API::Node closing, Node arg |
      closing = API::moduleImport("contextlib").getMember("closing") and
      this = closing.getACall() and
      arg = this.getArg(0) and
      arg = tarfileOpen().getACall()
    )
  }
}

/**
 * A taint-tracking configuration for detecting more "TarSlip" vulnerabilities.
 */
class Configuration extends TaintTracking::Configuration {
  Configuration() { this = "TarSlip" }

  /** Holds if `source` is a call to one of the tar-archive openers in `tarfileOpen()`. */
  override predicate isSource(DataFlow::Node source) { source = tarfileOpen().getACall() }

  /**
   * Holds if `sink` is a value involved in an extraction from an opened tar archive.
   *
   * Four extraction shapes are recognised (see the comments on each disjunct). Sinks whose
   * enclosing scope is located in a standard-library file are excluded.
   */
  override predicate isSink(DataFlow::Node sink) {
    (
      // A sink capturing method calls to `extractall` without `members` argument.
      // For a call to `file.extractall` without `members` argument, `file` is considered a sink.
      exists(MethodCallNode call, AllTarfileOpens atfo |
        call = atfo.getReturn().getMember("extractall").getACall() and
        not exists(Node arg | arg = call.getArgByName("members")) and
        sink = call.getObject()
      )
      or
      // A sink capturing method calls to `extractall` with `members` argument.
      // For a call to `file.extractall(members=...)`:
      //  - if the `members` argument is a name constant (such as `None`) or a list,
      //    `file` is considered a sink;
      //  - if the `members` argument is a call to the method `getmembers`,
      //    the object `getmembers` is called on is considered a sink;
      //  - otherwise, the `members` argument itself is considered a sink.
      exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
        call = atfo.getReturn().getMember("extractall").getACall() and
        arg = call.getArgByName("members") and
        if
          arg.asCfgNode() instanceof NameConstantNode or
          arg.asCfgNode() instanceof ListNode
        then sink = call.getObject()
        else
          if arg.(MethodCallNode).getMethodName() = "getmembers"
          then sink = arg.(MethodCallNode).getObject()
          else sink = call.getArgByName("members")
      )
      or
      // The first positional argument to `extract` is considered a sink.
      exists(AllTarfileOpens atfo |
        sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
      )
      or
      // For a call to `_extract_member` whose second positional argument is an attribute
      // read of `name`, the object whose `name` is read is considered a sink.
      exists(MethodCallNode call, AllTarfileOpens atfo |
        call = atfo.getReturn().getMember("_extract_member").getACall() and
        call.getArg(1).(AttrRead).accesses(sink, "name")
      )
    ) and
    // Do not report sinks located inside the standard library itself.
    not sink.getScope().getLocation().getFile().inStdlib()
  }

  /**
   * Holds if taint should additionally propagate from `nodeFrom` to `nodeTo`.
   *
   * Two extra steps are modelled:
   *  - from a tar-archive open call on which `getmembers` is read, to a method call made
   *    on that same object;
   *  - from the first argument of a `contextlib.closing(...)` call to the `closing` call
   *    itself, when that argument is a value of a tar-archive opener's return node.
   */
  override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    exists(AttrRead attr, MethodCallNode call |
      attr.accesses(nodeFrom, "getmembers") and
      nodeFrom = call.getObject() and
      nodeFrom instanceof AllTarfileOpens and
      nodeTo = call
    )
    or
    exists(API::CallNode closing |
      closing = API::moduleImport("contextlib").getMember("closing").getACall() and
      nodeFrom = closing.getArg(0) and
      nodeFrom = tarfileOpen().getReturn().getAValueReachingSink() and
      nodeTo = closing
    )
  }
}

from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
where config.hasFlowPath(source, sink)
select sink, source, sink, "Extraction of tarfile from $@ to a potentially untrusted source $@.",
  source.getNode(), source.getNode().toString(), sink.getNode(), sink.getNode().toString()