diff --git a/config/identical-files.json b/config/identical-files.json index 89a1e6fc363a..1b48e4372bf5 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -57,10 +57,6 @@ "java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/SsaReadPositionCommon.qll" ], - "Model as Data Generation Java/C# - CaptureModels": [ - "java/ql/src/utils/modelgenerator/internal/CaptureModels.qll", - "csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll" - ], "Sign Java/C#": [ "java/ql/lib/semmle/code/java/dataflow/internal/rangeanalysis/Sign.qll", "csharp/ql/lib/semmle/code/csharp/dataflow/internal/rangeanalysis/Sign.qll" diff --git a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index 5a653867572e..306ec34d31d0 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -9,5 +9,5 @@ import internal.CaptureModels from DataFlowSummaryTargetApi api, string flow -where flow = captureContentFlow(api) +where flow = ContentSensitive::captureFlow(api) select flow order by flow diff --git a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index a601c2511e6d..7a53125e21c1 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -6,9 +6,7 @@ * @tags modelgenerator */ -import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl import internal.CaptureModels -import internal.CaptureSummaryFlowQuery from DataFlowSummaryTargetApi api, string noflow where noflow = captureNoFlow(api) diff --git a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 
da4d03fa9bb2..991e593474e2 100644 --- a/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/csharp/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -6,9 +6,7 @@ * @tags modelgenerator */ -import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl import internal.CaptureModels -import internal.CaptureSummaryFlowQuery from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api) diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ab5de0d01979..7c0aed91b6b8 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -1,635 +1,351 @@ -/** - * Provides classes and predicates related to capturing summary, source, - * and sink models of the Standard or a 3rd party library. - */ - -private import CaptureModelsSpecific -private import CaptureModelsPrinting - -/** - * A node from which flow can return to the caller. This is either a regular - * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. 
- */ -private class ReturnNodeExt extends DataFlow::Node { - private DataFlowImplCommon::ReturnKindExt kind; - - ReturnNodeExt() { - kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or - kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind() +private import csharp as CS +private import semmle.code.csharp.commons.Util as Util +private import semmle.code.csharp.commons.Collections as Collections +private import semmle.code.csharp.commons.QualifiedName as QualifiedName +private import semmle.code.csharp.dataflow.internal.DataFlowDispatch +private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate +private import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow +private import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import semmle.code.csharp.dataflow.internal.DataFlowImplSpecific +private import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import semmle.code.csharp.dataflow.internal.TaintTrackingImplSpecific +private import semmle.code.csharp.frameworks.system.linq.Expressions +private import semmle.code.csharp.frameworks.System +private import semmle.code.csharp.Location +private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl + +module ModelGeneratorInput implements ModelGeneratorInputSig<Location, CsharpDataFlow> { + class Type = CS::Type; + + class Parameter = CS::Parameter; + + class Callable = CS::Callable; + + class NodeExtended extends CS::DataFlow::Node { + Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingCallable() } } /** - * Gets the kind of the return node. + * Holds if any of the parameters of `api` are `System.Func<>`.
*/ - DataFlowImplCommon::ReturnKindExt getKind() { result = kind } -} + private predicate isHigherOrder(Callable api) { + exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | + t instanceof SystemLinqExpressions::DelegateExtType + ) + } -bindingset[c] -private signature string printCallableParamSig(Callable c, ParameterPosition p); + private predicate irrelevantAccessor(CS::Accessor a) { + a.getDeclaration().(CS::Property).isReadWrite() + } -private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> { - string getOutput(ReturnNodeExt node) { - node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and - result = "ReturnValue" + private predicate isUninterestingForModels(Callable api) { + api.getDeclaringType().getNamespace().getFullName() = "" + or + api instanceof CS::ConversionOperator + or + api instanceof Util::MainMethod or - exists(ParameterPosition pos | - pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and - result = printCallableParam(returnNodeEnclosingCallable(node), pos) + api instanceof CS::Destructor + or + api instanceof CS::AnonymousFunctionExpr + or + api.(CS::Constructor).isParameterless() + or + exists(Type decl | decl = api.getDeclaringType() | + decl instanceof SystemObjectClass or + decl instanceof SystemValueTypeClass ) + or + // Disregard properties that have both a get and a set accessor, + // which implicitly means auto implemented properties.
+ irrelevantAccessor(api) } -} - -string getOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node) -} -string getContentOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node) -} - -class DataFlowSummaryTargetApi extends SummaryTargetApi { - DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } -} - -class DataFlowSourceTargetApi = SourceTargetApi; - -class DataFlowSinkTargetApi = SinkTargetApi; - -private module ModelPrintingInput implements ModelPrintingSig { - class SummaryApi = DataFlowSummaryTargetApi; - - class SourceOrSinkApi = SourceOrSinkTargetApi; + private predicate relevant(Callable api) { + [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and + api.fromSource() and + api.isUnboundDeclaration() and + not isUninterestingForModels(api) + } - string getProvenance() { result = "df-generated" } -} + private Callable getARelevantOverrideeOrImplementee(Overridable m) { + m.overridesOrImplements(result) and relevant(result) + } -module Printing = ModelPrinting<ModelPrintingInput>; + /** + * Gets the super implementation of `api` if it is relevant. + * If such a super implementation does not exist, returns `api` if it is relevant. + */ + private Callable liftedImpl(Callable api) { + ( + result = getARelevantOverrideeOrImplementee(api) + or + result = api and relevant(api) + ) and + not exists(getARelevantOverrideeOrImplementee(result)) + } -/** - * Holds if `c` is a relevant content kind, where the underlying type is relevant.
- */ -private predicate isRelevantTypeInContent(DataFlow::ContentSet c) { - isRelevantType(getUnderlyingContentType(c)) -} + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) + } -/** - * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. - */ -private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(DataFlow::ContentSet f | - DataFlowPrivate::readStep(node1, f, node2) and - // Partially restrict the content types used for intermediate steps. - (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) - ) - or - exists(DataFlow::ContentSet f | DataFlowPrivate::storeStep(node1, f, node2) | containerContent(f)) -} + private predicate hasManualSourceModel(Callable api) { + api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) + } -/** - * Holds if content `c` is either a field, a synthetic field or language specific - * content of a relevant type or a container like content. - */ -pragma[nomagic] -private predicate isRelevantContent0(DataFlow::ContentSet c) { - isRelevantTypeInContent(c) or - containerContent(c) -} + private predicate hasManualSinkModel(Callable api) { + api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) + } -/** - * Gets the MaD string representation of the parameter node `p`. 
- */ -string parameterNodeAsInput(DataFlow::ParameterNode p) { - result = parameterAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode -} + predicate isUninterestingForDataFlowModels(Callable api) { isHigherOrder(api) } -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ -string parameterNodeAsContentInput(DataFlow::ParameterNode p) { - result = parameterContentAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode -} + class SourceOrSinkTargetApi extends Callable { + SourceOrSinkTargetApi() { relevant(this) } + } -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) } - -/** - * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). - */ -string captureQualifierFlow(DataFlowSummaryTargetApi api) { - exists(ReturnNodeExt ret | - api = returnNodeEnclosingCallable(ret) and - isOwnInstanceAccessNode(ret) - ) and - result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") -} + class SinkTargetApi extends SourceOrSinkTargetApi { + SinkTargetApi() { not hasManualSinkModel(this) } + } -private int accessPathLimit0() { result = 2 } + class SourceTargetApi extends SourceOrSinkTargetApi { + SourceTargetApi() { + not hasManualSourceModel(this) and + // Do not generate source models for overridable callables + // as virtual dispatch implies that too many methods + // will be considered sources. + not this.(Overridable).overridesOrImplements(_) + } + } -private newtype TTaintState = - TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or - TTaintStore(int n) { n in [1 .. 
accessPathLimit0()] } + class SummaryTargetApi extends Callable { + private Callable lift; -abstract private class TaintState extends TTaintState { - abstract string toString(); -} + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } -/** - * A FlowState representing a tainted read. - */ -private class TaintRead extends TaintState, TTaintRead { - private int step; + Callable lift() { result = lift } - TaintRead() { this = TTaintRead(step) } + predicate isRelevant() { relevant(this) } + } /** - * Gets the flow state step number. + * Holds if `t` is a type that is generally used for bulk data in collection types. + * E.g. char[] is roughly equivalent to string and thus a highly + * relevant type for model generation. */ - int getStep() { result = step } - - override string toString() { result = "TaintRead(" + step + ")" } -} + private predicate isPrimitiveTypeUsedForBulkData(CS::Type t) { + t instanceof CS::ByteType or + t instanceof CS::CharType + } -/** - * A FlowState representing a tainted write. - */ -private class TaintStore extends TaintState, TTaintStore { - private int step; + /** + * Holds if the collection type `ct` is irrelevant for model generation. + * Collection types where the type of the elements are + * (1) unknown - are considered relevant. + * (2) known - at least one of the child types should be relevant (a non-simple type + * or a type used for bulk data) + */ + private predicate irrelevantCollectionType(CS::Type ct) { + Collections::isCollectionType(ct) and + forex(CS::Type child | child = ct.getAChild() | + child instanceof CS::SimpleType and + not isPrimitiveTypeUsedForBulkData(child) + ) + } - TaintStore() { this = TTaintStore(step) } + predicate isRelevantType(CS::Type t) { + not t instanceof CS::SimpleType and + not t instanceof CS::Enum and + not t instanceof SystemDateTimeStruct and + not t instanceof SystemTypeClass and + not irrelevantCollectionType(t) + } /** - * Gets the flow state step number.
+ * Gets the underlying type of the content `c`. */ - int getStep() { result = step } - - override string toString() { result = "TaintStore(" + step + ")" } -} + private CS::Type getUnderlyingContType(DataFlow::Content c) { + result = c.(DataFlow::FieldContent).getField().getType() or + result = c.(DataFlow::SyntheticFieldContent).getField().getType() + } -/** - * A data-flow configuration for tracking flow through APIs. - * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. - * - * This can be used to generate Flow summaries for APIs from parameter to return. - */ -module PropagateFlowConfig implements DataFlow::StateConfigSig { - class FlowState = TaintState; - - predicate isSource(DataFlow::Node source, FlowState state) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and - state.(TaintRead).getStep() = 0 - } - - predicate isSink(DataFlow::Node sink, FlowState state) { - sink instanceof ReturnNodeExt and - not isOwnInstanceAccessNode(sink) and - not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and - (state instanceof TaintRead or state instanceof TaintStore) - } - - predicate isAdditionalFlowStep( - DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 - ) { - exists(DataFlow::ContentSet c | - DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and - isRelevantContent0(c) and - ( - state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 - or - state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() - ) + Type getUnderlyingContentType(DataFlow::ContentSet c) { + exists(DataFlow::Content cont | + c.isSingleton(cont) and + result = getUnderlyingContType(cont) ) or - exists(DataFlow::ContentSet c | - DataFlowPrivate::readStep(node1, c, node2) and - isRelevantContent0(c) and - state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + 
exists(CS::Property p | + c.isProperty(p) and + result = p.getType() ) } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) - } + string qualifierString() { result = "Argument[this]" } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + string parameterAccess(CS::Parameter p) { + if Collections::isCollectionType(p.getType()) + then result = "Argument[" + p.getPosition() + "].Element" + else result = "Argument[" + p.getPosition() + "]" } -} -module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>; - -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. - */ -string captureThroughFlow0( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt -) { - exists(string input, string output | - p.getEnclosingCallable() = api and - returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and - input = parameterNodeAsInput(p) and - output = getOutput(returnNodeExt) and - input != output and - result = Printing::asLiftedTaintModel(api, input, output) - ) -} + string parameterContentAccess(CS::Parameter p) { result = "Argument[" + p.getPosition() + "]" } -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter.
- */ -string captureThroughFlow(DataFlowSummaryTargetApi api) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - PropagateFlow::flow(p, returnNodeExt) and - result = captureThroughFlow0(api, p, returnNodeExt) - ) -} + class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode; -private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi - } + private signature string parameterAccessSig(Parameter p); - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi + private module ParamReturnNodeAsOutput<parameterAccessSig/1 getParamAccess> { + bindingset[c] + string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) { + result = getParamAccess(c.getParameter(pos.getPosition())) + or + pos.isThisParameter() and + result = qualifierString() + } } - predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; - - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + bindingset[c] + string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) { + result = ParamReturnNodeAsOutput<parameterAccess/1>::paramReturnNodeAsOutput(c, pos) } - int accessPathLimit() { result = 2 } - - predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } - - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + bindingset[c] + string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { + result = ParamReturnNodeAsOutput<parameterContentAccess/1>::paramReturnNodeAsOutput(c, pos) } -} - -private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>; -private string getContent(PropagateContentFlow::AccessPath ap, int i) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail()
- | - i = 0 and - result = "." + printContent(head) - or - i > 0 and result = getContent(tail, i - 1) - ) -} - -/** - * Gets the MaD string representation of a store step access path. - */ -private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i) -} - -/** - * Gets the MaD string representation of a read step access path. - */ -private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i desc) -} - -/** - * Holds if the access path `ap` contains a field or synthetic field access. - */ -private predicate mentionsField(PropagateContentFlow::AccessPath ap) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail() - | - mentionsField(tail) or isField(head) - ) -} - -private predicate apiFlow( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} - -/** - * A class of APIs relevant for modeling using content flow. - * The following heuristic is applied: - * Content flow is only relevant for an API, if - * #content flow <= 2 * #parameters + 3 - * If an API produces more content flow, it is likely that - * 1. Types are not sufficiently constrained leading to a combinatorial - * explosion in dispatch and thus in the generated summaries. - * 2. It is a reasonable approximation to use the non-content based flow - * detection instead, as reads and stores would use a significant - * part of an objects internal state. 
- */ -private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { - ContentDataFlowSummaryTargetApi() { - count(string input, string output | - exists( - DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores - | - apiFlow(this, p, reads, returnNodeExt, stores, _) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) - ) - ) <= 2 * this.getNumberOfParameters() + 3 + Callable returnNodeEnclosingCallable(DataFlow::Node ret) { + result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable(_) } -} -pragma[nomagic] -private predicate apiContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} - -/** - * Holds if any of the content sets in `path` translates into a synthetic field. - */ -private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { - exists(PropagateContentFlow::AccessPath tail, ContentSet head | - head = path.getHead() and - tail = path.getTail() - | - exists(getSyntheticName(head)) or - hasSyntheticContent(tail) - ) -} + predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { + node.asExpr() instanceof CS::ThisAccess + } -/** - * A module containing predicates for validating access paths containing content sets - * that translates into synthetic fields, when used for generated summary models. - */ -private module AccessPathSyntheticValidation { - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`). 
- */ - private predicate step( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - p.getType() = t1 and - returnNodeExt.getType() = t2 and - apiContentFlow(_, p, read, returnNodeExt, store, _) + private predicate isRelevantMemberAccess(DataFlow::Node node) { + exists(CS::MemberAccess access | access = node.asExpr() | + access.hasThisQualifier() and + access.getTarget().isEffectivelyPublic() and + ( + access instanceof CS::FieldAccess + or + access.getTarget().(CS::Property).getSetter().isPublic() + ) ) } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. - * - * Step A -> Synth. - */ - private predicate synthPathEntry( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - not hasSyntheticContent(read) and - hasSyntheticContent(store) and - step(t1, read, t2, store) - } + predicate sinkModelSanitizer(DataFlow::Node node) { none() } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` has synthetic content - * and `store` does not. - * - * Step Synth -> A. - */ - private predicate synthPathExit( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - not hasSyntheticContent(store) and - step(t1, read, t2, store) + predicate apiSource(DataFlow::Node source) { + isRelevantMemberAccess(source) or source instanceof DataFlow::ParameterNode } - /** - * Holds if there exists a path of steps from `read` to an exit. 
- * - * read ->* Synth -> A - */ - private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { - synthPathExit(t, read, _, _) - or - hasSyntheticContent(read) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(t, read, midType, mid) and - reachesSynthExit(midType, mid.reverse()) + private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) { + exists(DataFlowCall call | + dc1 = call.getEnclosingCallable() and + dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0) ) } - /** - * Holds if there exists a path of steps from an entry to `store`. - * - * A -> Synth ->* store - */ - private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { - synthPathEntry(_, _, t, store) - or - hasSyntheticContent(store) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(midType, mid, t, store) and - synthEntryReaches(midType, mid.reverse()) + bindingset[dc1, dc2] + private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) = + fastTC(uniquelyCalls/2)(dc1, dc2) + + bindingset[sourceEnclosing, api] + predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { + not exists(DataFlowCallable dc1, DataFlowCallable dc2 | + uniquelyCallsPlus(dc1, dc2) or dc1 = dc2 + | + dc1.getUnderlyingCallable() = api and + dc2.getUnderlyingCallable() = sourceEnclosing ) } - /** - * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) - * contain content that will be translated into a synthetic field, when being used in - * a MaD summary model, and if there is a range of APIs, such that - * when chaining their flow access paths, there exists access paths `A` and `B` where - * A ->* read -> store ->* B and where `A` and `B` do not contain content that will - * be translated into a synthetic field. 
- * - * This is needed because we don't want to include summaries that reads from or - * stores into a "dead" synthetic field. - * - * Example: - * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and - * `setX`, which gets and sets a private field `X` on `t`. - * This would lead to the following content flows - * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. - * setX : Argument[0] -> Argument[this].SyntheticField[t.X] - * As the reads and stores are on synthetic fields we should only make summaries - * if both of these methods exist. - */ - pragma[nomagic] - predicate acceptReadStore( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) - or - exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | - synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) - or - synthEntryReaches(t1, store0) and - step(t1, read, t2, store) and - reachesSynthExit(t2, store.reverse()) + string getInputArgument(DataFlow::Node source) { + exists(int pos | + pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and + result = "Argument[" + pos + "]" ) + or + source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and + result = qualifierString() } -} -/** - * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. - * Flow is considered relevant, - * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. - * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if - * the synthetic content is "live" on the relevant declaring type. 
- */ -private predicate apiRelevantContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath store, boolean preservesValue -) { - apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and - ( - not hasSyntheticContent(read) and not hasSyntheticContent(store) - or - AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store) - ) -} + bindingset[kind] + predicate isRelevantSinkKind(string kind) { any() } -pragma[nomagic] -private predicate captureContentFlow0( - ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, - boolean lift -) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads, - PropagateContentFlow::AccessPath stores - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) - ) -} + bindingset[kind] + predicate isRelevantSourceKind(string kind) { any() } -/** - * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to - * the return value or a parameter). - * - * Models are lifted to the best type in case the read and store access paths do not - * contain a field or synthetic field access. 
- */ -string captureContentFlow(ContentDataFlowSummaryTargetApi api) { - exists(string input, string output, boolean lift, boolean preservesValue | - captureContentFlow0(api, input, output, _, lift) and - preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and - result = Printing::asModel(api, input, output, preservesValue, lift) - ) -} + predicate containerContent(DataFlow::ContentSet c) { c.isElement() } -/** - * A dataflow configuration used for finding new sources. - * The sources are the already known existing sources and the sinks are the API return nodes. - * - * This can be used to generate Source summaries for an API, if the API expose an already known source - * via its return (then the API itself becomes a source). - */ -module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - ExternalFlow::sourceNode(source, kind) + predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and + not nodeTo.asExpr() instanceof CS::ElementAccess and + not exists(DataFlow::ContentSet c | + DataFlowPrivate::readStep(nodeFrom, c, nodeTo) and containerContent(c) ) } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi - } - - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } - - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) - } - - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + bindingset[d] + private string getFullyQualifiedName(Declaration d) { + exists(string qualifier, string name | + d.hasFullyQualifiedName(qualifier, name) and + result = 
QualifiedName::getQualifiedName(qualifier, name) + ) } -} - -private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>; - -/** - * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. - */ -string captureSource(DataFlowSourceTargetApi api) { - exists(DataFlow::Node source, ReturnNodeExt sink, string kind | - PropagateFromSource::flow(source, sink) and - ExternalFlow::sourceNode(source, kind) and - api = sink.getEnclosingCallable() and - not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and - result = Printing::asSourceModel(api, getOutput(sink), kind) - ) -} -/** - * A dataflow configuration used for finding new sinks. - * The sources are the parameters of the API and the fields of the enclosing type. - * - * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) - * into an existing known sink (then the API itself becomes a sink). - */ -module PropagateToSinkConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi + predicate isField(DataFlow::ContentSet c) { + c.isField(_) or c.isSyntheticField(_) or c.isProperty(_) } - predicate isSink(DataFlow::Node sink) { - exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind)) + string getSyntheticName(DataFlow::ContentSet c) { + exists(CS::Field f | + not f.isEffectivelyPublic() and + c.isField(f) and + result = getFullyQualifiedName(f) + ) + or + exists(CS::Property p | + not p.isEffectivelyPublic() and + c.isProperty(p) and + result = getFullyQualifiedName(p) + ) + or + c.isSyntheticField(result) } - predicate isBarrier(DataFlow::Node node) { - exists(Type t | t = node.getType() and not isRelevantType(t)) + string printContent(DataFlow::ContentSet c) { + exists(CS::Field f, string name | name = getFullyQualifiedName(f) | + c.isField(f) and +
f.isEffectivelyPublic() and + result = "Field[" + name + "]" + ) + or + exists(CS::Property p, string name | name = getFullyQualifiedName(p) | + c.isProperty(p) and + p.isEffectivelyPublic() and + result = "Property[" + name + "]" + ) or - sinkModelSanitizer(node) + result = "SyntheticField[" + getSyntheticName(c) + "]" + or + c.isElement() and + result = "Element" } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext } + predicate partialModel = ExternalFlow::partialModel/6; - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) - } -} + predicate sourceNode = ExternalFlow::sourceNode/2; -private module PropagateToSink = TaintTracking::Global; - -/** - * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. - */ -string captureSink(DataFlowSinkTargetApi api) { - exists(DataFlow::Node src, DataFlow::Node sink, string kind | - PropagateToSink::flow(src, sink) and - ExternalFlow::sinkNode(sink, kind) and - api = src.getEnclosingCallable() and - result = Printing::asSinkModel(api, asInputArgument(src), kind) - ) + predicate sinkNode = ExternalFlow::sinkNode/2; } + +import MakeModelGenerator diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 4c09f4ccba5b..90452b1fc91e 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,5 +1,5 @@ private import csharp as CS -private import codeql.mad.modelgenerator.ModelPrinting +private import codeql.mad.modelgenerator.internal.ModelPrinting private import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow private module ModelPrintingLang implements ModelPrintingLangSig { diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll 
b/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll deleted file mode 100644 index 8ac07342d559..000000000000 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ /dev/null @@ -1,436 +0,0 @@ -/** - * Provides predicates related to capturing summary models of the Standard or a 3rd party library. - */ - -private import csharp as CS -private import semmle.code.csharp.commons.Util as Util -private import semmle.code.csharp.commons.Collections as Collections -private import semmle.code.csharp.commons.QualifiedName as QualifiedName -private import semmle.code.csharp.dataflow.internal.DataFlowDispatch -private import semmle.code.csharp.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.csharp.frameworks.system.linq.Expressions -private import semmle.code.csharp.frameworks.System -private import semmle.code.csharp.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate -import semmle.code.csharp.dataflow.internal.ExternalFlow as ExternalFlow -import semmle.code.csharp.dataflow.internal.ContentDataFlow as ContentDataFlow -import semmle.code.csharp.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon -import semmle.code.csharp.dataflow.internal.DataFlowPrivate as DataFlowPrivate -import semmle.code.csharp.dataflow.internal.DataFlowDispatch as DataFlowDispatch - -module DataFlow = CS::DataFlow; - -module TaintTracking = CS::TaintTracking; - -class Type = CS::Type; - -class Callable = CS::Callable; - -class ContentSet = DataFlow::ContentSet; - -/** - * Holds if any of the parameters of `api` are `System.Func<>`. 
- */ -private predicate isHigherOrder(Callable api) { - exists(Type t | t = api.getAParameter().getType().getUnboundDeclaration() | - t instanceof SystemLinqExpressions::DelegateExtType - ) -} - -private predicate irrelevantAccessor(CS::Accessor a) { - a.getDeclaration().(CS::Property).isReadWrite() -} - -private predicate isUninterestingForModels(Callable api) { - api.getDeclaringType().getNamespace().getFullName() = "" - or - api instanceof CS::ConversionOperator - or - api instanceof Util::MainMethod - or - api instanceof CS::Destructor - or - api instanceof CS::AnonymousFunctionExpr - or - api.(CS::Constructor).isParameterless() - or - exists(Type decl | decl = api.getDeclaringType() | - decl instanceof SystemObjectClass or - decl instanceof SystemValueTypeClass - ) - or - // Disregard properties that have both a get and a set accessor, - // which implicitly means auto implemented properties. - irrelevantAccessor(api) -} - -private predicate relevant(Callable api) { - [api.(CS::Modifiable), api.(CS::Accessor).getDeclaration()].isEffectivelyPublic() and - api.fromSource() and - api.isUnboundDeclaration() and - not isUninterestingForModels(api) -} - -private Callable getARelevantOverrideeOrImplementee(Overridable m) { - m.overridesOrImplements(result) and relevant(result) -} - -/** - * Gets the super implementation of `api` if it is relevant. - * If such a super implementation does not exist, returns `api` if it is relevant. 
- */ -private Callable liftedImpl(Callable api) { - ( - result = getARelevantOverrideeOrImplementee(api) - or - result = api and relevant(api) - ) and - not exists(getARelevantOverrideeOrImplementee(result)) -} - -private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()) -} - -private predicate hasManualSourceModel(Callable api) { - api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()) -} - -private predicate hasManualSinkModel(Callable api) { - api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()) -} - -/** - * Holds if it is irrelevant to generate models for `api` based on data flow analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ -predicate isUninterestingForDataFlowModels(CS::Callable api) { isHigherOrder(api) } - -/** - * Holds if it is irrelevant to generate models for `api` based on type-based analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ -predicate isUninterestingForTypeBasedFlowModels(CS::Callable api) { none() } - -/** - * A class of callables that are potentially relevant for generating source or - * sink models. - */ -class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } -} - -/** - * A class of callables that are potentially relevant for generating sink models. - */ -class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } -} - -/** - * A class of callables that are potentially relevant for generating source models. 
- */ -class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { - not hasManualSourceModel(this) and - // Do not generate source models for overridable callables - // as virtual dispatch implies that too many methods - // will be considered sources. - not this.(Overridable).overridesOrImplements(_) - } -} - -/** - * A class of callables that are potentially relevant for generating summary or - * neutral models. - * - * In the Standard library and 3rd party libraries it is the callables (or callables that have a - * super implementation) that can be called from outside the library itself. - */ -class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - /** - * Gets the callable that a model will be lifted to. - * - * The lifted callable is relevant in terms of model - * generation (this is ensured by `liftedImpl`). - */ - Callable lift() { result = lift } - - /** - * Holds if `this` is relevant in terms of model generation. - */ - predicate isRelevant() { relevant(this) } -} - -/** - * Holds if `t` is a type that is generally used for bulk data in collection types. - * Eg. char[] is roughly equivalent to string and thus a highly - * relevant type for model generation. - */ -private predicate isPrimitiveTypeUsedForBulkData(CS::Type t) { - t instanceof CS::ByteType or - t instanceof CS::CharType -} - -/** - * Holds if the collection type `ct` is irrelevant for model generation. - * Collection types where the type of the elements are - * (1) unknown - are considered relevant. 
- * (2) known - at least one the child types should be relevant (a non-simple type - * or a type used for bulk data) - */ -private predicate irrelevantCollectionType(CS::Type ct) { - Collections::isCollectionType(ct) and - forex(CS::Type child | child = ct.getAChild() | - child instanceof CS::SimpleType and - not isPrimitiveTypeUsedForBulkData(child) - ) -} - -/** - * Holds for type `t` for fields that are relevant as an intermediate - * read or write step in the data flow analysis. - * That is, flow through any data-flow node that does not have a relevant type - * will be excluded. - */ -predicate isRelevantType(CS::Type t) { - not t instanceof CS::SimpleType and - not t instanceof CS::Enum and - not t instanceof SystemDateTimeStruct and - not t instanceof SystemTypeClass and - not irrelevantCollectionType(t) -} - -/** - * Gets the underlying type of the content `c`. - */ -private CS::Type getUnderlyingContType(DataFlow::Content c) { - result = c.(DataFlow::FieldContent).getField().getType() or - result = c.(DataFlow::SyntheticFieldContent).getField().getType() -} - -/** - * Gets the underlying type of the content `c`. - */ -CS::Type getUnderlyingContentType(DataFlow::ContentSet c) { - exists(DataFlow::Content cont | - c.isSingleton(cont) and - result = getUnderlyingContType(cont) - ) - or - exists(CS::Property p | - c.isProperty(p) and - result = p.getType() - ) -} - -/** - * Gets the MaD string representation of the qualifier. - */ -string qualifierString() { result = "Argument[this]" } - -string parameterAccess(CS::Parameter p) { - if Collections::isCollectionType(p.getType()) - then result = "Argument[" + p.getPosition() + "].Element" - else result = "Argument[" + p.getPosition() + "]" -} - -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. 
- */ -string parameterContentAccess(CS::Parameter p) { result = "Argument[" + p.getPosition() + "]" } - -class InstanceParameterNode = DataFlowPrivate::InstanceParameterNode; - -class ParameterPosition = DataFlowDispatch::ParameterPosition; - -private signature string parameterAccessSig(Parameter p); - -module ParamReturnNodeAsOutput { - bindingset[c] - string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) { - result = getParamAccess(c.getParameter(pos.getPosition())) - or - pos.isThisParameter() and - result = qualifierString() - } -} - -/** - * Gets the MaD string representation of return through parameter at position - * `pos` of callable `c`. - */ -bindingset[c] -string paramReturnNodeAsOutput(CS::Callable c, ParameterPosition pos) { - result = ParamReturnNodeAsOutput::paramReturnNodeAsOutput(c, pos) -} - -bindingset[c] -string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { - result = ParamReturnNodeAsOutput::paramReturnNodeAsOutput(c, pos) -} - -/** - * Gets the enclosing callable of `ret`. - */ -Callable returnNodeEnclosingCallable(DataFlow::Node ret) { - result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable(_) -} - -/** - * Holds if `node` is an own instance access. - */ -predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { - node.asExpr() instanceof CS::ThisAccess -} - -private predicate isRelevantMemberAccess(DataFlow::Node node) { - exists(CS::MemberAccess access | access = node.asExpr() | - access.hasThisQualifier() and - access.getTarget().isEffectivelyPublic() and - ( - access instanceof CS::FieldAccess - or - access.getTarget().(CS::Property).getSetter().isPublic() - ) - ) -} - -predicate sinkModelSanitizer(DataFlow::Node node) { none() } - -/** - * Holds if `source` is an api entrypoint relevant for creating sink models. 
- */ -predicate apiSource(DataFlow::Node source) { - isRelevantMemberAccess(source) or source instanceof DataFlow::ParameterNode -} - -private predicate uniquelyCalls(DataFlowCallable dc1, DataFlowCallable dc2) { - exists(DataFlowCall call | - dc1 = call.getEnclosingCallable() and - dc2 = unique(DataFlowCallable dc0 | dc0 = viableCallable(call) | dc0) - ) -} - -bindingset[dc1, dc2] -private predicate uniquelyCallsPlus(DataFlowCallable dc1, DataFlowCallable dc2) = - fastTC(uniquelyCalls/2)(dc1, dc2) - -/** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. - */ -bindingset[sourceEnclosing, api] -predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api) { - not exists(DataFlowCallable dc1, DataFlowCallable dc2 | uniquelyCallsPlus(dc1, dc2) or dc1 = dc2 | - dc1.getUnderlyingCallable() = api and - dc2.getUnderlyingCallable() = sourceEnclosing - ) -} - -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgumentSpecific(DataFlow::Node source) { - exists(int pos | - pos = source.(DataFlow::ParameterNode).getParameter().getPosition() and - result = "Argument[" + pos + "]" - ) - or - source.asExpr() instanceof DataFlowPrivate::FieldOrPropertyAccess and - result = qualifierString() -} - -/** - * Holds if `kind` is a relevant sink kind for creating sink models. - */ -bindingset[kind] -predicate isRelevantSinkKind(string kind) { any() } - -/** - * Holds if `kind` is a relevant source kind for creating source models. - */ -bindingset[kind] -predicate isRelevantSourceKind(string kind) { any() } - -/** - * Holds if the the content `c` is a container. - */ -predicate containerContent(DataFlow::ContentSet c) { c.isElement() } - -/** - * Holds if there is a taint step from `node1` to `node2` in content flow. 
- */ -predicate isAdditionalContentFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - TaintTrackingPrivate::defaultAdditionalTaintStep(nodeFrom, nodeTo, _) and - not nodeTo.asExpr() instanceof CS::ElementAccess and - not exists(DataFlow::ContentSet c | - DataFlowPrivate::readStep(nodeFrom, c, nodeTo) and containerContent(c) - ) -} - -bindingset[d] -private string getFullyQualifiedName(Declaration d) { - exists(string qualifier, string name | - d.hasFullyQualifiedName(qualifier, name) and - result = QualifiedName::getQualifiedName(qualifier, name) - ) -} - -/** - * Holds if the content set `c` is a field, property or synthetic field. - */ -predicate isField(ContentSet c) { c.isField(_) or c.isSyntheticField(_) or c.isProperty(_) } - -/** - * Gets the MaD synthetic name string representation for the content set `c`, if any. - */ -string getSyntheticName(DataFlow::ContentSet c) { - exists(CS::Field f | - not f.isEffectivelyPublic() and - c.isField(f) and - result = getFullyQualifiedName(f) - ) - or - exists(CS::Property p | - not p.isEffectivelyPublic() and - c.isProperty(p) and - result = getFullyQualifiedName(p) - ) - or - c.isSyntheticField(result) -} - -/** - * Gets the MaD string representation of the content set `c`. 
- */ -string printContent(DataFlow::ContentSet c) { - exists(CS::Field f, string name | name = getFullyQualifiedName(f) | - c.isField(f) and - f.isEffectivelyPublic() and - result = "Field[" + name + "]" - ) - or - exists(CS::Property p, string name | name = getFullyQualifiedName(p) | - c.isProperty(p) and - p.isEffectivelyPublic() and - result = "Property[" + name + "]" - ) - or - result = "SyntheticField[" + getSyntheticName(c) + "]" - or - c.isElement() and - result = "Element" -} diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll deleted file mode 100644 index 8bb706bc484c..000000000000 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll +++ /dev/null @@ -1,93 +0,0 @@ -private import CaptureModels - -/** - * Capture fluent APIs that return `this`. - * Example of a fluent API: - * ```csharp - * public class BasicFlow { - * public BasicFlow ReturnThis(object input) - * { - * // some side effect - * return this; - * } - * ``` - * Captured Model: - * ```Summaries;BasicFlow;false;ReturnThis;(System.Object);Argument[this];ReturnValue;value;df-generated``` - * Capture APIs that transfer taint from an input parameter to an output return - * value or parameter. - * Allows a sequence of read steps followed by a sequence of store steps. 
- * - * Examples: - * - * ```csharp - * public class BasicFlow { - * private string tainted; - * - * public String ReturnField() - * { - * return tainted; - * } - * - * public void AssignFieldToArray(object[] target) - * { - * target[0] = tainted; - * } - * } - * ``` - * Captured Models: - * ``` - * Summaries;BasicFlow;false;ReturnField;();Argument[this];ReturnValue;taint;df-generated | - * Summaries;BasicFlow;false;AssignFieldToArray;(System.Object[]);Argument[this];Argument[0].Element;taint;df-generated - * ``` - * - * ```csharp - * public class BasicFlow { - * private string tainted; - * - * public void SetField(string s) - * { - * tainted = s; - * } - * } - * ``` - * Captured Model: - * ```Summaries;BasicFlow;false;SetField;(System.String);Argument[0];Argument[this];taint;df-generated``` - * - * ```csharp - * public class BasicFlow { - * public void ReturnSubstring(string s) - * { - * return s.Substring(0, 1); - * } - * } - * ``` - * Captured Model: - * ```Summaries;BasicFlow;false;ReturnSubstring;(System.String);Argument[0];ReturnValue;taint;df-generated``` - * - * ```csharp - * public class BasicFlow { - * public void AssignToArray(int data, int[] target) - * { - * target[0] = data; - * } - * } - * ``` - * Captured Model: - * ```Summaries;BasicFlow;false;AssignToArray;(System.Int32,System.Int32[]);Argument[0];Argument[1].Element;taint;df-generated``` - */ -string captureFlow(DataFlowSummaryTargetApi api) { - result = captureQualifierFlow(api) or - result = captureThroughFlow(api) -} - -/** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a summary model that applies to `api` and if it relevant to generate - * a model for `api`. 
- */ -string captureNoFlow(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and - api.isRelevant() and - result = Printing::asNeutralSummaryModel(api) -} diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 1a0a3d2ca420..f7b0633ddd39 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -2,7 +2,7 @@ private import csharp private import semmle.code.csharp.frameworks.system.collections.Generic as GenericCollections private import semmle.code.csharp.dataflow.internal.DataFlowPrivate private import semmle.code.csharp.frameworks.system.linq.Expressions -private import CaptureModelsSpecific as Specific +private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput private import CaptureModelsPrinting /** @@ -38,7 +38,7 @@ private predicate localTypeParameter(Callable callable, TypeParameter tp) { */ private predicate parameter(Callable callable, string input, TypeParameter tp) { exists(Parameter p | - input = Specific::parameterAccess(p) and + input = ModelGeneratorInput::parameterAccess(p) and p = callable.getAParameter() and ( // Parameter of type tp @@ -69,7 +69,7 @@ private string implicit(Callable callable, TypeParameter tp) { then access = ".Element" else access = getSyntheticField(tp) | - result = Specific::qualifierString() + access + result = ModelGeneratorInput::qualifierString() + access ) } @@ -191,9 +191,7 @@ private module Printing = ModelPrinting; * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. 
*/ -class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { - TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) } - +class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index f5d8593a32ac..b2777d4bc346 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -3,7 +3,7 @@ import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { - string getCapturedModel(Callable c) { result = captureContentFlow(c) } + string getCapturedModel(Callable c) { result = ContentSensitive::captureFlow(c) } string getKind() { result = "contentbased-summary" } } diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index c9fd2c2655db..922588049d26 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,5 @@ import csharp -import utils.modelgenerator.internal.CaptureSummaryFlowQuery +import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql index 5e4a67bcf0cb..29cb8f7fb874 100644 --- a/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql +++ 
b/csharp/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql @@ -1,5 +1,5 @@ import csharp -import utils.modelgenerator.internal.CaptureSummaryFlowQuery +import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 784d248d8dce..589d75c3635d 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -760,7 +760,7 @@ ContentApprox getContentApprox(Content c) { /** * Holds if the the content `c` is a container. */ -predicate containerContent(Content c) { +predicate containerContent(ContentSet c) { c instanceof ArrayContent or c instanceof CollectionContent or c instanceof MapKeyContent or diff --git a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql index e0e793348f59..ad3008e54a59 100644 --- a/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureContentSummaryModels.ql @@ -9,5 +9,5 @@ import internal.CaptureModels from DataFlowSummaryTargetApi api, string flow -where flow = captureContentFlow(api) +where flow = ContentSensitive::captureFlow(api) select flow order by flow diff --git a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql index 2ff65ad7fae0..c91f182f6038 100644 --- a/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureNeutralModels.ql @@ -7,7 +7,6 @@ */ import internal.CaptureModels -import internal.CaptureSummaryFlowQuery from DataFlowSummaryTargetApi api, string noflow where noflow = captureNoFlow(api) diff --git 
a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql index 3f18bf49428a..7fc7c0f7a571 100644 --- a/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql +++ b/java/ql/src/utils/modelgenerator/CaptureSummaryModels.ql @@ -7,7 +7,6 @@ */ import internal.CaptureModels -import internal.CaptureSummaryFlowQuery from DataFlowSummaryTargetApi api, string flow where flow = captureFlow(api) diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index ab5de0d01979..6724527a1b17 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -1,635 +1,292 @@ /** - * Provides classes and predicates related to capturing summary, source, - * and sink models of the Standard or a 3rd party library. + * Provides predicates related to capturing summary models of the Standard or a 3rd party library. 
*/ -private import CaptureModelsSpecific -private import CaptureModelsPrinting +private import java as J +private import semmle.code.java.dataflow.DataFlow +private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow +private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow +private import semmle.code.java.dataflow.internal.DataFlowDispatch +private import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import semmle.code.java.dataflow.internal.DataFlowImplSpecific +private import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate +private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil +private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl +private import semmle.code.java.dataflow.internal.ModelExclusions +private import semmle.code.java.dataflow.internal.TaintTrackingImplSpecific +private import semmle.code.java.dataflow.SSA as Ssa +private import semmle.code.java.dataflow.TaintTracking +private import codeql.mad.modelgenerator.internal.ModelGeneratorImpl /** - * A node from which flow can return to the caller. This is either a regular - * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + * Holds if the type `t` is a primitive type used for bulk data. */ -private class ReturnNodeExt extends DataFlow::Node { - private DataFlowImplCommon::ReturnKindExt kind; - - ReturnNodeExt() { - kind = DataFlowImplCommon::getValueReturnPosition(this).getKind() or - kind = DataFlowImplCommon::getParamReturnPosition(this, _).getKind() - } - - /** - * Gets the kind of the return node. 
- */ - DataFlowImplCommon::ReturnKindExt getKind() { result = kind } +predicate isPrimitiveTypeUsedForBulkData(J::Type t) { + t.hasName(["byte", "char", "Byte", "Character"]) } -bindingset[c] -private signature string printCallableParamSig(Callable c, ParameterPosition p); - -private module PrintReturnNodeExt { - string getOutput(ReturnNodeExt node) { - node.getKind() instanceof DataFlowImplCommon::ValueReturnKind and - result = "ReturnValue" - or - exists(ParameterPosition pos | - pos = node.getKind().(DataFlowImplCommon::ParamUpdateReturnKind).getPosition() and - result = printCallableParam(returnNodeEnclosingCallable(node), pos) - ) - } -} +module ModelGeneratorInput implements ModelGeneratorInputSig { + class Type = J::Type; -string getOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) -} + class Parameter = J::Parameter; -string getContentOutput(ReturnNodeExt node) { - result = PrintReturnNodeExt::getOutput(node) -} + class Callable = J::Callable; -class DataFlowSummaryTargetApi extends SummaryTargetApi { - DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } -} - -class DataFlowSourceTargetApi = SourceTargetApi; - -class DataFlowSinkTargetApi = SinkTargetApi; - -private module ModelPrintingInput implements ModelPrintingSig { - class SummaryApi = DataFlowSummaryTargetApi; - - class SourceOrSinkApi = SourceOrSinkTargetApi; - - string getProvenance() { result = "df-generated" } -} - -module Printing = ModelPrinting; - -/** - * Holds if `c` is a relevant content kind, where the underlying type is relevant. - */ -private predicate isRelevantTypeInContent(DataFlow::ContentSet c) { - isRelevantType(getUnderlyingContentType(c)) -} - -/** - * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. 
- */ -private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { - exists(DataFlow::ContentSet f | - DataFlowPrivate::readStep(node1, f, node2) and - // Partially restrict the content types used for intermediate steps. - (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) - ) - or - exists(DataFlow::ContentSet f | DataFlowPrivate::storeStep(node1, f, node2) | containerContent(f)) -} - -/** - * Holds if content `c` is either a field, a synthetic field or language specific - * content of a relevant type or a container like content. - */ -pragma[nomagic] -private predicate isRelevantContent0(DataFlow::ContentSet c) { - isRelevantTypeInContent(c) or - containerContent(c) -} - -/** - * Gets the MaD string representation of the parameter node `p`. - */ -string parameterNodeAsInput(DataFlow::ParameterNode p) { - result = parameterAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode -} - -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ -string parameterNodeAsContentInput(DataFlow::ParameterNode p) { - result = parameterContentAccess(p.asParameter()) - or - result = qualifierString() and p instanceof InstanceParameterNode -} - -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgument(DataFlow::Node source) { result = asInputArgumentSpecific(source) } - -/** - * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). - */ -string captureQualifierFlow(DataFlowSummaryTargetApi api) { - exists(ReturnNodeExt ret | - api = returnNodeEnclosingCallable(ret) and - isOwnInstanceAccessNode(ret) - ) and - result = Printing::asLiftedValueModel(api, qualifierString(), "ReturnValue") -} - -private int accessPathLimit0() { result = 2 } - -private newtype TTaintState = - TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or - TTaintStore(int n) { n in [1 .. 
accessPathLimit0()] } - -abstract private class TaintState extends TTaintState { - abstract string toString(); -} - -/** - * A FlowState representing a tainted read. - */ -private class TaintRead extends TaintState, TTaintRead { - private int step; - - TaintRead() { this = TTaintRead(step) } - - /** - * Gets the flow state step number. - */ - int getStep() { result = step } + class NodeExtended extends DataFlow::Node { + Callable getAsExprEnclosingCallable() { result = this.asExpr().getEnclosingCallable() } + } - override string toString() { result = "TaintRead(" + step + ")" } -} + private predicate isInfrequentlyUsed(J::CompilationUnit cu) { + cu.getPackage().getName().matches("javax.swing%") or + cu.getPackage().getName().matches("java.awt%") + } -/** - * A FlowState representing a tainted write. - */ -private class TaintStore extends TaintState, TTaintStore { - private int step; + private predicate relevant(Callable api) { + api.isPublic() and + api.getDeclaringType().isPublic() and + api.fromSource() and + not isUninterestingForModels(api) and + not isInfrequentlyUsed(api.getCompilationUnit()) + } - TaintStore() { this = TTaintStore(step) } + private J::Method getARelevantOverride(J::Method m) { + result = m.getAnOverride() and + relevant(result) and + // Other exclusions for overrides. + not m instanceof J::ToStringMethod + } /** - * Gets the flow state step number. + * Gets the super implementation of `m` if it is relevant. + * If such a super implementations does not exist, returns `m` if it is relevant. */ - int getStep() { result = step } - - override string toString() { result = "TaintStore(" + step + ")" } -} - -/** - * A data-flow configuration for tracking flow through APIs. - * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. - * - * This can be used to generate Flow summaries for APIs from parameter to return. 
- */ -module PropagateFlowConfig implements DataFlow::StateConfigSig { - class FlowState = TaintState; - - predicate isSource(DataFlow::Node source, FlowState state) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi and - state.(TaintRead).getStep() = 0 + private J::Callable liftedImpl(J::Callable m) { + ( + result = getARelevantOverride(m) + or + result = m and relevant(m) + ) and + not exists(getARelevantOverride(result)) } - predicate isSink(DataFlow::Node sink, FlowState state) { - sink instanceof ReturnNodeExt and - not isOwnInstanceAccessNode(sink) and - not exists(captureQualifierFlow(sink.asExpr().getEnclosingCallable())) and - (state instanceof TaintRead or state instanceof TaintStore) + private predicate hasManualSummaryModel(Callable api) { + api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or + api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() } - predicate isAdditionalFlowStep( - DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 - ) { - exists(DataFlow::ContentSet c | - DataFlowImplCommon::store(node1, c.getAStoreContent(), node2, _, _) and - isRelevantContent0(c) and - ( - state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 - or - state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() - ) - ) - or - exists(DataFlow::ContentSet c | - DataFlowPrivate::readStep(node1, c, node2) and - isRelevantContent0(c) and - state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() - ) + private predicate hasManualSourceModel(Callable api) { + api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable() } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + private predicate 
hasManualSinkModel(Callable api) { + api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or + api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable() } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext + predicate isUninterestingForDataFlowModels(Callable api) { + api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) } -} - -module PropagateFlow = TaintTracking::GlobalWithState; -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. - */ -string captureThroughFlow0( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt -) { - exists(string input, string output | - p.getEnclosingCallable() = api and - returnNodeExt.(DataFlow::Node).getEnclosingCallable() = api and - input = parameterNodeAsInput(p) and - output = getOutput(returnNodeExt) and - input != output and - result = Printing::asLiftedTaintModel(api, input, output) - ) -} - -/** - * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. 
- */ -string captureThroughFlow(DataFlowSummaryTargetApi api) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - PropagateFlow::flow(p, returnNodeExt) and - result = captureThroughFlow0(api, p, returnNodeExt) - ) -} - -private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - source instanceof DataFlow::ParameterNode and - source.getEnclosingCallable() instanceof DataFlowSummaryTargetApi + class SourceOrSinkTargetApi extends Callable { + SourceOrSinkTargetApi() { relevant(this) } } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSummaryTargetApi + class SinkTargetApi extends SourceOrSinkTargetApi { + SinkTargetApi() { not hasManualSinkModel(this) } } - predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; - - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + class SourceTargetApi extends SourceOrSinkTargetApi { + SourceTargetApi() { not hasManualSourceModel(this) } } - int accessPathLimit() { result = 2 } + class SummaryTargetApi extends Callable { + private Callable lift; - predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } + SummaryTargetApi() { + lift = liftedImpl(this) and + not hasManualSummaryModel(lift) + } - DataFlow::FlowFeature getAFeature() { - result instanceof DataFlow::FeatureEqualSourceSinkCallContext - } -} + Callable lift() { result = lift } -private module PropagateContentFlow = ContentDataFlow::Global; - -private string getContent(PropagateContentFlow::AccessPath ap, int i) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail() - | - i = 0 and - result = "." + printContent(head) - or - i > 0 and result = getContent(tail, i - 1) - ) -} - -/** - * Gets the MaD string representation of a store step access path. 
- */ -private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i) -} - -/** - * Gets the MaD string representation of a read step access path. - */ -private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { - result = concat(int i | | getContent(ap, i), "" order by i desc) -} - -/** - * Holds if the access path `ap` contains a field or synthetic field access. - */ -private predicate mentionsField(PropagateContentFlow::AccessPath ap) { - exists(ContentSet head, PropagateContentFlow::AccessPath tail | - head = ap.getHead() and - tail = ap.getTail() - | - mentionsField(tail) or isField(head) - ) -} - -private predicate apiFlow( - DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} - -/** - * A class of APIs relevant for modeling using content flow. - * The following heuristic is applied: - * Content flow is only relevant for an API, if - * #content flow <= 2 * #parameters + 3 - * If an API produces more content flow, it is likely that - * 1. Types are not sufficiently constrained leading to a combinatorial - * explosion in dispatch and thus in the generated summaries. - * 2. It is a reasonable approximation to use the non-content based flow - * detection instead, as reads and stores would use a significant - * part of an objects internal state. 
- */ -private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { - ContentDataFlowSummaryTargetApi() { - count(string input, string output | - exists( - DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, - ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores - | - apiFlow(this, p, reads, returnNodeExt, stores, _) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) - ) - ) <= 2 * this.getNumberOfParameters() + 3 + predicate isRelevant() { relevant(this) } } -} - -pragma[nomagic] -private predicate apiContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath stores, boolean preservesValue -) { - PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and - returnNodeExt.getEnclosingCallable() = api and - p.getEnclosingCallable() = api -} -/** - * Holds if any of the content sets in `path` translates into a synthetic field. - */ -private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { - exists(PropagateContentFlow::AccessPath tail, ContentSet head | - head = path.getHead() and - tail = path.getTail() - | - exists(getSyntheticName(head)) or - hasSyntheticContent(tail) - ) -} + private string isExtensible(Callable c) { + if c.getDeclaringType().isFinal() then result = "false" else result = "true" + } -/** - * A module containing predicates for validating access paths containing content sets - * that translates into synthetic fields, when used for generated summary models. - */ -private module AccessPathSyntheticValidation { /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`). + * Holds if the callable `c` is in package `package` + * and is a member of `type`. 
*/ - private predicate step( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | - p.getType() = t1 and - returnNodeExt.getType() = t2 and - apiContentFlow(_, p, read, returnNodeExt, store, _) + private predicate qualifiedName(Callable c, string package, string type) { + exists(RefType t | t = c.getDeclaringType() | + package = t.getCompilationUnit().getPackage().getName() and + type = t.getErasure().(J::RefType).getNestedName() ) } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. - * - * Step A -> Synth. - */ - private predicate synthPathEntry( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - not hasSyntheticContent(read) and - hasSyntheticContent(store) and - step(t1, read, t2, store) + predicate isRelevantType(Type t) { + not t instanceof J::TypeClass and + not t instanceof J::EnumType and + not t instanceof J::PrimitiveType and + not t instanceof J::BoxedType and + not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and + not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and + ( + not t.(J::Array).getElementType() instanceof J::PrimitiveType or + isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) + ) and + ( + not t.(J::Array).getElementType() instanceof J::BoxedType or + isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) + ) and + ( + not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or + isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType()) + ) } - /** - * Holds if there exists an API that has content flow from `read` (on type `t1`) - * to `store` (on type `t2`), where `read` has synthetic content - * and `store` does not. 
- * - * Step Synth -> A. - */ - private predicate synthPathExit( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - hasSyntheticContent(read) and - not hasSyntheticContent(store) and - step(t1, read, t2, store) + Type getUnderlyingContentType(DataFlow::ContentSet c) { + result = c.(DataFlow::FieldContent).getField().getType() or + result = c.(DataFlow::SyntheticFieldContent).getField().getType() } - /** - * Holds if there exists a path of steps from `read` to an exit. - * - * read ->* Synth -> A - */ - private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { - synthPathExit(t, read, _, _) - or - hasSyntheticContent(read) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(t, read, midType, mid) and - reachesSynthExit(midType, mid.reverse()) - ) + string qualifierString() { result = "Argument[this]" } + + string parameterAccess(J::Parameter p) { + if + p.getType() instanceof J::Array and + not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType()) + then result = "Argument[" + p.getPosition() + "].ArrayElement" + else + if p.getType() instanceof ContainerFlow::ContainerType + then result = "Argument[" + p.getPosition() + "].Element" + else result = "Argument[" + p.getPosition() + "]" } - /** - * Holds if there exists a path of steps from an entry to `store`. 
- * - * A -> Synth ->* store - */ - private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { - synthPathEntry(_, _, t, store) - or - hasSyntheticContent(store) and - exists(PropagateContentFlow::AccessPath mid, Type midType | - hasSyntheticContent(mid) and - step(midType, mid, t, store) and - synthEntryReaches(midType, mid.reverse()) - ) - } + string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" } - /** - * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) - * contain content that will be translated into a synthetic field, when being used in - * a MaD summary model, and if there is a range of APIs, such that - * when chaining their flow access paths, there exists access paths `A` and `B` where - * A ->* read -> store ->* B and where `A` and `B` do not contain content that will - * be translated into a synthetic field. - * - * This is needed because we don't want to include summaries that reads from or - * stores into a "dead" synthetic field. - * - * Example: - * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and - * `setX`, which gets and sets a private field `X` on `t`. - * This would lead to the following content flows - * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. - * setX : Argument[0] -> Argument[this].SyntheticField[t.X] - * As the reads and stores are on synthetic fields we should only make summaries - * if both of these methods exist. 
- */ - pragma[nomagic] - predicate acceptReadStore( - Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store - ) { - synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) + class InstanceParameterNode = DataFlow::InstanceParameterNode; + + bindingset[c] + string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) { + result = parameterAccess(c.getParameter(pos)) or - exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | - synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) - or - synthEntryReaches(t1, store0) and - step(t1, read, t2, store) and - reachesSynthExit(t2, store.reverse()) - ) + result = qualifierString() and pos = -1 } -} -/** - * Holds, if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. - * Flow is considered relevant, - * 1. If `read` or `store` do not contain a content set that translates into a synthetic field. - * 2. If `read` or `store` contain a content set that translates into a synthetic field, and if - * the synthetic content is "live" on the relevant declaring type. 
- */ -private predicate apiRelevantContentFlow( - ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, - PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, - PropagateContentFlow::AccessPath store, boolean preservesValue -) { - apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and - ( - not hasSyntheticContent(read) and not hasSyntheticContent(store) + bindingset[c] + string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { + result = parameterContentAccess(c.getParameter(pos)) or - AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(), store) - ) -} + result = qualifierString() and pos = -1 + } -pragma[nomagic] -private predicate captureContentFlow0( - ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, - boolean lift -) { - exists( - DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath reads, - PropagateContentFlow::AccessPath stores - | - apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and - input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and - output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and - input != output and - (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) - ) -} + Callable returnNodeEnclosingCallable(DataFlow::Node ret) { + result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable() + } -/** - * Gets the content based summary model(s) of the API `api` (if there is flow from a parameter to - * the return value or a parameter). - * - * Models are lifted to the best type in case the read and store access paths do not - * contain a field or synthetic field access. 
- */ -string captureContentFlow(ContentDataFlowSummaryTargetApi api) { - exists(string input, string output, boolean lift, boolean preservesValue | - captureContentFlow0(api, input, output, _, lift) and - preservesValue = max(boolean p | captureContentFlow0(api, input, output, p, lift)) and - result = Printing::asModel(api, input, output, preservesValue, lift) - ) -} + predicate isOwnInstanceAccessNode(DataFlowPrivate::ReturnNode node) { + node.asExpr().(J::ThisAccess).isOwnInstanceAccess() + } -/** - * A dataflow configuration used for finding new sources. - * The sources are the already known existing sources and the sinks are the API return nodes. - * - * This can be used to generate Source summaries for an API, if the API expose an already known source - * via its return (then the API itself becomes a source). - */ -module PropagateFromSourceConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - exists(string kind | - isRelevantSourceKind(kind) and - ExternalFlow::sourceNode(source, kind) + predicate sinkModelSanitizer(DataFlow::Node node) { + // exclude variable capture jump steps + exists(Ssa::SsaImplicitInit closure | + closure.captures(_) and + node.asExpr() = closure.getAFirstUse() ) } - predicate isSink(DataFlow::Node sink) { - sink instanceof ReturnNodeExt and - sink.getEnclosingCallable() instanceof DataFlowSourceTargetApi + predicate apiSource(DataFlow::Node source) { + ( + source.asExpr().(J::FieldAccess).isOwnFieldAccess() or + source instanceof DataFlow::ParameterNode + ) and + exists(J::RefType t | + t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and + not t instanceof J::TypeObject and + t.isPublic() + ) } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } + predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - predicate isBarrier(DataFlow::Node n) { - exists(Type t | t = n.getType() and not isRelevantType(t)) + string 
getInputArgument(DataFlow::Node source) { + exists(int pos | + source.(DataFlow::ParameterNode).isParameterOf(_, pos) and + if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() + ) + or + source.asExpr() instanceof J::FieldAccess and + result = qualifierString() } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + bindingset[kind] + predicate isRelevantSinkKind(string kind) { + not kind = "log-injection" and + not kind.matches("regex-use%") and + not kind = "file-content-store" } -} -private module PropagateFromSource = TaintTracking::Global; + bindingset[kind] + predicate isRelevantSourceKind(string kind) { any() } -/** - * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. - */ -string captureSource(DataFlowSourceTargetApi api) { - exists(DataFlow::Node source, ReturnNodeExt sink, string kind | - PropagateFromSource::flow(source, sink) and - ExternalFlow::sourceNode(source, kind) and - api = sink.getEnclosingCallable() and - not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and - result = Printing::asSourceModel(api, getOutput(sink), kind) - ) -} + predicate containerContent = DataFlowPrivate::containerContent/1; -/** - * A dataflow configuration used for finding new sinks. - * The sources are the parameters of the API and the fields of the enclosing type. - * - * This can be used to generate Sink summaries for APIs, if the API propagates a parameter (or enclosing type field) - * into an existing known sink (then the API itself becomes a sink). 
- */ -module PropagateToSinkConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node source) { - apiSource(source) and source.getEnclosingCallable() instanceof DataFlowSinkTargetApi + predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and + not exists(DataFlow::Content f | + DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) + ) } - predicate isSink(DataFlow::Node sink) { - exists(string kind | isRelevantSinkKind(kind) and ExternalFlow::sinkNode(sink, kind)) + predicate isField(DataFlow::ContentSet c) { + c instanceof DataFlowUtil::FieldContent or + c instanceof DataFlowUtil::SyntheticFieldContent } - predicate isBarrier(DataFlow::Node node) { - exists(Type t | t = node.getType() and not isRelevantType(t)) + string getSyntheticName(DataFlow::ContentSet c) { + exists(Field f | + not f.isPublic() and + f = c.(DataFlowUtil::FieldContent).getField() and + result = f.getQualifiedName() + ) or - sinkModelSanitizer(node) + result = c.(DataFlowUtil::SyntheticFieldContent).getField() } - DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext } + string printContent(DataFlow::ContentSet c) { + exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() | + result = "Field[" + f.getQualifiedName() + "]" + ) + or + result = "SyntheticField[" + getSyntheticName(c) + "]" + or + c instanceof DataFlowUtil::CollectionContent and result = "Element" + or + c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement" + or + c instanceof DataFlowUtil::MapValueContent and result = "MapValue" + or + c instanceof DataFlowUtil::MapKeyContent and result = "MapKey" + } - predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - isRelevantTaintStep(node1, node2) + predicate partialModel( + Callable api, string package, string type, string extensible, string name, string parameters 
+ ) { + qualifiedName(api, package, type) and + extensible = isExtensible(api) and + name = api.getName() and + parameters = ExternalFlow::paramsString(api) } -} -private module PropagateToSink = TaintTracking::Global; + predicate sourceNode = ExternalFlow::sourceNode/2; -/** - * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. - */ -string captureSink(DataFlowSinkTargetApi api) { - exists(DataFlow::Node src, DataFlow::Node sink, string kind | - PropagateToSink::flow(src, sink) and - ExternalFlow::sinkNode(sink, kind) and - api = src.getEnclosingCallable() and - result = Printing::asSinkModel(api, asInputArgument(src), kind) - ) + predicate sinkNode = ExternalFlow::sinkNode/2; } + +import MakeModelGenerator diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll index 33d176c3d893..dbf08b15e902 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModelsPrinting.qll @@ -1,11 +1,11 @@ private import java as J -private import codeql.mad.modelgenerator.ModelPrinting -private import CaptureModelsSpecific as Specific +private import codeql.mad.modelgenerator.internal.ModelPrinting +private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput private module ModelPrintingLang implements ModelPrintingLangSig { class Callable = J::Callable; - predicate partialModel = Specific::partialModel/6; + predicate partialModel = ModelGeneratorInput::partialModel/6; } import ModelPrintingImpl diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll deleted file mode 100644 index f359d59973ca..000000000000 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModelsSpecific.qll +++ /dev/null @@ -1,380 +0,0 @@ -/** - * Provides predicates related to capturing 
summary models of the Standard or a 3rd party library. - */ - -private import java as J -private import semmle.code.java.dataflow.internal.DataFlowPrivate -private import semmle.code.java.dataflow.internal.DataFlowUtil as DataFlowUtil -private import semmle.code.java.dataflow.internal.ContainerFlow as ContainerFlow -private import semmle.code.java.dataflow.internal.FlowSummaryImpl as FlowSummaryImpl -private import semmle.code.java.dataflow.internal.ModelExclusions -private import semmle.code.java.dataflow.DataFlow as Df -private import semmle.code.java.dataflow.internal.ContentDataFlow as Cdf -private import semmle.code.java.dataflow.SSA as Ssa -private import semmle.code.java.dataflow.TaintTracking as Tt -import semmle.code.java.dataflow.ExternalFlow as ExternalFlow -import semmle.code.java.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon -import semmle.code.java.dataflow.internal.DataFlowPrivate as DataFlowPrivate -import semmle.code.java.dataflow.internal.DataFlowDispatch as DataFlowDispatch - -module DataFlow = Df::DataFlow; - -module ContentDataFlow = Cdf::ContentDataFlow; - -module TaintTracking = Tt::TaintTracking; - -class Type = J::Type; - -class Unit = J::Unit; - -class Callable = J::Callable; - -class ContentSet = DataFlowUtil::ContentSet; - -private predicate isInfrequentlyUsed(J::CompilationUnit cu) { - cu.getPackage().getName().matches("javax.swing%") or - cu.getPackage().getName().matches("java.awt%") -} - -private predicate relevant(Callable api) { - api.isPublic() and - api.getDeclaringType().isPublic() and - api.fromSource() and - not isUninterestingForModels(api) and - not isInfrequentlyUsed(api.getCompilationUnit()) -} - -private J::Method getARelevantOverride(J::Method m) { - result = m.getAnOverride() and - relevant(result) and - // Other exclusions for overrides. - not m instanceof J::ToStringMethod -} - -/** - * Gets the super implementation of `m` if it is relevant. 
- * If such a super implementations does not exist, returns `m` if it is relevant. - */ -private J::Callable liftedImpl(J::Callable m) { - ( - result = getARelevantOverride(m) - or - result = m and relevant(m) - ) and - not exists(getARelevantOverride(result)) -} - -private predicate hasManualSummaryModel(Callable api) { - api = any(FlowSummaryImpl::Public::SummarizedCallable sc | sc.applyManualModel()).asCallable() or - api = any(FlowSummaryImpl::Public::NeutralSummaryCallable sc | sc.hasManualModel()).asCallable() -} - -private predicate hasManualSourceModel(Callable api) { - api = any(ExternalFlow::SourceCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSourceCallable sc | sc.hasManualModel()).asCallable() -} - -private predicate hasManualSinkModel(Callable api) { - api = any(ExternalFlow::SinkCallable sc | sc.hasManualModel()) or - api = any(FlowSummaryImpl::Public::NeutralSinkCallable sc | sc.hasManualModel()).asCallable() -} - -/** - * Holds if it is irrelevant to generate models for `api` based on data flow analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ -predicate isUninterestingForDataFlowModels(Callable api) { - api.getDeclaringType() instanceof J::Interface and not exists(api.getBody()) -} - -/** - * A class of callables that are potentially relevant for generating source or - * sink models. - */ -class SourceOrSinkTargetApi extends Callable { - SourceOrSinkTargetApi() { relevant(this) } -} - -/** - * A class of callables that are potentially relevant for generating sink models. - */ -class SinkTargetApi extends SourceOrSinkTargetApi { - SinkTargetApi() { not hasManualSinkModel(this) } -} - -/** - * A class of callables that are potentially relevant for generating source models. 
- */ -class SourceTargetApi extends SourceOrSinkTargetApi { - SourceTargetApi() { not hasManualSourceModel(this) } -} - -/** - * Holds if it is irrelevant to generate models for `api` based on type-based analysis. - * - * This serves as an extra filter for the `relevant` predicate. - */ -predicate isUninterestingForTypeBasedFlowModels(Callable api) { none() } - -/** - * A class of callables that are potentially relevant for generating summary or - * neutral models. - * - * In the Standard library and 3rd party libraries it is the callables (or callables that have a - * super implementation) that can be called from outside the library itself. - */ -class SummaryTargetApi extends Callable { - private Callable lift; - - SummaryTargetApi() { - lift = liftedImpl(this) and - not hasManualSummaryModel(lift) - } - - /** - * Gets the callable that a model will be lifted to. - */ - Callable lift() { result = lift } - - /** - * Holds if this callable is relevant in terms of generating models. - */ - predicate isRelevant() { relevant(this) } -} - -private string isExtensible(Callable c) { - if c.getDeclaringType().isFinal() then result = "false" else result = "true" -} - -/** - * Holds if the callable `c` is in package `package` - * and is a member of `type`. 
- */ -private predicate qualifiedName(Callable c, string package, string type) { - exists(RefType t | t = c.getDeclaringType() | - package = t.getCompilationUnit().getPackage().getName() and - type = t.getErasure().(J::RefType).getNestedName() - ) -} - -predicate partialModel( - Callable api, string package, string type, string extensible, string name, string parameters -) { - qualifiedName(api, package, type) and - extensible = isExtensible(api) and - name = api.getName() and - parameters = ExternalFlow::paramsString(api) -} - -predicate isPrimitiveTypeUsedForBulkData(J::Type t) { - t.hasName(["byte", "char", "Byte", "Character"]) -} - -/** - * Holds for type `t` for fields that are relevant as an intermediate - * read or write step in the data flow analysis. - */ -predicate isRelevantType(J::Type t) { - not t instanceof J::TypeClass and - not t instanceof J::EnumType and - not t instanceof J::PrimitiveType and - not t instanceof J::BoxedType and - not t.(J::RefType).getAnAncestor().hasQualifiedName("java.lang", "Number") and - not t.(J::RefType).getAnAncestor().hasQualifiedName("java.nio.charset", "Charset") and - ( - not t.(J::Array).getElementType() instanceof J::PrimitiveType or - isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) - ) and - ( - not t.(J::Array).getElementType() instanceof J::BoxedType or - isPrimitiveTypeUsedForBulkData(t.(J::Array).getElementType()) - ) and - ( - not t.(ContainerFlow::CollectionType).getElementType() instanceof J::BoxedType or - isPrimitiveTypeUsedForBulkData(t.(ContainerFlow::CollectionType).getElementType()) - ) -} - -/** - * Gets the underlying type of the content `c`. - */ -J::Type getUnderlyingContentType(DataFlow::Content c) { - result = c.(DataFlow::FieldContent).getField().getType() or - result = c.(DataFlow::SyntheticFieldContent).getField().getType() -} - -/** - * Gets the MaD string representation of the qualifier. 
- */ -string qualifierString() { result = "Argument[this]" } - -/** - * Gets the MaD string representation of the parameter `p`. - */ -string parameterAccess(J::Parameter p) { - if - p.getType() instanceof J::Array and - not isPrimitiveTypeUsedForBulkData(p.getType().(J::Array).getElementType()) - then result = "Argument[" + p.getPosition() + "].ArrayElement" - else - if p.getType() instanceof ContainerFlow::ContainerType - then result = "Argument[" + p.getPosition() + "].Element" - else result = "Argument[" + p.getPosition() + "]" -} - -/** - * Gets the MaD string representation of the parameter `p` - * when used in content flow. - */ -string parameterContentAccess(J::Parameter p) { result = "Argument[" + p.getPosition() + "]" } - -class InstanceParameterNode = DataFlow::InstanceParameterNode; - -class ParameterPosition = DataFlowDispatch::ParameterPosition; - -/** - * Gets the MaD string representation of return through parameter at position - * `pos` of callable `c`. - */ -bindingset[c] -string paramReturnNodeAsOutput(Callable c, ParameterPosition pos) { - result = parameterAccess(c.getParameter(pos)) - or - result = qualifierString() and pos = -1 -} - -/** - * Gets the MaD string representation of return through parameter at position - * `pos` of callable `c` for content flow. - */ -bindingset[c] -string paramReturnNodeAsContentOutput(Callable c, ParameterPosition pos) { - result = parameterContentAccess(c.getParameter(pos)) - or - result = qualifierString() and pos = -1 -} - -/** - * Gets the enclosing callable of `ret`. - */ -Callable returnNodeEnclosingCallable(DataFlow::Node ret) { - result = DataFlowImplCommon::getNodeEnclosingCallable(ret).asCallable() -} - -/** - * Holds if `node` is an own instance access. 
- */ -predicate isOwnInstanceAccessNode(ReturnNode node) { - node.asExpr().(J::ThisAccess).isOwnInstanceAccess() -} - -predicate sinkModelSanitizer(DataFlow::Node node) { - // exclude variable capture jump steps - exists(Ssa::SsaImplicitInit closure | - closure.captures(_) and - node.asExpr() = closure.getAFirstUse() - ) -} - -/** - * Holds if `source` is an api entrypoint relevant for creating sink models. - */ -predicate apiSource(DataFlow::Node source) { - ( - source.asExpr().(J::FieldAccess).isOwnFieldAccess() or - source instanceof DataFlow::ParameterNode - ) and - exists(J::RefType t | - t = source.getEnclosingCallable().getDeclaringType().getAnAncestor() and - not t instanceof J::TypeObject and - t.isPublic() - ) -} - -/** - * Holds if it is not relevant to generate a source model for `api`, even - * if flow is detected from a node within `source` to a sink within `api`. - */ -predicate irrelevantSourceSinkApi(Callable source, SourceTargetApi api) { none() } - -/** - * Gets the MaD input string representation of `source`. - */ -string asInputArgumentSpecific(DataFlow::Node source) { - exists(int pos | - source.(DataFlow::ParameterNode).isParameterOf(_, pos) and - if pos >= 0 then result = "Argument[" + pos + "]" else result = qualifierString() - ) - or - source.asExpr() instanceof J::FieldAccess and - result = qualifierString() -} - -/** - * Holds if `kind` is a relevant sink kind for creating sink models. - */ -bindingset[kind] -predicate isRelevantSinkKind(string kind) { - not kind = "log-injection" and - not kind.matches("regex-use%") and - not kind = "file-content-store" -} - -/** - * Holds if `kind` is a relevant source kind for creating source models. - */ -bindingset[kind] -predicate isRelevantSourceKind(string kind) { any() } - -predicate containerContent = DataFlowPrivate::containerContent/1; - -/** - * Holds if there is a taint step from `node1` to `node2` in content flow. 
- */ -predicate isAdditionalContentFlowStep(DataFlow::Node node1, DataFlow::Node node2) { - TaintTracking::defaultAdditionalTaintStep(node1, node2, _) and - not exists(DataFlow::Content f | - DataFlowPrivate::readStep(node1, f, node2) and containerContent(f) - ) -} - -/** - * Holds if the content set `c` is a field or a synthetic field. - */ -predicate isField(ContentSet c) { - c instanceof DataFlowUtil::FieldContent or - c instanceof DataFlowUtil::SyntheticFieldContent -} - -/** - * Gets the MaD synthetic name string representation for the content set `c`, if any. - */ -string getSyntheticName(DataFlow::ContentSet c) { - exists(Field f | - not f.isPublic() and - f = c.(DataFlowUtil::FieldContent).getField() and - result = f.getQualifiedName() - ) - or - result = c.(DataFlowUtil::SyntheticFieldContent).getField() -} - -/** - * Gets the MaD string representation of the content set `c`. - */ -string printContent(ContentSet c) { - exists(Field f | f = c.(DataFlowUtil::FieldContent).getField() and f.isPublic() | - result = "Field[" + f.getQualifiedName() + "]" - ) - or - result = "SyntheticField[" + getSyntheticName(c) + "]" - or - c instanceof DataFlowUtil::CollectionContent and result = "Element" - or - c instanceof DataFlowUtil::ArrayContent and result = "ArrayElement" - or - c instanceof DataFlowUtil::MapValueContent and result = "MapValue" - or - c instanceof DataFlowUtil::MapKeyContent and result = "MapKey" -} diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll b/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll deleted file mode 100644 index 5b1a6fc031b0..000000000000 --- a/java/ql/src/utils/modelgenerator/internal/CaptureSummaryFlowQuery.qll +++ /dev/null @@ -1,84 +0,0 @@ -private import CaptureModels - -/** - * Capture fluent APIs that return `this`. 
- * Example of a fluent API: - * ```java - * public class Foo { - * public Foo someAPI() { - * // some side-effect - * return this; - * } - * } - * ``` - * - * Capture APIs that transfer taint from an input parameter to an output return - * value or parameter. - * Allows a sequence of read steps followed by a sequence of store steps. - * - * Examples: - * - * ```java - * public class Foo { - * private String tainted; - * - * public String returnsTainted() { - * return tainted; - * } - * - * public void putsTaintIntoParameter(List foo) { - * foo.add(tainted); - * } - * } - * ``` - * Captured Models: - * ``` - * p;Foo;true;returnsTainted;;Argument[this];ReturnValue;taint;df-generated - * p;Foo;true;putsTaintIntoParameter;(List);Argument[this];Argument[0];taint;df-generated - * ``` - * - * ```java - * public class Foo { - * private String tainted; - * public void doSomething(String input) { - * tainted = input; - * } - * ``` - * Captured Model: - * ```p;Foo;true;doSomething;(String);Argument[0];Argument[this];taint;df-generated``` - * - * ```java - * public class Foo { - * public String returnData(String tainted) { - * return tainted.substring(0,10) - * } - * } - * ``` - * Captured Model: - * ```p;Foo;true;returnData;;Argument[0];ReturnValue;taint;df-generated``` - * - * ```java - * public class Foo { - * public void addToList(String tainted, List foo) { - * foo.add(tainted); - * } - * } - * ``` - * Captured Model: - * ```p;Foo;true;addToList;;Argument[0];Argument[1];taint;df-generated``` - */ -string captureFlow(DataFlowSummaryTargetApi api) { - result = captureQualifierFlow(api) or - result = captureThroughFlow(api) -} - -/** - * Gets the neutral summary model for `api`, if any. - * A neutral summary model is generated, if we are not generating - * a summary model that applies to `api`. 
- */ -string captureNoFlow(DataFlowSummaryTargetApi api) { - not exists(DataFlowSummaryTargetApi api0 | exists(captureFlow(api0)) and api0.lift() = api.lift()) and - api.isRelevant() and - result = Printing::asNeutralSummaryModel(api) -} diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll index 3d56dff50726..36aec8053196 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureTypeBasedSummaryModels.qll @@ -1,7 +1,8 @@ private import java private import semmle.code.java.Collections private import semmle.code.java.dataflow.internal.ContainerFlow -private import CaptureModelsSpecific as Specific +private import CaptureModels as CaptureModels +private import CaptureModels::ModelGeneratorInput as ModelGeneratorInput private import CaptureModelsPrinting /** @@ -81,7 +82,7 @@ private predicate localTypeParameter(Callable callable, TypeVariable tv) { private string getAccessPath(Type t) { if t instanceof Array and - not Specific::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType()) + not CaptureModels::isPrimitiveTypeUsedForBulkData(t.(Array).getElementType()) then result = ".ArrayElement" else if t instanceof ContainerType or t instanceof IterableClass @@ -134,7 +135,7 @@ private string implicit(Callable callable, TypeVariable tv) { then access = getAccessPath(decl) else access = getSyntheticField(tv) | - result = Specific::qualifierString() + access + result = ModelGeneratorInput::qualifierString() + access ) } @@ -286,7 +287,7 @@ private predicate output(Callable callable, TypeVariable tv, string output) { module ModelPrintingInput implements ModelPrintingSig { class SummaryApi = TypeBasedFlowTargetApi; - class SourceOrSinkApi = Specific::SourceOrSinkTargetApi; + class SourceOrSinkApi = ModelGeneratorInput::SourceOrSinkTargetApi; string getProvenance() { result = 
"tb-generated" } } @@ -297,9 +298,7 @@ private module Printing = ModelPrinting; * A class of callables that are relevant generating summaries for based * on the Theorems for Free approach. */ -class TypeBasedFlowTargetApi extends Specific::SummaryTargetApi { - TypeBasedFlowTargetApi() { not Specific::isUninterestingForTypeBasedFlowModels(this) } - +class TypeBasedFlowTargetApi extends ModelGeneratorInput::SummaryTargetApi { /** * Gets the string representation of all type based summaries for `this` * inspired by the Theorems for Free approach. diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql index 3d2a2e07ac6e..4cb7f4604039 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureContentSummaryModels.ql @@ -3,7 +3,7 @@ import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { - string getCapturedModel(Callable c) { result = captureContentFlow(c) } + string getCapturedModel(Callable c) { result = ContentSensitive::captureFlow(c) } string getKind() { result = "contentbased-summary" } } diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql index e68730cc0edd..cdc2dfcaa459 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureNeutralModels.ql @@ -1,5 +1,5 @@ import java -import utils.modelgenerator.internal.CaptureSummaryFlowQuery +import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/java/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql 
b/java/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql index 415ebab13439..c9e5050fc1fc 100644 --- a/java/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql +++ b/java/ql/test/utils/modelgenerator/dataflow/CaptureSummaryModels.ql @@ -1,5 +1,5 @@ import java -import utils.modelgenerator.internal.CaptureSummaryFlowQuery +import utils.modelgenerator.internal.CaptureModels import TestUtilities.InlineMadTest module InlineMadTestConfig implements InlineMadTestConfigSig { diff --git a/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll new file mode 100644 index 000000000000..f668d80ef1fc --- /dev/null +++ b/shared/mad/codeql/mad/modelgenerator/internal/ModelGeneratorImpl.qll @@ -0,0 +1,939 @@ +/** + * INTERNAL: Do not use. + * + * Provides classes and predicates related to capturing summary, source, + * and sink models of the Standard or a 3rd party library. + */ + +private import codeql.dataflow.DataFlow +private import codeql.dataflow.TaintTracking as Tt +private import codeql.dataflow.internal.ContentDataFlowImpl +private import codeql.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import codeql.util.Location +private import ModelPrinting + +/** + * Provides language-specific model generator parameters. + */ +signature module ModelGeneratorInputSig<LocationSig Location, InputSig<Location> Lang> { + /** + * A Type. + */ + class Type; + + /** + * A Parameter. + */ + class Parameter; + + /** + * A Callable. + */ + class Callable { + /** + * Gets the number of parameters of this callable. + */ + int getNumberOfParameters(); + + /** + * Gets a string representation of this callable. + */ + string toString(); + } + + /** + * A node. + */ + class NodeExtended extends Lang::Node { + /** + * Gets the type of this node. + */ + Type getType(); + + /** + * Gets the enclosing callable of this node.
+ */ + Callable getEnclosingCallable(); + + /** + * Gets the enclosing callable of this node, when considered as an expression. + */ + Callable getAsExprEnclosingCallable(); + + /** + * Gets the parameter corresponding to this node, if any. + */ + Parameter asParameter(); + } + + /** + * A class of callables that are potentially relevant for generating summary or + * neutral models. + * + * In the Standard library and 3rd party libraries it is the callables (or callables that have a + * super implementation) that can be called from outside the library itself. + */ + class SummaryTargetApi extends Callable { + /** + * Gets the callable that a model will be lifted to. + * + * The lifted callable is relevant in terms of model + * generation (this is ensured by `liftedImpl`). + */ + Callable lift(); + + /** + * Holds if `this` is relevant in terms of model generation. + */ + predicate isRelevant(); + } + + /** + * A class of callables that are potentially relevant for generating source or + * sink models. + */ + class SourceOrSinkTargetApi extends Callable; + + /** + * A class of callables that are potentially relevant for generating source models. + */ + class SourceTargetApi extends SourceOrSinkTargetApi; + + /** + * A class of callables that are potentially relevant for generating sink models. + */ + class SinkTargetApi extends SourceOrSinkTargetApi; + + /** + * An instance parameter node. + */ + class InstanceParameterNode extends Lang::Node; + + /** + * Holds for type `t` for fields that are relevant as an intermediate + * read or write step in the data flow analysis. + * That is, flow through any data-flow node that does not have a relevant type + * will be excluded. + */ + predicate isRelevantType(Type t); + + /** + * Gets the underlying type of the content `c`. + */ + Type getUnderlyingContentType(Lang::ContentSet c); + + /** + * Gets the MaD string representation of the qualifier.
+ */ + string qualifierString(); + + /** + * Gets the MaD string representation of the parameter `p`. + */ + string parameterAccess(Parameter p); + + /** + * Gets the MaD string representation of the parameter `p` + * when used in content flow. + */ + string parameterContentAccess(Parameter p); + + /** + * Gets the MaD string representation of return through parameter at position + * `pos` of callable `c`. + */ + bindingset[c] + string paramReturnNodeAsOutput(Callable c, Lang::ParameterPosition pos); + + /** + * Gets the MaD string representation of return through parameter at position + * `pos` of callable `c` when used in content flow. + */ + bindingset[c] + string paramReturnNodeAsContentOutput(Callable c, Lang::ParameterPosition pos); + + /** + * Gets the enclosing callable of `node`. + */ + Callable returnNodeEnclosingCallable(Lang::Node node); + + /** + * Holds if `node` is an own instance access. + */ + predicate isOwnInstanceAccessNode(Lang::ReturnNode node); + + /** + * Holds if `node` is a sanitizer for sink model construction. + */ + predicate sinkModelSanitizer(Lang::Node node); + + /** + * Holds if `source` is an api entrypoint relevant for creating sink models. + */ + predicate apiSource(Lang::Node source); + + /** + * Gets the MaD input string representation of `source`. + */ + string getInputArgument(Lang::Node source); + + /** + * Holds if it is not relevant to generate a source model for `api`, even + * if flow is detected from a node within `sourceEnclosing` to a sink within `api`. + */ + bindingset[sourceEnclosing, api] + predicate irrelevantSourceSinkApi(Callable sourceEnclosing, SourceTargetApi api); + + /** + * Holds if `kind` is a relevant sink kind for creating sink models. + */ + bindingset[kind] + predicate isRelevantSinkKind(string kind); + + /** + * Holds if `kind` is a relevant source kind for creating source models. + */ + bindingset[kind] + predicate isRelevantSourceKind(string kind); + + /** + * Holds if the content `c` is a container.
+ */ + predicate containerContent(Lang::ContentSet c); + + /** + * Holds if there is a taint step from `nodeFrom` to `nodeTo` in content flow. + */ + predicate isAdditionalContentFlowStep(Lang::Node nodeFrom, Lang::Node nodeTo); + + /** + * Holds if the content set `c` is field-like. + */ + predicate isField(Lang::ContentSet c); + + /** + * Gets the MaD synthetic name string representation for the content set `c`, if any. + */ + string getSyntheticName(Lang::ContentSet c); + + /** + * Gets the MaD string representation of the content set `c`. + */ + string printContent(Lang::ContentSet c); + + /** + * Holds if it is irrelevant to generate models for `api` based on data flow analysis. + * + * This serves as an extra filter for the `relevant` predicate. + */ + predicate isUninterestingForDataFlowModels(Callable api); + + /** + * Holds if `namespace`, `type`, `extensible`, `name` and `parameters` are string representations + * for the corresponding MaD columns for `api`. + */ + predicate partialModel( + Callable api, string namespace, string type, string extensible, string name, string parameters + ); + + /** + * Holds if `node` is specified as a source with the given kind in a MaD flow + * model. + */ + predicate sourceNode(Lang::Node node, string kind); + + /** + * Holds if `node` is specified as a sink with the given kind in a MaD flow + * model.
+ */ + predicate sinkNode(Lang::Node node, string kind); +} + +module MakeModelGenerator< + LocationSig Location, InputSig<Location> Lang, Tt::InputSig<Location, Lang> TaintLang, + ModelGeneratorInputSig<Location, Lang> ModelGeneratorInput> +{ + private module DataFlow { + import Lang + import DataFlowMake<Location, Lang> + import DataFlowImplCommon::MakeImplCommon<Location, Lang> + } + + private import ModelGeneratorInput + private import Tt::TaintFlowMake<Location, Lang, TaintLang> as TaintTracking + + private module ModelPrintingLang implements ModelPrintingLangSig { + class Callable = ModelGeneratorInput::Callable; + + predicate partialModel = ModelGeneratorInput::partialModel/6; + } + + private import ModelPrintingImpl<ModelPrintingLang> as Printing + + final private class NodeExtendedFinal = NodeExtended; + + /** + * A node from which flow can return to the caller. This is either a regular + * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + */ + private class ReturnNodeExt extends NodeExtendedFinal { + private DataFlow::ReturnKindExt kind; + + ReturnNodeExt() { + kind = DataFlow::getValueReturnPosition(this).getKind() or + kind = DataFlow::getParamReturnPosition(this, _).getKind() + } + + /** + * Gets the kind of the return node.
+ */ + DataFlow::ReturnKindExt getKind() { result = kind } + } + + bindingset[c] + private signature string printCallableParamSig(Callable c, DataFlow::ParameterPosition p); + + private module PrintReturnNodeExt<printCallableParamSig/2 printCallableParam> { + string getOutput(ReturnNodeExt node) { + node.getKind() instanceof DataFlow::ValueReturnKind and + result = "ReturnValue" + or + exists(DataFlow::ParameterPosition pos | + pos = node.getKind().(DataFlow::ParamUpdateReturnKind).getPosition() and + result = printCallableParam(returnNodeEnclosingCallable(node), pos) + ) + } + } + + string getOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt<paramReturnNodeAsOutput/2>::getOutput(node) + } + + final private class SummaryTargetApiFinal = SummaryTargetApi; + + class DataFlowSummaryTargetApi extends SummaryTargetApiFinal { + DataFlowSummaryTargetApi() { not isUninterestingForDataFlowModels(this) } + } + + class DataFlowSourceTargetApi = SourceTargetApi; + + class DataFlowSinkTargetApi = SinkTargetApi; + + private module ModelPrintingInput implements Printing::ModelPrintingSig { + class SummaryApi = DataFlowSummaryTargetApi; + + class SourceOrSinkApi = SourceOrSinkTargetApi; + + string getProvenance() { result = "df-generated" } + } + + module ModelPrinting = Printing::ModelPrinting<ModelPrintingInput>; + + /** + * Holds if `c` is a relevant content kind, where the underlying type is relevant. + */ + private predicate isRelevantTypeInContent(DataFlow::ContentSet c) { + isRelevantType(getUnderlyingContentType(c)) + } + + /** + * Holds if data can flow from `node1` to `node2` either via a read or a write of an intermediate field `f`. + */ + private predicate isRelevantTaintStep(DataFlow::Node node1, DataFlow::Node node2) { + exists(DataFlow::ContentSet f | + DataFlow::readStep(node1, f, node2) and + // Partially restrict the content types used for intermediate steps.
+ (not exists(getUnderlyingContentType(f)) or isRelevantTypeInContent(f)) + ) + or + exists(DataFlow::ContentSet f | DataFlow::storeStep(node1, f, node2) | containerContent(f)) + } + + /** + * Holds if content `c` is either a field, a synthetic field or language-specific + * content of a relevant type or a container-like content. + */ + pragma[nomagic] + private predicate isRelevantContent0(DataFlow::ContentSet c) { + isRelevantTypeInContent(c) or + containerContent(c) + } + + /** + * Gets the MaD string representation of the parameter node `p`. + */ + string parameterNodeAsInput(DataFlow::ParameterNode p) { + result = parameterAccess(p.(NodeExtended).asParameter()) + or + result = qualifierString() and p instanceof InstanceParameterNode + } + + /** + * Gets the MaD input string representation of `source`. + */ + string asInputArgument(NodeExtended source) { result = getInputArgument(source) } + + /** + * Gets the summary model of `api`, if it follows the `fluent` programming pattern (returns `this`). + */ + private string captureQualifierFlow(DataFlowSummaryTargetApi api) { + exists(ReturnNodeExt ret | + api = returnNodeEnclosingCallable(ret) and + isOwnInstanceAccessNode(ret) + ) and + result = ModelPrinting::asLiftedValueModel(api, qualifierString(), "ReturnValue") + } + + private int accessPathLimit0() { result = 2 } + + private newtype TTaintState = + TTaintRead(int n) { n in [0 .. accessPathLimit0()] } or + TTaintStore(int n) { n in [1 .. accessPathLimit0()] } + + abstract private class TaintState extends TTaintState { + abstract string toString(); + } + + /** + * A FlowState representing a tainted read. + */ + private class TaintRead extends TaintState, TTaintRead { + private int step; + + TaintRead() { this = TTaintRead(step) } + + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintRead(" + step + ")" } + } + + /** + * A FlowState representing a tainted write.
+ */ + private class TaintStore extends TaintState, TTaintStore { + private int step; + + TaintStore() { this = TTaintStore(step) } + + /** + * Gets the flow state step number. + */ + int getStep() { result = step } + + override string toString() { result = "TaintStore(" + step + ")" } + } + + /** + * A data-flow configuration for tracking flow through APIs. + * The sources are the parameters of an API and the sinks are the return values (excluding `this`) and parameters. + * + * This can be used to generate Flow summaries for APIs from parameter to return. + */ + module PropagateFlowConfig implements DataFlow::StateConfigSig { + class FlowState = TaintState; + + predicate isSource(DataFlow::Node source, FlowState state) { + source instanceof DataFlow::ParameterNode and + source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi and + state.(TaintRead).getStep() = 0 + } + + predicate isSink(DataFlow::Node sink, FlowState state) { + sink instanceof ReturnNodeExt and + not isOwnInstanceAccessNode(sink) and + not exists(captureQualifierFlow(sink.(NodeExtended).getAsExprEnclosingCallable())) and + (state instanceof TaintRead or state instanceof TaintStore) + } + + predicate isAdditionalFlowStep( + DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + ) { + exists(DataFlow::ContentSet c | + DataFlow::store(node1, c.getAStoreContent(), node2, _, _) and + isRelevantContent0(c) and + ( + state1 instanceof TaintRead and state2.(TaintStore).getStep() = 1 + or + state1.(TaintStore).getStep() + 1 = state2.(TaintStore).getStep() + ) + ) + or + exists(DataFlow::ContentSet c | + DataFlow::readStep(node1, c, node2) and + isRelevantContent0(c) and + state1.(TaintRead).getStep() + 1 = state2.(TaintRead).getStep() + ) + } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + DataFlow::FlowFeature getAFeature() { + result instanceof
DataFlow::FeatureEqualSourceSinkCallContext + } + } + + module PropagateFlow = TaintTracking::GlobalWithState<PropagateFlowConfig>; + + /** + * Gets the taint summary model of `api` for parameter node `p` and return node `returnNodeExt`, if both are enclosed by `api` and their MaD input and output strings differ. + */ + string captureThroughFlow0( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt + ) { + exists(string input, string output | + p.(NodeExtended).getEnclosingCallable() = api and + returnNodeExt.getEnclosingCallable() = api and + input = parameterNodeAsInput(p) and + output = getOutput(returnNodeExt) and + input != output and + result = ModelPrinting::asLiftedTaintModel(api, input, output) + ) + } + + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to return value or parameter. + */ + private string captureThroughFlow(DataFlowSummaryTargetApi api) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + PropagateFlow::flow(p, returnNodeExt) and + result = captureThroughFlow0(api, p, returnNodeExt) + ) + } + + /** + * Gets the summary model(s) of `api`, if there is flow from parameters to the + * return value or parameter or if `api` is a fluent API. + */ + string captureFlow(DataFlowSummaryTargetApi api) { + result = captureQualifierFlow(api) or + result = captureThroughFlow(api) + } + + /** + * Gets the neutral summary model for `api`, if any. + * A neutral summary model is generated, if we are not generating + * a summary model that applies to `api`. + */ + string captureNoFlow(DataFlowSummaryTargetApi api) { + not exists(DataFlowSummaryTargetApi api0 | + exists(captureFlow(api0)) and api0.lift() = api.lift() + ) and + api.isRelevant() and + result = ModelPrinting::asNeutralSummaryModel(api) + } + + /** + * Provides classes and predicates related to capturing summary models + * based on content data flow.
+ */ + module ContentSensitive { + private import MakeImplContentDataFlow<Location, Lang> as ContentDataFlow + + private module PropagateContentFlowConfig implements ContentDataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + source instanceof DataFlow::ParameterNode and + source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi + } + + predicate isSink(DataFlow::Node sink) { + sink.(ReturnNodeExt).getEnclosingCallable() instanceof DataFlowSummaryTargetApi + } + + predicate isAdditionalFlowStep = isAdditionalContentFlowStep/2; + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + int accessPathLimit() { result = 2 } + + predicate isRelevantContent(DataFlow::ContentSet s) { isRelevantContent0(s) } + + DataFlow::FlowFeature getAFeature() { + result instanceof DataFlow::FeatureEqualSourceSinkCallContext + } + } + + private module PropagateContentFlow = ContentDataFlow::Global<PropagateContentFlowConfig>; + + private string getContentOutput(ReturnNodeExt node) { + result = PrintReturnNodeExt<paramReturnNodeAsContentOutput/2>::getOutput(node) + } + + /** + * Gets the MaD string representation of the parameter `p` + * when used in content flow. + */ + private string parameterNodeAsContentInput(DataFlow::ParameterNode p) { + result = parameterContentAccess(p.(NodeExtended).asParameter()) + or + result = qualifierString() and p instanceof InstanceParameterNode + } + + private string getContent(PropagateContentFlow::AccessPath ap, int i) { + exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail | + head = ap.getHead() and + tail = ap.getTail() + | + i = 0 and + result = "." + printContent(head) + or + i > 0 and result = getContent(tail, i - 1) + ) + } + + /** + * Gets the MaD string representation of a store step access path.
+ */ + private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i) + } + + /** + * Gets the MaD string representation of a read step access path. + */ + private string printReadAccessPath(PropagateContentFlow::AccessPath ap) { + result = concat(int i | | getContent(ap, i), "" order by i desc) + } + + /** + * Holds if the access path `ap` contains a field or synthetic field access. + */ + private predicate mentionsField(PropagateContentFlow::AccessPath ap) { + exists(DataFlow::ContentSet head, PropagateContentFlow::AccessPath tail | + head = ap.getHead() and + tail = ap.getTail() + | + mentionsField(tail) or isField(head) + ) + } + + private predicate apiFlow( + DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.(NodeExtended).getEnclosingCallable() = api + } + + /** + * A class of APIs relevant for modeling using content flow. + * The following heuristic is applied: + * Content flow is only relevant for an API, if + * #content flow <= 2 * #parameters + 3 + * If an API produces more content flow, it is likely that + * 1. Types are not sufficiently constrained leading to a combinatorial + * explosion in dispatch and thus in the generated summaries. + * 2. It is a reasonable approximation to use the non-content based flow + * detection instead, as reads and stores would use a significant + * part of an objects internal state. 
+ */ + private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi { + ContentDataFlowSummaryTargetApi() { + count(string input, string output | + exists( + DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, + ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores + | + apiFlow(this, p, reads, returnNodeExt, stores, _) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) + ) + ) <= 2 * this.getNumberOfParameters() + 3 + } + } + + pragma[nomagic] + private predicate apiContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath stores, boolean preservesValue + ) { + PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue) and + returnNodeExt.getEnclosingCallable() = api and + p.(NodeExtended).getEnclosingCallable() = api + } + + /** + * Holds if any of the content sets in `path` translates into a synthetic field. + */ + private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) { + exists(PropagateContentFlow::AccessPath tail, DataFlow::ContentSet head | + head = path.getHead() and + tail = path.getTail() + | + exists(getSyntheticName(head)) or + hasSyntheticContent(tail) + ) + } + + /** + * A module containing predicates for validating access paths containing content sets + * that translates into synthetic fields, when used for generated summary models. + */ + private module AccessPathSyntheticValidation { + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`). 
+ */ + private predicate step( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + exists(DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt | + p.(NodeExtended).getType() = t1 and + returnNodeExt.getType() = t2 and + apiContentFlow(_, p, read, returnNodeExt, store, _) + ) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` does not have synthetic content and `store` does. + * + * Step A -> Synth. + */ + private predicate synthPathEntry( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + not hasSyntheticContent(read) and + hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists an API that has content flow from `read` (on type `t1`) + * to `store` (on type `t2`), where `read` has synthetic content + * and `store` does not. + * + * Step Synth -> A. + */ + private predicate synthPathExit( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + hasSyntheticContent(read) and + not hasSyntheticContent(store) and + step(t1, read, t2, store) + } + + /** + * Holds if there exists a path of steps from `read` to an exit. + * + * read ->* Synth -> A + */ + private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) { + synthPathExit(t, read, _, _) + or + hasSyntheticContent(read) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(t, read, midType, mid) and + reachesSynthExit(midType, mid.reverse()) + ) + } + + /** + * Holds if there exists a path of steps from an entry to `store`. 
+ * + * A -> Synth ->* store + */ + private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) { + synthPathEntry(_, _, t, store) + or + hasSyntheticContent(store) and + exists(PropagateContentFlow::AccessPath mid, Type midType | + hasSyntheticContent(mid) and + step(midType, mid, t, store) and + synthEntryReaches(midType, mid.reverse()) + ) + } + + /** + * Holds if at least one of the access paths `read` (on type `t1`) and `store` (on type `t2`) + * contains content that will be translated into a synthetic field, when being used in + * a MaD summary model, and if there is a range of APIs, such that + * when chaining their flow access paths, there exist access paths `A` and `B` where + * A ->* read -> store ->* B and where `A` and `B` do not contain content that will + * be translated into a synthetic field. + * + * This is needed because we don't want to include summaries that read from or + * store into an "internal" synthetic field. + * + * Example: + * Assume we have a type `t` (in this case `t1` = `t2`) with methods `getX` and + * `setX`, which get and set a private field `X` on `t`. + * This would lead to the following content flows + * getX : Argument[this].SyntheticField[t.X] -> ReturnValue. + * setX : Argument[0] -> Argument[this].SyntheticField[t.X] + * As the reads and stores are on synthetic fields we should only make summaries + * if both of these methods exist. 
+ */ + pragma[nomagic] + predicate acceptReadStore( + Type t1, PropagateContentFlow::AccessPath read, Type t2, + PropagateContentFlow::AccessPath store + ) { + synthPathEntry(t1, read, t2, store) and reachesSynthExit(t2, store.reverse()) + or + exists(PropagateContentFlow::AccessPath store0 | store0.reverse() = read | + synthEntryReaches(t1, store0) and synthPathExit(t1, read, t2, store) + or + synthEntryReaches(t1, store0) and + step(t1, read, t2, store) and + reachesSynthExit(t2, store.reverse()) + ) + } + } + + /** + * Holds if the API `api` has relevant flow from `read` on `p` to `store` on `returnNodeExt`. + * Flow is considered relevant, + * 1. If neither `read` nor `store` contains a content set that translates into a synthetic field. + * 2. If `read` or `store` contains a content set that translates into a synthetic field, and if + * the synthetic content is "live" on the relevant declaring type. + */ + private predicate apiRelevantContentFlow( + ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p, + PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath store, boolean preservesValue + ) { + apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and + ( + not hasSyntheticContent(read) and not hasSyntheticContent(store) + or + AccessPathSyntheticValidation::acceptReadStore(p.(NodeExtended).getType(), read, + returnNodeExt.getType(), store) + ) + } + + pragma[nomagic] + private predicate captureFlow0( + ContentDataFlowSummaryTargetApi api, string input, string output, boolean preservesValue, + boolean lift + ) { + exists( + DataFlow::ParameterNode p, ReturnNodeExt returnNodeExt, + PropagateContentFlow::AccessPath reads, PropagateContentFlow::AccessPath stores + | + apiRelevantContentFlow(api, p, reads, returnNodeExt, stores, preservesValue) and + input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and + output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) 
and + input != output and + (if mentionsField(reads) or mentionsField(stores) then lift = false else lift = true) + ) + } + + /** + * Gets the content-based summary model(s) of the API `api` (if there is flow from a parameter to + * the return value or a parameter). + * + * Models are lifted to the best type in case the read and store access paths do not + * contain a field or synthetic field access. + */ + string captureFlow(ContentDataFlowSummaryTargetApi api) { + exists(string input, string output, boolean lift, boolean preservesValue | + captureFlow0(api, input, output, _, lift) and + preservesValue = max(boolean p | captureFlow0(api, input, output, p, lift)) and + result = ModelPrinting::asModel(api, input, output, preservesValue, lift) + ) + } + } + + /** + * A dataflow configuration used for finding new sources. + * The sources are the already known existing sources and the sinks are the API return nodes. + * + * This can be used to generate Source summaries for an API, if the API exposes an already known source + * via its return (then the API itself becomes a source). + */ + module PropagateFromSourceConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + exists(string kind | + isRelevantSourceKind(kind) and + sourceNode(source, kind) + ) + } + + predicate isSink(DataFlow::Node sink) { + sink.(ReturnNodeExt).getEnclosingCallable() instanceof DataFlowSourceTargetApi + } + + DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSinkCallContext } + + predicate isBarrier(DataFlow::Node n) { + exists(Type t | t = n.(NodeExtended).getType() and not isRelevantType(t)) + } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } + } + + private module PropagateFromSource = TaintTracking::Global<PropagateFromSourceConfig>; + + /** + * Gets the source model(s) of `api`, if there is flow from an existing known source to the return of `api`. 
+ */ + string captureSource(DataFlowSourceTargetApi api) { + exists(NodeExtended source, ReturnNodeExt sink, string kind | + PropagateFromSource::flow(source, sink) and + sourceNode(source, kind) and + api = sink.getEnclosingCallable() and + not irrelevantSourceSinkApi(source.getEnclosingCallable(), api) and + result = ModelPrinting::asSourceModel(api, getOutput(sink), kind) + ) + } + + /** + * A dataflow configuration used for finding new sinks. + * The sources are the parameters of the API and the fields of the enclosing type. + * + * This can be used to generate Sink summaries for an API, if the API propagates a parameter (or enclosing type field) + * into an existing known sink (then the API itself becomes a sink). + */ + module PropagateToSinkConfig implements DataFlow::ConfigSig { + predicate isSource(DataFlow::Node source) { + apiSource(source) and + source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSinkTargetApi + } + + predicate isSink(DataFlow::Node sink) { + exists(string kind | isRelevantSinkKind(kind) and sinkNode(sink, kind)) + } + + predicate isBarrier(DataFlow::Node node) { + exists(Type t | t = node.(NodeExtended).getType() and not isRelevantType(t)) + or + sinkModelSanitizer(node) + } + + DataFlow::FlowFeature getAFeature() { result instanceof DataFlow::FeatureHasSourceCallContext } + + predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + isRelevantTaintStep(node1, node2) + } + } + + private module PropagateToSink = TaintTracking::Global<PropagateToSinkConfig>; + + /** + * Gets the sink model(s) of `api`, if there is flow from a parameter to an existing known sink. 
+ */ + string captureSink(DataFlowSinkTargetApi api) { + exists(NodeExtended src, NodeExtended sink, string kind | + PropagateToSink::flow(src, sink) and + sinkNode(sink, kind) and + api = src.getEnclosingCallable() and + result = ModelPrinting::asSinkModel(api, asInputArgument(src), kind) + ) + } +} diff --git a/shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll b/shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll similarity index 100% rename from shared/mad/codeql/mad/modelgenerator/ModelPrinting.qll rename to shared/mad/codeql/mad/modelgenerator/internal/ModelPrinting.qll diff --git a/shared/mad/qlpack.yml b/shared/mad/qlpack.yml index 9933a7eef31b..e1bb442e4aee 100644 --- a/shared/mad/qlpack.yml +++ b/shared/mad/qlpack.yml @@ -4,4 +4,5 @@ groups: shared library: true dependencies: codeql/util: ${workspace} + codeql/dataflow: ${workspace} warnOnImplicitThis: true