From 8e5d23fc1cfc12b2f8f9df2430133b2129db52bf Mon Sep 17 00:00:00 2001 From: Asger F Date: Mon, 8 Apr 2024 11:53:59 +0200 Subject: [PATCH] Ruby: WIP instantiation for Ruby --- ruby/ql/lib/codeql/ruby/ApiGraphs.qll | 18 ++- .../ruby/frameworks/data/ModelsAsData.qll | 114 ++++++++++++++++++ .../data/internal/ApiGraphModelsSpecific.qll | 52 ++++++++ .../ruby/typetracking/ApiGraphShared.qll | 2 + ruby/ql/src/queries/modeling/GenerateModel.ql | 26 +++- .../utils/modeleditor/GenerateModel.expected | 5 +- 6 files changed, 208 insertions(+), 9 deletions(-) diff --git a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll index cc887a9a05c7..580fe3de8214 100644 --- a/ruby/ql/lib/codeql/ruby/ApiGraphs.qll +++ b/ruby/ql/lib/codeql/ruby/ApiGraphs.qll @@ -1047,15 +1047,29 @@ module API { import MkShared - /** Gets the API node corresponding to the module/class object for `mod`. */ + /** Gets the API node corresponding to the module/class object for `mod`, with epsilon edges to descendent modules/classes. */ bindingset[mod] pragma[inline_late] Node getModuleNode(DataFlow::ModuleNode mod) { result = Impl::MkModuleObjectDown(mod) } - /** Gets the API node corresponding to instances of `mod`. */ + /** Gets the API node corresponding to instances of `mod`, with epsilon edges to instances of descendent modules/classes. */ bindingset[mod] pragma[inline_late] Node getModuleInstance(DataFlow::ModuleNode mod) { result = getModuleNode(mod).getInstance() } + + /** Gets the API node corresponding to the module/class object for `mod`, with epsilon edges to ancestor modules/classes. */ + bindingset[mod] + pragma[inline_late] + Node getModuleNodeUp(DataFlow::ModuleNode mod) { result = Impl::MkModuleObjectUp(mod) } + + /** Gets the API node corresponding to instances of `mod`, with epsilon edges to instances of ancestor modules/classes.
*/ + bindingset[mod] + pragma[inline_late] + Node getModuleInstanceUp(DataFlow::ModuleNode mod) { + result = getModuleNodeUp(mod).getInstance() + } + + import Impl } private import Internal diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll index 4d57191dc1ed..ab03eba22ac1 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/ModelsAsData.qll @@ -60,3 +60,117 @@ private class SummarizedCallableFromModel extends SummarizedCallable { ) } } + +/** + * Specifies which parts of the API graph to export in `ModelExport`. + */ +signature module ModelExportSig { + /** + * Holds if the exported model should contain `node`, if it is publicly accessible. + * + * This ensures that all ways to access `node` will be exported in type models. + */ + predicate shouldContain(API::Node node); + + /** + * Holds if a name must be generated for `node` if it is to be included in the exported graph. + */ + default predicate mustBeNamed(API::Node node) { none() } + + /** + * Holds if the exported model should preserve all paths leading to an instance of `type`, + * including partial ones. It does not need to be closed transitively, `ModelExport` will + * extend this to include type models from which `type` can be derived. + */ + default predicate shouldContainType(string type) { none() } +} + +/** + * Module for exporting type models for a given set of nodes in the API graph.
+ */ +module ModelExport { + private import codeql.mad.dynamic.GraphExport + private import internal.ApiGraphModelsExport + + private module GraphExportConfig implements GraphExportSig { + predicate edge = Specific::apiGraphHasEdge/3; + + predicate shouldContain = S::shouldContain/1; + + predicate shouldNotContain(API::Node node) { + // Only export def-nodes, exclude use-nodes + node instanceof API::Internal::MkModuleObjectDown + or + node instanceof API::Internal::MkModuleInstanceDown + or + node instanceof API::Internal::MkForwardNode + or + node instanceof API::Internal::MkMethodAccessNode + } + + predicate mustBeNamed(API::Node node) { S::mustBeNamed(node) } + + predicate exposedName(API::Node node, string type, string path) { + path = "" and + exists(DataFlow::ModuleNode mod | + node = API::Internal::MkModuleObjectUp(mod) and + type = mod.getQualifiedName() + "!" + or + node = API::Internal::MkModuleInstanceUp(mod) and + type = mod.getQualifiedName() + ) + } + + private string suggestedMethodName(DataFlow::MethodNode method) { + exists(DataFlow::ModuleNode mod, string name | + method = mod.getOwnSingletonMethod(name) and + result = mod.getQualifiedName() + "." 
+ name + or + method = mod.getOwnInstanceMethod(name) and + result = mod.getQualifiedName() + "#" + name + ) + } + + predicate suggestedName(API::Node node, string type) { + // exists(DataFlow::MethodNode method | + // node.asSink() = method.getAReturnNode() and type = suggestedMethodName(method) + "()" + // ) + none() + } + + bindingset[host] + predicate hasTypeSummary(API::Node host, string path) { + exists(string methodName | + methodReturnsReceiver(host.getMethod(methodName).asCallable()) and + path = "Method[" + methodName + "].ReturnValue" + ) + } + + pragma[nomagic] + private predicate methodReturnsReceiver(DataFlow::MethodNode func) { + getAReceiverRef(func).flowsTo(func.getAReturnNode()) + } + + pragma[nomagic] + private DataFlow::CallNode getAReceiverCall(DataFlow::MethodNode func) { + result = getAReceiverRef(func).getAMethodCall() + } + + pragma[nomagic] + private predicate callReturnsReceiver(DataFlow::CallNode call) { + methodReturnsReceiver(call.getATarget()) + } + + pragma[nomagic] + private DataFlow::LocalSourceNode getAReceiverRef(DataFlow::MethodNode func) { + result = func.getSelfParameter() + or + result = getAReceiverCall(func) and + callReturnsReceiver(result) + } + } + + private module ExportedGraph = TypeGraphExport; + + import ExportedGraph +} diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll index e4359f6d4ca7..600521a5f57d 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModelsSpecific.qll @@ -27,6 +27,7 @@ import codeql.ruby.ApiGraphs import codeql.ruby.DataFlow::DataFlow as DataFlow private import FlowSummaryImpl::Public private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatch +import codeql.Locations // re-export Location pragma[nomagic] private predicate isUsedTopLevelConstant(string name) { 
@@ -248,3 +249,54 @@ predicate isExtraValidTokenArgumentInIdentifyingAccessPath(string name, string a } module ModelOutputSpecific { } + +/** + * Holds if the value of `source` is exposed at `sink`. + */ +bindingset[source] +predicate sourceFlowsToSink(API::Node source, API::Node sink) { + // TODO: also establish subclass relationship + source.getAValueReachableFromSource() = sink.asSink() +} + +/** + * Holds if the edge `pred -> succ` labelled with `path` exists in the API graph. + */ +bindingset[pred] +predicate apiGraphHasEdge(API::Node pred, string path, API::Node succ) { + exists(string name | + API::Internal::methodEdge(pred, name, succ) and path = "Method[" + name + "]" + ) + or + API::Internal::elementEdge(pred, succ) and path = "Element" + or + API::Internal::instanceEdge(pred, succ) and path = "Instance" + or + API::Internal::returnEdge(pred, succ) and path = "ReturnValue" + or + exists(DataFlowDispatch::ArgumentPosition pos | + not pos.isSelf() and + API::Internal::argumentEdge(pred, pos, succ) and + path = "Argument[" + FlowSummaryImpl::Input::encodeArgumentPosition(pos) + "]" + ) + or + exists(DataFlowDispatch::ParameterPosition pos | + not pos.isSelf() and + API::Internal::parameterEdge(pred, pos, succ) and + path = "Parameter[" + FlowSummaryImpl::Input::encodeParameterPosition(pos) + "]" + ) + or + path = "" and + API::Internal::epsilonEdge(pred, succ) +} + +pragma[nomagic] +private predicate inheritanceEdge(API::Node pred, API::Node succ) { + exists(DataFlow::ModuleNode mod | + pred = API::Internal::getModuleNodeUp(mod) and + succ = API::Internal::getModuleNodeUp(mod.getAnImmediateAncestor()) + or + pred = API::Internal::getModuleInstanceUp(mod) and + succ = API::Internal::getModuleInstanceUp(mod.getAnImmediateAncestor()) + ) +} diff --git a/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll b/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll index 7215116e8ef3..3fe19975ae22 100644 --- 
a/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll +++ b/ruby/ql/lib/codeql/ruby/typetracking/ApiGraphShared.qll @@ -144,6 +144,8 @@ module ApiGraphShared { private import Cached + predicate epsilonEdge = Cached::epsilonEdge/2; + /** Gets an API node corresponding to the end of forward-tracking to `localSource`. */ pragma[nomagic] private ApiNode forwardEndNode(DataFlow::LocalSourceNode localSource) { diff --git a/ruby/ql/src/queries/modeling/GenerateModel.ql b/ruby/ql/src/queries/modeling/GenerateModel.ql index c7811f12c66b..8b7ef14c6946 100644 --- a/ruby/ql/src/queries/modeling/GenerateModel.ql +++ b/ruby/ql/src/queries/modeling/GenerateModel.ql @@ -8,11 +8,23 @@ private import internal.Types private import internal.Summaries +private import codeql.ruby.ApiGraphs +private import codeql.ruby.DataFlow +private import codeql.ruby.frameworks.data.ModelsAsData -/** - * Holds if `(type2, path)` should be seen as an instance of `type1`. - */ -query predicate typeModel = Types::typeModel/3; +module ModelExportConfig implements ModelExportSig { + predicate shouldContain(API::Node node) { + exists(DataFlow::MethodNode method | node = method.backtrack()) + } + + predicate shouldContainType(string type) { + type = any(DataFlow::ModuleNode mod).getQualifiedName() + ["", "!"] + } +} + +module ExportedModel = ModelExport; + +query predicate typeModel = ExportedModel::typeModel/3; /** * Holds if the value at `(type, path)` should be seen as a flow @@ -35,7 +47,11 @@ query predicate sinkModel(string type, string path, string kind) { none() } * `kind` should be either `value` or `taint`, for value-preserving or taint-preserving steps, * respectively. 
*/ -query predicate summaryModel = Summaries::summaryModel/5; +query predicate summaryModel(string type, string path, string input, string output, string kind) { + Summaries::summaryModel(type, path, input, output, kind) + or + ExportedModel::summaryModel(type, path, input, output, kind) +} /** * Holds if `path` can be substituted for a token `TypeVar[name]`. diff --git a/ruby/ql/test/query-tests/utils/modeleditor/GenerateModel.expected b/ruby/ql/test/query-tests/utils/modeleditor/GenerateModel.expected index 284c7ed17d57..08e260f1257e 100644 --- a/ruby/ql/test/query-tests/utils/modeleditor/GenerateModel.expected +++ b/ruby/ql/test/query-tests/utils/modeleditor/GenerateModel.expected @@ -1,7 +1,8 @@ sourceModel sinkModel +summaryModel +| A! | Method[new] | Argument[0] | ReturnValue | value | typeVariableModel typeModel | M1 | B | | -summaryModel -| A! | Method[new] | Argument[0] | ReturnValue | value | +| M1! | B! | |