Skip to content

Introduce subassembly offset output artifact #15710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Language Features:


Compiler Features:
* Commandline Interface: Add ``--assembly-structure`` option to provide information about assemblies and nested assemblies.
* Standard JSON Interface: Add ``evm.bytecode.assemblyStructure`` output that provides information about assemblies and nested assemblies.


Bugfixes:
Expand Down
31 changes: 31 additions & 0 deletions docs/using-the-compiler.rst
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ Input Description
// transientStorageLayout - Slots, offsets and types of the contract's state variables in transient storage.
// evm.assembly - New assembly format
// evm.legacyAssembly - Old-style assembly format in JSON
// evm.bytecode.assemblyStructure - Structure of the bytecode, providing offsets, lengths and creation indicators.
// evm.bytecode.functionDebugData - Debugging information at function level
// evm.bytecode.object - Bytecode object
// evm.bytecode.opcodes - Opcodes list
Expand Down Expand Up @@ -603,6 +604,36 @@ Output Description
"legacyAssembly": {},
// Bytecode and related details.
"bytecode": {
// Structure tree of the bytecode, providing information about the root (top-level) assembly including
// its nested assemblies.
"assemblyStructure": {
// Indicates whether the assembly in question is a creation or a runtime assembly.
"isCreation": true,
// Size of the assembly in bytes.
"length": 1595,
// Start position of the assembly relative to its parent.
"start": 0,
// List of nested assemblies (sub assemblies).
"subAssemblies": [
{
"isCreation": false,
"length": 684,
"start": 877
},
{
"isCreation": true,
"length": 34,
"start": 1561,
"subAssemblies": [
{
"isCreation": false,
"length": 8,
"start": 26
}
]
}
]
},
// Debugging data at the level of functions.
"functionDebugData": {
// Now follows a set of functions including compiler-internal and
Expand Down
15 changes: 10 additions & 5 deletions libevmasm/Assembly.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be implemented for EOF as well (i.e. in assembleEOF(), or, if possible, just in assemble() covering both with the same code).

Since we're at the stage where EOF is passing semantic tests (and they will be enabled by default quite soon), we should start requiring all new features to work on EOF as well.

Original file line number Diff line number Diff line change
Expand Up @@ -1257,12 +1257,11 @@ LinkerObject const& Assembly::assembleLegacy() const
solAssert(m_assembledObject.linkReferences.empty());

LinkerObject& ret = m_assembledObject;

size_t subTagSize = 1;
std::map<u256, LinkerObject::ImmutableRefs> immutableReferencesBySub;
for (auto const& sub: m_subs)
{
auto const& linkerObject = sub->assemble();
auto const& linkerObject = sub->assembleLegacy();
if (!linkerObject.immutableReferences.empty())
{
assertThrow(
Expand Down Expand Up @@ -1316,7 +1315,7 @@ LinkerObject const& Assembly::assembleLegacy() const

unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast<unsigned>(m_auxiliaryData.size());
for (auto const& sub: m_subs)
bytesRequiredIncludingData += static_cast<unsigned>(sub->assemble().bytecode.size());
bytesRequiredIncludingData += static_cast<unsigned>(sub->assembleLegacy().bytecode.size());

unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData);
ret.bytecode.reserve(bytesRequiredIncludingData);
Expand Down Expand Up @@ -1366,7 +1365,7 @@ LinkerObject const& Assembly::assembleLegacy() const
case PushSubSize:
{
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assembleLegacy().bytecode.size();
item.setPushedValue(u256(s));
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
Expand Down Expand Up @@ -1463,7 +1462,7 @@ LinkerObject const& Assembly::assembleLegacy() const
std::map<LinkerObject, size_t> subAssemblyOffsets;
for (auto const& [subIdPath, bytecodeOffset]: subRefs)
{
LinkerObject subObject = subAssemblyById(subIdPath)->assemble();
LinkerObject subObject = subAssemblyById(subIdPath)->assembleLegacy();
bytesRef r(ret.bytecode.data() + bytecodeOffset, bytesPerDataRef);

// In order for de-duplication to kick in, not only must the bytecode be identical, but
Expand All @@ -1475,6 +1474,12 @@ LinkerObject const& Assembly::assembleLegacy() const
toBigEndian(ret.bytecode.size(), r);
subAssemblyOffsets[subObject] = ret.bytecode.size();
ret.bytecode += subObject.bytecode;
ret.subAssemblyData.push_back({
subAssemblyOffsets[subObject],
subObject.bytecode.size(),
subAssemblyById(subIdPath)->isCreation(),
subObject.subAssemblyData
});
}
for (auto const& ref: subObject.linkReferences)
ret.linkReferences[ref.first + subAssemblyOffsets[subObject]] = ref.second;
Expand Down
1 change: 0 additions & 1 deletion libevmasm/Assembly.h
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ class Assembly

private:
bool m_invalid = false;

Assembly const* subAssemblyById(size_t _subId) const;

void encodeAllPossibleSubPathsInAssemblyTree(std::vector<size_t> _pathFromRoot = {}, std::vector<Assembly*> _assembliesOnPath = {});
Expand Down
7 changes: 7 additions & 0 deletions libevmasm/EVMAssemblyStack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ void EVMAssemblyStack::assemble()

m_evmAssembly->optimise(m_optimiserSettings);
m_object = m_evmAssembly->assemble();
// Recreate subAssembly data to include parent object
m_object.subAssemblyData = {{
0,
m_object.bytecode.size(),
m_evmAssembly->isCreation(),
m_object.subAssemblyData
}};
// TODO: Check for EOF
solAssert(m_evmAssembly->codeSections().size() == 1);
m_sourceMapping = AssemblyItem::computeSourceMapping(m_evmAssembly->codeSections().front().items, sourceIndices());
Expand Down
9 changes: 9 additions & 0 deletions libevmasm/LinkerObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ struct LinkerObject
/// Bytecode offsets of named tags like function entry points.
std::map<std::string, FunctionDebugData> functionDebugData;

/// Describes where one assembly is located inside the bytecode, together with the
/// assemblies nested in it.
struct Structure {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe AssemblyStructure is better?

/// Offset at which the assembly's bytecode starts within its parent's bytecode.
size_t start;
/// Size of the assembly's bytecode in bytes.
size_t length;
/// Whether this is a creation assembly (as opposed to a runtime assembly).
bool isCreation;
/// Entries for the assemblies nested inside this one; empty by default.
std::vector<Structure> subAssemblies {};
};

/// Location information about the assemblies embedded in the bytecode.
/// Assembling appends one entry per sub-assembly whose bytecode gets appended; CompilerStack and
/// EVMAssemblyStack afterwards replace the list by a single root entry that describes the whole
/// object and holds the previous entries as its ``subAssemblies``.
std::vector<Structure> subAssemblyData;

/// Appends the bytecode of @a _other and incorporates its link references.
void append(LinkerObject const& _other);

Expand Down
7 changes: 7 additions & 0 deletions libsolidity/interface/CompilerStack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1470,6 +1470,13 @@ void CompilerStack::assembleYul(
{
// Assemble deployment (incl. runtime) object.
compiledContract.object = compiledContract.evmAssembly->assemble();
// Recreate subAssembly data to include parent object
compiledContract.object.subAssemblyData = {{
0,
compiledContract.object.bytecode.size(),
compiledContract.evmAssembly->isCreation(),
compiledContract.object.subAssemblyData
}};
}
catch (evmasm::AssemblyException const& error)
{
Expand Down
37 changes: 36 additions & 1 deletion libsolidity/interface/StandardCompiler.cpp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add the feature to the CLI too. We should keep them at parity (and it's a pain for testing/development if the only way to access a feature is through StandardJSON).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, Yul compilation is not covered. Aside from general inconsistency, this makes two-step compilation less powerful, which may be a problem for future parallelization.

Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ bool isArtifactRequested(Json const& _outputSelection, std::string const& _file,
/// Builds the list of output selection names available for one kind of EVM bytecode object.
/// Each name has the form "evm." + _objectKind + suffix, where the empty suffix stands for the
/// object as a whole.
/// @param _objectKind must be "bytecode" or "deployedBytecode" (asserted).
std::vector<std::string> evmObjectComponents(std::string const& _objectKind)
{
solAssert(_objectKind == "bytecode" || _objectKind == "deployedBytecode", "");
// NOTE(review): the next two declarations look like the removed and the added side of the same
// diff line; presumably only the second one (which adds ".assemblyStructure") remains in the file - confirm.
// NOTE(review): ".assemblyStructure" is offered for both object kinds, but the visible hunks only
// emit it into the creation "bytecode" JSON - confirm whether "deployedBytecode" should list it.
std::vector<std::string> components{"", ".object", ".opcodes", ".sourceMap", ".functionDebugData", ".generatedSources", ".linkReferences", ".ethdebug"};
std::vector<std::string> components{"", ".object", ".opcodes", ".sourceMap", ".functionDebugData", ".generatedSources", ".linkReferences", ".ethdebug", ".assemblyStructure"};
// Immutable references are only offered for the deployed (runtime) bytecode.
if (_objectKind == "deployedBytecode")
components.push_back(".immutableReferences");
// Presumably applyMap runs the lambda on every suffix and collects the prefixed names - verify against its definition.
return util::applyMap(components, [&](auto const& _s) { return "evm." + _objectKind + _s; });
Expand Down Expand Up @@ -1304,6 +1304,8 @@ Json StandardCompiler::importEVMAssembly(StandardCompiler::InputsAndSettings _in
creationJSON["linkReferences"] = formatLinkReferences(stack.object(sourceName).linkReferences);
if (evmCreationArtifactRequested("ethdebug"))
creationJSON["ethdebug"] = stack.ethdebug(sourceName);
if (evmCreationArtifactRequested("assemblyStructure"))
creationJSON["assemblyStructure"] = formatAssemblyStructure(stack.object(sourceName).subAssemblyData);
evmData["bytecode"] = creationJSON;
}

Expand Down Expand Up @@ -1581,6 +1583,8 @@ Json StandardCompiler::compileSolidity(StandardCompiler::InputsAndSettings _inpu
creationJSON["generatedSources"] = compilerStack.generatedSources(contractName, /* _runtime */ false);
if (evmCreationArtifactRequested("ethdebug"))
creationJSON["ethdebug"] = compilerStack.ethdebug(contractName);
if (evmCreationArtifactRequested("assemblyStructure"))
creationJSON["assemblyStructure"] = formatAssemblyStructure(compilerStack.object(contractName).subAssemblyData);
evmData["bytecode"] = creationJSON;
}

Expand Down Expand Up @@ -1882,3 +1886,34 @@ Json StandardCompiler::formatFunctionDebugData(

return ret;
}

/// Converts the assembly structure tree of a linker object into its JSON representation.
///
/// Only the first element of @a _assemblyStructure is considered; it is treated as the root
/// (top-level) assembly. An empty input yields an empty JSON object.
///
/// Every node is rendered as an object with the keys ``start``, ``length`` and ``isCreation``.
/// The root always carries a ``subAssemblies`` array (possibly empty), whereas nested nodes only
/// carry it if they contain nested assemblies themselves.
///
/// @param _assemblyStructure list whose first element, if any, describes the root assembly.
/// @returns the JSON object describing the root assembly and, recursively, its nested assemblies.
Json StandardCompiler::formatAssemblyStructure(std::vector<evmasm::LinkerObject::Structure> const& _assemblyStructure)
{
	if (_assemblyStructure.empty())
		return Json::object();

	// Recursive generic lambda that receives itself as its first argument. This avoids the
	// type-erased std::function that a self-referencing lambda would otherwise require and
	// keeps the node formatting in a single place for both the root and the nested nodes.
	auto formatNode = [](
		auto const& _self,
		evmasm::LinkerObject::Structure const& _node,
		bool _alwaysListSubAssemblies
	) -> Json
	{
		Json node = {
			{"start", Json::number_unsigned_t(_node.start)},
			{"length", Json::number_unsigned_t(_node.length)},
			{"isCreation", Json::boolean_t(_node.isCreation)}
		};
		if (_alwaysListSubAssemblies || !_node.subAssemblies.empty())
		{
			Json& subAssemblies = node["subAssemblies"];
			subAssemblies = Json::array();
			for (auto const& subAssembly: _node.subAssemblies)
				// The returned temporary is moved into the array rather than copied.
				subAssemblies.push_back(_self(_self, subAssembly, false));
		}
		return node;
	};

	return formatNode(formatNode, _assemblyStructure.front(), true);
}
2 changes: 2 additions & 0 deletions libsolidity/interface/StandardCompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class StandardCompiler
std::map<std::string, evmasm::LinkerObject::FunctionDebugData> const& _debugInfo
);

static Json formatAssemblyStructure(std::vector<evmasm::LinkerObject::Structure> const& _assemblyStructure);

private:
struct InputsAndSettings
{
Expand Down
30 changes: 27 additions & 3 deletions solc/CommandLineInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ static std::string const g_strSrcMapRuntime = "srcmap-runtime";
static std::string const g_strStorageLayout = "storage-layout";
static std::string const g_strTransientStorageLayout = "transient-storage-layout";
static std::string const g_strVersion = "version";
static std::string const g_strAssemblyStructure = "assembly-structure";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: options are in alphabetical order here.


static bool needsHumanTargetedStdout(CommandLineOptions const& _options)
{
Expand All @@ -159,7 +160,8 @@ static bool needsHumanTargetedStdout(CommandLineOptions const& _options)
_options.compiler.outputs.opcodes ||
_options.compiler.outputs.signatureHashes ||
_options.compiler.outputs.storageLayout ||
_options.compiler.outputs.transientStorageLayout;
_options.compiler.outputs.transientStorageLayout ||
_options.compiler.outputs.assemblyStructure;
}

static bool coloredOutput(CommandLineOptions const& _options)
Expand Down Expand Up @@ -210,7 +212,6 @@ void CommandLineInterface::handleBinary(std::string const& _contract)
binary = objectWithLinkRefsHex(m_assemblyStack->object(_contract));
if (m_options.compiler.outputs.binaryRuntime)
binaryRuntime = objectWithLinkRefsHex(m_assemblyStack->runtimeObject(_contract));

if (m_options.compiler.outputs.binary)
{
if (!m_options.output.dir.empty())
Expand Down Expand Up @@ -591,6 +592,22 @@ void CommandLineInterface::handleEthdebug(std::string const& _contract)
}
}

/// Writes the assembly structure of @a _contract as JSON to sout(), preceded by the heading
/// "Assembly structure:". After the sanity assertions on input mode and compilation state,
/// nothing is printed unless the ``assembly-structure`` output was requested.
void CommandLineInterface::handleAssemblyStructure(std::string const& _contract)
{
	solAssert(CompilerInputModes.count(m_options.input.mode) == 1);
	solAssert(m_compiler->compilationSuccessful());

	if (m_options.compiler.outputs.assemblyStructure)
	{
		solAssert(m_assemblyStack);

		// Format the structure tree first, then strip null members before pretty-printing.
		auto const structureJson = removeNullMembers(
			StandardCompiler::formatAssemblyStructure(m_assemblyStack->object(_contract).subAssemblyData)
		);
		std::string const formatted = jsonPrint(structureJson, m_options.formatting.json);

		sout() << "Assembly structure:" << std::endl;
		sout() << formatted << std::endl;
	}
}

void CommandLineInterface::readInputFiles()
{
solAssert(!m_standardJsonInput.has_value());
Expand Down Expand Up @@ -960,6 +977,7 @@ void CommandLineInterface::compile()
m_options.compiler.outputs.binaryRuntime ||
m_options.compiler.outputs.ethdebug ||
m_options.compiler.outputs.ethdebugRuntime ||
m_options.compiler.outputs.assemblyStructure ||
(m_options.compiler.combinedJsonRequests && (
m_options.compiler.combinedJsonRequests->binary ||
m_options.compiler.combinedJsonRequests->binaryRuntime ||
Expand All @@ -970,7 +988,8 @@ void CommandLineInterface::compile()
m_options.compiler.combinedJsonRequests->srcMap ||
m_options.compiler.combinedJsonRequests->srcMapRuntime ||
m_options.compiler.combinedJsonRequests->funDebug ||
m_options.compiler.combinedJsonRequests->funDebugRuntime
m_options.compiler.combinedJsonRequests->funDebugRuntime ||
m_options.compiler.combinedJsonRequests->assemblyStructure
));

m_compiler->selectContracts({{"", {{"", pipelineConfig}}}});
Expand Down Expand Up @@ -1105,6 +1124,10 @@ void CommandLineInterface::handleCombinedJSON()
contractData[g_strFunDebugRuntime] = StandardCompiler::formatFunctionDebugData(
m_assemblyStack->runtimeObject(contractName).functionDebugData
);
if (m_options.compiler.combinedJsonRequests->assemblyStructure)
contractData[g_strAssemblyStructure] = StandardCompiler::formatAssemblyStructure(
m_assemblyStack->object(contractName).subAssemblyData
);
}
}

Expand Down Expand Up @@ -1467,6 +1490,7 @@ void CommandLineInterface::outputCompilationResults()
handleNatspec(true, contract);
handleNatspec(false, contract);
handleEthdebug(contract);
handleAssemblyStructure(contract);
} // end of contracts iteration
}

Expand Down
1 change: 1 addition & 0 deletions solc/CommandLineInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ class CommandLineInterface
void handleStorageLayout(std::string const& _contract);
void handleTransientStorageLayout(std::string const& _contract);
void handleEthdebug(std::string const& _contract);
void handleAssemblyStructure(std::string const& _contract);

/// Tries to read @ m_sourceCodes as a JSONs holding ASTs
/// such that they can be imported into the compiler (importASTs())
Expand Down
5 changes: 4 additions & 1 deletion solc/CommandLineParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,15 @@ void CommandLineParser::parseOutputSelection()
CompilerOutputs::componentName(&CompilerOutputs::asmJson),
CompilerOutputs::componentName(&CompilerOutputs::yulCFGJson),
CompilerOutputs::componentName(&CompilerOutputs::ethdebug),
CompilerOutputs::componentName(&CompilerOutputs::assemblyStructure),
};
static std::set<std::string> const evmAssemblyJsonImportModeOutputs = {
CompilerOutputs::componentName(&CompilerOutputs::asm_),
CompilerOutputs::componentName(&CompilerOutputs::binary),
CompilerOutputs::componentName(&CompilerOutputs::binaryRuntime),
CompilerOutputs::componentName(&CompilerOutputs::opcodes),
CompilerOutputs::componentName(&CompilerOutputs::asmJson),
CompilerOutputs::componentName(&CompilerOutputs::assemblyStructure),
};
switch (_mode)
{
Expand Down Expand Up @@ -775,6 +777,7 @@ General Information)").c_str(),
(CompilerOutputs::componentName(&CompilerOutputs::metadata).c_str(), "Combined Metadata JSON whose IPFS hash is stored on-chain.")
(CompilerOutputs::componentName(&CompilerOutputs::storageLayout).c_str(), "Slots, offsets and types of the contract's state variables located in storage.")
(CompilerOutputs::componentName(&CompilerOutputs::transientStorageLayout).c_str(), "Slots, offsets and types of the contract's state variables located in transient storage.")
(CompilerOutputs::componentName(&CompilerOutputs::assemblyStructure).c_str(), "Structure of the assembly and its subassemblies providing offsets, lengths and creation indicators.")
;
if (!_forHelp) // Note: We intentionally keep this undocumented for now.
{
Expand Down Expand Up @@ -1577,7 +1580,7 @@ void CommandLineParser::parseCombinedJsonOption()
&CombinedJsonRequests::natspecUser,
&CombinedJsonRequests::signatureHashes,
&CombinedJsonRequests::storageLayout,
&CombinedJsonRequests::transientStorageLayout
&CombinedJsonRequests::transientStorageLayout,
};

for (auto const invalidOption: invalidOptions)
Expand Down
4 changes: 4 additions & 0 deletions solc/CommandLineParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ struct CompilerOutputs
{"yul-cfg-json", &CompilerOutputs::yulCFGJson},
{"ethdebug", &CompilerOutputs::ethdebug},
{"ethdebug-runtime", &CompilerOutputs::ethdebugRuntime},
{"assembly-structure", &CompilerOutputs::assemblyStructure},
};
return components;
}
Expand All @@ -114,6 +115,7 @@ struct CompilerOutputs
bool transientStorageLayout = false;
bool ethdebug = false;
bool ethdebugRuntime = false;
bool assemblyStructure = false;
};

struct CombinedJsonRequests
Expand Down Expand Up @@ -144,6 +146,7 @@ struct CombinedJsonRequests
{"devdoc", &CombinedJsonRequests::natspecDev},
{"userdoc", &CombinedJsonRequests::natspecUser},
{"ast", &CombinedJsonRequests::ast},
{"assembly-structure", &CombinedJsonRequests::assemblyStructure},
};
return components;
}
Expand All @@ -166,6 +169,7 @@ struct CombinedJsonRequests
bool natspecDev = false;
bool natspecUser = false;
bool ast = false;
bool assemblyStructure = false;
};

struct CommandLineOptions
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ set(libsolidity_sources
libsolidity/SolidityParser.cpp
libsolidity/SolidityTypes.cpp
libsolidity/StandardCompiler.cpp
libsolidity/SubAssemblyOffsetsTest.cpp
libsolidity/SyntaxTest.cpp
libsolidity/SyntaxTest.h
libsolidity/ViewPureChecker.cpp
Expand Down
1 change: 1 addition & 0 deletions test/cmdlineTests/assembly_structure/args
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--assembly-structure --pretty-json --json-indent 4 --no-cbor-metadata
Loading