Skip to content

Commit 73af652

Browse files
committed
PlainAssemblyParser: Syntax for subassemblies
1 parent 09fc1d4 commit 73af652

File tree

6 files changed

+347
-4
lines changed

6 files changed

+347
-4
lines changed

test/libevmasm/PlainAssemblyParser.cpp

+52-4
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,16 @@ Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _sou
3939
{
4040
m_sourceStream = std::istringstream(_source);
4141
m_sourceName = std::move(_sourceName);
42-
Json codeJSON = Json::array();
4342
m_lineNumber = 0;
4443

45-
if (!m_line.has_value())
46-
advanceLine();
44+
advanceLine();
45+
return parseAssembly(0);
46+
}
47+
48+
Json PlainAssemblyParser::parseAssembly(size_t _nestingLevel)
49+
{
50+
Json assemblyJSON = {{".code", Json::array()}};
51+
Json& codeJSON = assemblyJSON[".code"];
4752

4853
while (m_line.has_value())
4954
{
@@ -52,6 +57,25 @@ Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _sou
5257
advanceLine();
5358
continue;
5459
}
60+
61+
size_t newLevel = parseNestingLevel();
62+
if (newLevel > _nestingLevel)
63+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Indentation does not match the current subassembly nesting level.")));
64+
65+
if (newLevel < _nestingLevel)
66+
return assemblyJSON;
67+
68+
if (currentToken().value == ".sub")
69+
{
70+
advanceLine();
71+
72+
std::string nextDataIndex = std::to_string(assemblyJSON[".data"].size());
73+
assemblyJSON[".data"][nextDataIndex] = parseAssembly(_nestingLevel + 1);
74+
continue;
75+
}
76+
else if (assemblyJSON.contains(".data"))
77+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("The code of an assembly must be specified before its subassemblies.")));
78+
5579
if (c_instructions.contains(currentToken().value))
5680
{
5781
expectNoMoreArguments();
@@ -91,7 +115,21 @@ Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _sou
91115

92116
advanceLine();
93117
}
94-
return {{".code", codeJSON}};
118+
119+
return assemblyJSON;
120+
}
121+
122+
size_t PlainAssemblyParser::parseNestingLevel() const
123+
{
124+
std::string_view indentationString = indentation();
125+
126+
if (indentationString != std::string(indentationString.size(), ' '))
127+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Non-space characters used for indentation.")));
128+
129+
if (indentationString.size() % 4 != 0)
130+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Each indentation level must consist of 4 spaces.")));
131+
132+
return indentationString.size() / 4;
95133
}
96134

97135
PlainAssemblyParser::Token const& PlainAssemblyParser::currentToken() const
@@ -106,6 +144,16 @@ PlainAssemblyParser::Token const& PlainAssemblyParser::nextToken() const
106144
return m_lineTokens[m_tokenIndex + 1];
107145
}
108146

147+
std::string_view PlainAssemblyParser::indentation() const
148+
{
149+
soltestAssert(m_line.has_value());
150+
151+
if (m_lineTokens.empty())
152+
return *m_line;
153+
154+
return std::string_view(*m_line).substr(0, m_lineTokens.at(0).position);
155+
}
156+
109157
bool PlainAssemblyParser::advanceToken()
110158
{
111159
if (!hasMoreTokens())

test/libevmasm/PlainAssemblyParser.h

+11
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,18 @@ namespace solidity::evmasm::test
3737
/// - A non-empty line represents a single assembly item.
3838
/// - The name of the item is the first thing on the line and may consist of one or more tokens.
3939
/// - Zero or more arguments follow the name (e.g. plain instructions and '.sub' take none).
40+
/// - Indentation determines assembly nesting level (4 spaces per level).
41+
/// - A new subassembly starts with '.sub' and contains all subsequent lines at a higher nesting level.
42+
/// The first line at the same or lower nesting level ends the subassembly.
43+
/// - Subassemblies can be nested to arbitrary depth.
44+
/// - The code of an assembly must be specified before its subassemblies.
4045
///
4146
/// Supported items:
4247
/// - All instruction names.
4348
/// - PUSH <hex value>
4449
/// - PUSH [tag] <tagID>
4550
/// - tag <tagID>
51+
/// - .sub
4652
class PlainAssemblyParser
4753
{
4854
public:
@@ -57,10 +63,15 @@ class PlainAssemblyParser
5763
size_t position; ///< Position of the first character of the token within m_line.
5864
};
5965

66+
Json parseAssembly(size_t _nestingLevel);
67+
size_t parseNestingLevel() const;
68+
6069
Token const& currentToken() const;
6170
Token const& nextToken() const;
6271
bool hasMoreTokens() const { return m_tokenIndex + 1 < m_lineTokens.size(); }
6372

73+
std::string_view indentation() const;
74+
6475
bool advanceToken();
6576
std::string_view expectArgument();
6677
void expectNoMoreArguments();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// ====
2+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
3+
// ----
4+
// InputAssemblyJSON: {
5+
// ".code": []
6+
// }
7+
// Assembly:
8+
// Bytecode:
9+
// Opcodes:
10+
// SourceMappings:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
PUSH 0x2
2+
DUP1
3+
ADD
4+
5+
.sub
6+
PUSH 0x42
7+
DUP1
8+
MUL
9+
10+
.sub
11+
STOP
12+
// ====
13+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
14+
// ----
15+
// InputAssemblyJSON: {
16+
// ".code": [
17+
// {
18+
// "name": "PUSH",
19+
// "value": "2"
20+
// },
21+
// {
22+
// "name": "DUP1"
23+
// },
24+
// {
25+
// "name": "ADD"
26+
// }
27+
// ],
28+
// ".data": {
29+
// "0": {
30+
// ".code": [
31+
// {
32+
// "name": "PUSH",
33+
// "value": "42"
34+
// },
35+
// {
36+
// "name": "DUP1"
37+
// },
38+
// {
39+
// "name": "MUL"
40+
// }
41+
// ]
42+
// },
43+
// "1": {
44+
// ".code": [
45+
// {
46+
// "name": "STOP"
47+
// }
48+
// ]
49+
// }
50+
// }
51+
// }
52+
// Assembly:
53+
// 0x02
54+
// dup1
55+
// add
56+
// stop
57+
//
58+
// sub_0: assembly {
59+
// 0x42
60+
// dup1
61+
// mul
62+
// }
63+
//
64+
// sub_1: assembly {
65+
// stop
66+
// }
67+
// Bytecode: 60028001fe
68+
// Opcodes: PUSH1 0x2 DUP1 ADD INVALID
69+
// SourceMappings: :::-:0;;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
.sub
2+
.sub
3+
.sub
4+
// ====
5+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
6+
// ----
7+
// InputAssemblyJSON: {
8+
// ".code": [],
9+
// ".data": {
10+
// "0": {
11+
// ".code": []
12+
// },
13+
// "1": {
14+
// ".code": []
15+
// },
16+
// "2": {
17+
// ".code": []
18+
// }
19+
// }
20+
// }
21+
// Assembly:
22+
// stop
23+
//
24+
// sub_0: assembly {
25+
// }
26+
//
27+
// sub_1: assembly {
28+
// }
29+
//
30+
// sub_2: assembly {
31+
// }
32+
// Bytecode: fe
33+
// Opcodes: INVALID
34+
// SourceMappings:

0 commit comments

Comments
 (0)