Skip to content

Commit 1f6b943

Browse files
charles-coopercyberthirstchen-robert
authoredJun 2, 2024··
fix[codegen]: zero-length dynarray abi_decode validation (#4060)
fix an edge case in `abi_decode` dynarray validation. when the child type is dynamic and the runtime length is zero, the check that the offset pointer is valid (points within the payload) was skipped. skipping the offset pointer check is valid any time the runtime length is nonzero, because the pointer is bounded by the checks in the recursive runtime loop in `_dynarray_make_setter`. however, it is invalid to skip the check when the runtime length of the dynarray is zero, because then the recursive loop does not get run. the impact of this can be seen in the included test cases, particularly `test_abi_decode_top_level_head_oob`. although as of eb01136 it is impossible to convince the decoder to *copy* oob data since the validation is only skipped when the length is zero, a payload can be crafted which will revert depending on if some value outside of the buffer is nonzero (i.e. the runtime behavior can be influenced by some data outside of the payload). this commit fixes this issue by _unconditionally_ checking that the offset pointer is valid. note that the check is now always performed, even when the runtime length is nonzero and therefore the check is redundant (because, as stated, the checks within the loop already bound the offset pointer). a more efficient implementation is possible, since the check only needs to be run in the case that the runtime length is 0, which theoretically can be merged into the same basic block with the 0-case in the `repeat` loop. however, this commit leaves that to future optimizer work; the optimization here is it just avoids the multiplication when the child type is dynamic (because the result of the multiplication is always 0). this commit also fixes another bug in dynarray recursion; the calculation in `_abi_payload_size` was not correct when the size of the child type is larger than 32. misc: - add additional tests for abi_decode validation. --------- Co-authored-by: cyberthirst <cyberthirst.eth@gmail.com> Co-authored-by: Robert Chen <me@robertchen.cc>
1 parent e52241a commit 1f6b943

File tree

2 files changed

+259
-7
lines changed

2 files changed

+259
-7
lines changed
 

‎tests/functional/builtins/codegen/test_abi_decode.py

+246-3
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,8 @@ def f(x: Bytes[32 * 3]):
495495
decoded_y1: Bytes[32] = _abi_decode(y, Bytes[32])
496496
a = b"bar"
497497
decoded_y2: Bytes[32] = _abi_decode(y, Bytes[32])
498-
499-
assert decoded_y1 != decoded_y2
498+
# original POC:
499+
# assert decoded_y1 != decoded_y2
500500
"""
501501
c = get_contract(code)
502502

@@ -1043,7 +1043,7 @@ def run():
10431043
c.run()
10441044

10451045

1046-
def test_abi_decode_extcall_zero_len_array(get_contract):
1046+
def test_abi_decode_extcall_empty_array(get_contract):
10471047
code = """
10481048
@external
10491049
def bar() -> (uint256, uint256):
@@ -1061,6 +1061,59 @@ def run():
10611061
c.run()
10621062

10631063

1064+
def test_abi_decode_extcall_complex_empty_dynarray(get_contract):
1065+
# 5th word of the payload points to the last word of the payload
1066+
# which is considered the length of the Point.y array
1067+
# because the length is 0, the decoding should succeed
1068+
code = """
1069+
struct Point:
1070+
x: uint256
1071+
y: DynArray[uint256, 2]
1072+
z: uint256
1073+
1074+
@external
1075+
def bar() -> (uint256, uint256, uint256, uint256, uint256, uint256):
1076+
return 32, 1, 32, 1, 64, 0
1077+
1078+
interface A:
1079+
def bar() -> DynArray[Point, 2]: nonpayable
1080+
1081+
@external
1082+
def run():
1083+
x: DynArray[Point, 2] = extcall A(self).bar()
1084+
assert len(x) == 1 and len(x[0].y) == 0
1085+
"""
1086+
c = get_contract(code)
1087+
1088+
c.run()
1089+
1090+
1091+
def test_abi_decode_extcall_complex_empty_dynarray2(tx_failed, get_contract):
1092+
# top-level head points 1B over the runtime buffer end
1093+
# thus the decoding should fail although the length is 0
1094+
code = """
1095+
struct Point:
1096+
x: uint256
1097+
y: DynArray[uint256, 2]
1098+
z: uint256
1099+
1100+
@external
1101+
def bar() -> (uint256, uint256):
1102+
return 33, 0
1103+
1104+
interface A:
1105+
def bar() -> DynArray[Point, 2]: nonpayable
1106+
1107+
@external
1108+
def run():
1109+
x: DynArray[Point, 2] = extcall A(self).bar()
1110+
"""
1111+
c = get_contract(code)
1112+
1113+
with tx_failed():
1114+
c.run()
1115+
1116+
10641117
def test_abi_decode_extcall_zero_len_array2(get_contract):
10651118
code = """
10661119
@external
@@ -1080,3 +1133,193 @@ def run() -> uint256:
10801133
length = c.run()
10811134

10821135
assert length == 0
1136+
1137+
1138+
def test_abi_decode_top_level_head_oob(tx_failed, get_contract):
1139+
code = """
1140+
@external
1141+
def run(x: Bytes[256], y: uint256):
1142+
player_lost: bool = empty(bool)
1143+
1144+
if y == 1:
1145+
player_lost = True
1146+
1147+
decoded: DynArray[Bytes[1], 2] = empty(DynArray[Bytes[1], 2])
1148+
decoded = _abi_decode(x, DynArray[Bytes[1], 2])
1149+
"""
1150+
c = get_contract(code)
1151+
1152+
# head points over the buffer end
1153+
payload = (0x0100, *_replicate(0x00, 7))
1154+
1155+
data = _abi_payload_from_tuple(payload)
1156+
1157+
with tx_failed():
1158+
c.run(data, 1)
1159+
1160+
with tx_failed():
1161+
c.run(data, 0)
1162+
1163+
1164+
def test_abi_decode_dynarray_complex_insufficient_data(env, tx_failed, get_contract):
1165+
code = """
1166+
struct Point:
1167+
x: uint256
1168+
y: uint256
1169+
1170+
@external
1171+
def run(x: Bytes[32 * 8]):
1172+
y: Bytes[32 * 8] = x
1173+
decoded_y1: DynArray[Point, 3] = _abi_decode(y, DynArray[Point, 3])
1174+
"""
1175+
c = get_contract(code)
1176+
1177+
# runtime buffer has insufficient size - we decode 3 points, but provide only
1178+
# 3 * 32B of payload
1179+
payload = (0x20, 0x03, *_replicate(0x03, 3))
1180+
1181+
data = _abi_payload_from_tuple(payload)
1182+
1183+
with tx_failed():
1184+
c.run(data)
1185+
1186+
1187+
def test_abi_decode_dynarray_complex2(env, tx_failed, get_contract):
1188+
# point head to the 1st 0x01 word (ie the length)
1189+
# but size of the point is 3 * 32B, thus we'd decode 2B over the buffer end
1190+
code = """
1191+
struct Point:
1192+
x: uint256
1193+
y: uint256
1194+
z: uint256
1195+
1196+
1197+
@external
1198+
def run(x: Bytes[32 * 8]):
1199+
y: Bytes[32 * 11] = x
1200+
decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2])
1201+
"""
1202+
c = get_contract(code)
1203+
1204+
payload = (
1205+
0xC0, # points to the 1st 0x01 word (ie the length)
1206+
*_replicate(0x03, 5),
1207+
*_replicate(0x01, 2),
1208+
)
1209+
1210+
data = _abi_payload_from_tuple(payload)
1211+
1212+
with tx_failed():
1213+
c.run(data)
1214+
1215+
1216+
def test_abi_decode_complex_empty_dynarray(env, tx_failed, get_contract):
1217+
# point head to the last word of the payload
1218+
# this will be the length, but because it's set to 0, the decoding should succeed
1219+
code = """
1220+
struct Point:
1221+
x: uint256
1222+
y: DynArray[uint256, 2]
1223+
z: uint256
1224+
1225+
1226+
@external
1227+
def run(x: Bytes[32 * 16]):
1228+
y: Bytes[32 * 16] = x
1229+
decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2])
1230+
assert len(decoded_y1) == 1 and len(decoded_y1[0].y) == 0
1231+
"""
1232+
c = get_contract(code)
1233+
1234+
payload = (
1235+
0x20,
1236+
0x01,
1237+
0x20,
1238+
0x01,
1239+
0xA0, # points to the last word of the payload
1240+
0x04,
1241+
0x02,
1242+
0x02,
1243+
0x00, # length is 0, so decoding should succeed
1244+
)
1245+
1246+
data = _abi_payload_from_tuple(payload)
1247+
1248+
c.run(data)
1249+
1250+
1251+
def test_abi_decode_complex_arithmetic_overflow(tx_failed, get_contract):
1252+
# inner head roundtrips due to arithmetic overflow
1253+
code = """
1254+
struct Point:
1255+
x: uint256
1256+
y: DynArray[uint256, 2]
1257+
z: uint256
1258+
1259+
1260+
@external
1261+
def run(x: Bytes[32 * 16]):
1262+
y: Bytes[32 * 16] = x
1263+
decoded_y1: DynArray[Point, 2] = _abi_decode(y, DynArray[Point, 2])
1264+
"""
1265+
c = get_contract(code)
1266+
1267+
payload = (
1268+
0x20,
1269+
0x01,
1270+
0x20,
1271+
0x01, # both Point.x and Point.y length
1272+
2**256 - 0x20, # points to the "previous" word of the payload
1273+
0x04,
1274+
0x02,
1275+
0x02,
1276+
0x00,
1277+
)
1278+
1279+
data = _abi_payload_from_tuple(payload)
1280+
1281+
with tx_failed():
1282+
c.run(data)
1283+
1284+
1285+
def test_abi_decode_empty_toplevel_dynarray(get_contract):
1286+
code = """
1287+
@external
1288+
def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]):
1289+
y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x
1290+
assert len(y) == 2 * 32
1291+
decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode(
1292+
y,
1293+
DynArray[DynArray[uint256, 3], 3]
1294+
)
1295+
assert len(decoded_y1) == 0
1296+
"""
1297+
c = get_contract(code)
1298+
1299+
payload = (0x20, 0x00) # DynArray head, DynArray length
1300+
1301+
data = _abi_payload_from_tuple(payload)
1302+
1303+
c.run(data)
1304+
1305+
1306+
def test_abi_decode_invalid_toplevel_dynarray_head(tx_failed, get_contract):
1307+
# head points 1B over the bounds of the runtime buffer
1308+
code = """
1309+
@external
1310+
def run(x: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4]):
1311+
y: Bytes[2 * 32 + 3 * 32 + 3 * 32 * 4] = x
1312+
decoded_y1: DynArray[DynArray[uint256, 3], 3] = _abi_decode(
1313+
y,
1314+
DynArray[DynArray[uint256, 3], 3]
1315+
)
1316+
"""
1317+
c = get_contract(code)
1318+
1319+
# head points 1B over the bounds of the runtime buffer
1320+
payload = (0x21, 0x00) # DynArray head, DynArray length
1321+
1322+
data = _abi_payload_from_tuple(payload)
1323+
1324+
with tx_failed():
1325+
c.run(data)

‎vyper/codegen/core.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -889,11 +889,17 @@ def _dirty_read_risk(ir_node):
889889
def _abi_payload_size(ir_node):
890890
SCALE = ir_node.location.word_scale
891891
assert SCALE == 32 # we must be in some byte-addressable region, like memory
892-
893892
OFFSET = DYNAMIC_ARRAY_OVERHEAD * SCALE
894893

895894
if isinstance(ir_node.typ, DArrayT):
896-
return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), SCALE]]
895+
# the amount of size each value occupies in static section
896+
# (the amount of size it occupies in the dynamic section is handled in
897+
# make_setter recursion)
898+
item_size = ir_node.typ.value_type.abi_type.static_size()
899+
if item_size == 0:
900+
# manual optimization; the mload cannot currently be optimized out
901+
return ["add", OFFSET, 0]
902+
return ["add", OFFSET, ["mul", get_dyn_array_count(ir_node), item_size]]
897903

898904
if isinstance(ir_node.typ, _BytestringT):
899905
return ["add", OFFSET, get_bytearray_length(ir_node)]
@@ -1175,14 +1181,17 @@ def clamp_dyn_array(ir_node, hi=None):
11751181

11761182
assert (hi is not None) == _dirty_read_risk(ir_node)
11771183

1178-
# if the subtype is dynamic, the check will be performed in the recursion
1179-
if hi is not None and not t.abi_type.subtyp.is_dynamic():
1184+
if hi is not None:
11801185
assert t.count < 2**64 # sanity check
11811186

11821187
# note: this add does not risk arithmetic overflow because
11831188
# length is bounded by count * elemsize.
11841189
item_end = add_ofst(ir_node, _abi_payload_size(ir_node))
11851190

1191+
# if the subtype is dynamic, the length check is performed in
1192+
# the recursion, UNLESS the count is zero. here we perform the
1193+
# check all the time, but it could maybe be optimized out in the
1194+
# make_setter loop (in the common case that runtime count > 0).
11861195
len_check = ["seq", ["assert", ["le", item_end, hi]], len_check]
11871196

11881197
return IRnode.from_list(len_check, error_msg=f"{ir_node.typ} bounds check")

0 commit comments

Comments
 (0)
Please sign in to comment.