From 103493ed0501996e1920f425f924be60c7fe6b29 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sun, 19 Jan 2025 17:57:39 +0800 Subject: [PATCH 01/33] [P2] Installation and Data Preloading Issue #11: add new type mk_cpu_pkt to support both data and ctrl type from cpu in one packet --- controller/ControllerRTL.py | 17 ++++++------ controller/test/ControllerRTL_test.py | 39 +++++++++++++++++---------- lib/messages.py | 11 ++++++++ 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index f164f1d..a369948 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -20,7 +20,7 @@ class ControllerRTL(Component): - def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, + def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, CGRADataType, CGRAAddrType, multi_cgra_rows, multi_cgra_columns, controller_id, controller2addr_map, idTo2d_map): @@ -36,8 +36,8 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, s.recv_from_noc = RecvIfcRTL(NocPktType) s.send_to_noc = SendIfcRTL(NocPktType) - s.recv_from_cpu_ctrl_pkt = RecvIfcRTL(CtrlPktType) - s.send_to_ctrl_ring_ctrl_pkt = SendIfcRTL(CtrlPktType) + s.recv_from_cpu_pkt = RecvIfcRTL(CpuPktType) + s.send_to_ctrl_ring_pkt = SendIfcRTL(CpuPktType) # Request from/to tiles. s.recv_from_tile_load_request_pkt = RecvIfcRTL(NocPktType) @@ -70,7 +70,7 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, # termination). s.crossbar = XbarBypassQueueRTL(NocPktType, 3, 1) - s.recv_ctrl_pkt_queue = NormalQueueRTL(CtrlPktType) + s.recv_pkt_queue = NormalQueueRTL(CpuPktType) # # TODO: below ifcs should be connected through another NoC within # # one CGRA, instead of per-tile and performing like a bus. @@ -102,6 +102,7 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, assert addr2controller_vector[addr_base] == -1, f"address range [{begin_addr}, {end_addr}] overlaps with others." 
addr2controller_vector[addr_base] = ControllerIdType(src_controller_id) + # What does this do? Connect itself? s.addr2controller_lut[addr_base] //= ControllerIdType(src_controller_id) # Constructs the idTo2d lut. @@ -131,8 +132,8 @@ def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, # other CGRAs can be delivered via the NoC across CGRAs. Note that the packet # format can be in a universal fashion to support both data and config. Later # on, the format can be packet-based or flit-based. - s.recv_from_cpu_ctrl_pkt //= s.recv_ctrl_pkt_queue.recv - s.recv_ctrl_pkt_queue.send //= s.send_to_ctrl_ring_ctrl_pkt + s.recv_from_cpu_pkt //= s.recv_pkt_queue.recv + s.recv_pkt_queue.send //= s.send_to_ctrl_ring_pkt @update def update_received_msg(): @@ -271,7 +272,7 @@ def update_sending_to_noc_msg(): s.crossbar.send[0].msg.payload) def line_trace(s): - send_to_ctrl_ring_ctrl_pkt_str = "send_to_ctrl_ring_ctrl_pkt: " + str(s.send_to_ctrl_ring_ctrl_pkt.msg) + send_to_ctrl_ring_pkt_str = "send_to_ctrl_ring_pkt: " + str(s.send_to_ctrl_ring_pkt.msg) recv_from_tile_load_request_pkt_str = "recv_from_tile_load_request_pkt: " + str(s.recv_from_tile_load_request_pkt.msg) recv_from_tile_load_response_pkt_str = "recv_from_tile_load_response_pkt: " + str(s.recv_from_tile_load_response_pkt.msg) recv_from_tile_store_request_pkt_str = "recv_from_tile_store_request_pkt: " + str(s.recv_from_tile_store_request_pkt.msg) @@ -281,5 +282,5 @@ def line_trace(s): send_to_tile_store_request_data_str = "send_to_tile_store_request_data: " + str(s.send_to_tile_store_request_data.msg) recv_from_noc_str = "recv_from_noc_pkt: " + str(s.recv_from_noc.msg) send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_noc.msg) + "; rdy: " + str(s.send_to_noc.rdy) + "; val: " + str(s.send_to_noc.val) - return f'{send_to_ctrl_ring_ctrl_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || 
{send_to_tile_load_request_addr_str} || {send_to_tile_store_request_addr_str} || {send_to_tile_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' + return f'{send_to_ctrl_ring_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_tile_load_request_addr_str} || {send_to_tile_store_request_addr_str} || {send_to_tile_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 2942ab8..19d82a6 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -26,7 +26,7 @@ class TestHarness(Component): - def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, + def construct(s, ControllerIdType, CpuPktType, CmdType, MsgType, AddrType, PktType, controller_id, from_tile_load_request_pkt_msgs, from_tile_load_response_pkt_msgs, @@ -54,7 +54,7 @@ def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, s.src_from_noc_val_rdy = TestSrcRTL(PktType, from_noc_pkts) s.sink_to_noc_val_rdy = TestNetSinkRTL(PktType, expected_to_noc_pkts, cmp_fn = cmp_func) - s.dut = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, + s.dut = ControllerRTL(ControllerIdType, CmdType, CpuPktType, PktType, MsgType, AddrType, # Number of controllers globally (x/y dimension). 
1, num_terminals, @@ -75,9 +75,9 @@ def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, s.src_from_noc_val_rdy.send //= s.dut.recv_from_noc s.dut.send_to_noc //= s.sink_to_noc_val_rdy.recv - s.dut.recv_from_cpu_ctrl_pkt.val //= 0 - s.dut.recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() - s.dut.send_to_ctrl_ring_ctrl_pkt.rdy //= 0 + s.dut.recv_from_cpu_pkt.val //= 0 + s.dut.recv_from_cpu_pkt.msg //= CpuPktType() + s.dut.send_to_ctrl_ring_pkt.rdy //= 0 def done(s): return s.src_from_tile_load_request_pkt_en_rdy.done() and \ @@ -170,14 +170,25 @@ def mk_src_pkts(nterminals, lst): 3: [12, 15], } -CtrlPktType = mk_ring_across_tiles_pkt(nterminals, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) +# CtrlPktType = mk_ring_across_tiles_pkt(nterminals, +# num_ctrl_actions, +# ctrl_mem_size, +# num_ctrl_operations, +# num_fu_inports, +# num_fu_outports, +# num_tile_inports, +# num_tile_outports) +# CpuPktType = mk_cpu_pkt(1, +# nterminals, +# num_ctrl_actions, +# ctrl_mem_size, +# num_ctrl_operations, +# num_fu_inports, +# num_fu_outports, +# num_tile_inports, +# num_tile_outports) + +CpuPktType = mk_cpu_pkt(0) Pkt = mk_multi_cgra_noc_pkt(nterminals, 1, addr_nbits = addr_nbits, @@ -236,7 +247,7 @@ def mk_src_pkts(nterminals, lst): def test_simple(): print("controller2addr_map: ", controller2addr_map) - th = TestHarness(ControllerIdType, CtrlPktType, + th = TestHarness(ControllerIdType, CpuPktType, CmdType, DataType, AddrType, Pkt, controller_id, from_tile_load_request_pkts, diff --git a/lib/messages.py b/lib/messages.py index 0aab313..4b61db1 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -487,3 +487,14 @@ def str_func(s): namespace = {'__str__': str_func} ) +def mk_cpu_pkt(datatype_id, + # DataType + payload_nbits=16, predicate_nbits=1, bypass_nbits=1, + # CtrlPktType + nrouters = 4, ctrl_actions = 8, ctrl_mem_size = 4, ctrl_operations = 7, ctrl_fu_inports = 4, 
ctrl_fu_outports = 4, ctrl_tile_inports = 5, ctrl_tile_outports = 5, + prefix="CPUPkt"): + + if datatype_id == 0: + return mk_data(payload_nbits, predicate_nbits, bypass_nbits) + else: + return mk_ring_across_tiles_pkt(nrouters, ctrl_actions, ctrl_mem_size, ctrl_operations, ctrl_fu_inports, ctrl_fu_outports, ctrl_tile_inports, ctrl_tile_outports) \ No newline at end of file From 9182cfb334be47afbfa8ebebc478ff334b5d9242 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sun, 19 Jan 2025 18:20:52 +0800 Subject: [PATCH 02/33] [P2] Installation and Data Preloading Issue #11: add new type mk_cpu_pkt to support both data and ctrl type from cpu in one packet --- systolic/CgraSystolicArrayRTL.py | 6 +++--- .../test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/systolic/CgraSystolicArrayRTL.py b/systolic/CgraSystolicArrayRTL.py index 08ec167..4dafd65 100644 --- a/systolic/CgraSystolicArrayRTL.py +++ b/systolic/CgraSystolicArrayRTL.py @@ -45,7 +45,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_global) # Interfaces - s.recv_from_cpu_ctrl_pkt = RecvIfcRTL(CtrlPktType) + s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) s.recv_from_noc = RecvIfcRTL(NocPktType) s.send_to_noc = SendIfcRTL(NocPktType) @@ -99,13 +99,13 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_to_noc //= s.controller.send_to_noc # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_ctrl_pkt //= s.controller.recv_from_cpu_ctrl_pkt + s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt # Connects ring with each control memory. 
for i in range(s.num_tiles): s.ctrl_ring.send[i] //= s.tile[i].recv_ctrl_pkt - s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_ctrl_pkt + s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_pkt for i in range(1, s.num_tiles): s.ctrl_ring.recv[i].val //= 0 s.ctrl_ring.recv[i].msg //= CtrlPktType() diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index f54600b..768f935 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -60,7 +60,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, controller2addr_map, preload_data, preload_const) # Connections. - s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_pkt s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 From 6f5dde388a6a1ea2ce225b26f249c1e082735c2e Mon Sep 17 00:00:00 2001 From: yuqisun Date: Mon, 20 Jan 2025 22:58:20 +0800 Subject: [PATCH 03/33] [P2] Installation and Data Preloading Issue #11: rename variables to more meaningful --- controller/ControllerRTL.py | 133 ++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 67 deletions(-) diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index a369948..850bc86 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -24,7 +24,7 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, CGRADataType, CGRAAddrType, multi_cgra_rows, multi_cgra_columns, controller_id, controller2addr_map, idTo2d_map): - + # Checks for ring network. assert(multi_cgra_columns >= multi_cgra_rows) # Used for calculating the x/y position. 
@@ -37,30 +37,30 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, s.send_to_noc = SendIfcRTL(NocPktType) s.recv_from_cpu_pkt = RecvIfcRTL(CpuPktType) - s.send_to_ctrl_ring_pkt = SendIfcRTL(CpuPktType) + s.send_to_intra_cgra_pkt = SendIfcRTL(CpuPktType) # Request from/to tiles. - s.recv_from_tile_load_request_pkt = RecvIfcRTL(NocPktType) - s.recv_from_tile_load_response_pkt = RecvIfcRTL(NocPktType) - s.recv_from_tile_store_request_pkt = RecvIfcRTL(NocPktType) + s.recv_from_local_cgra_load_request_pkt = RecvIfcRTL(NocPktType) + s.recv_from_local_cgra_load_response_pkt = RecvIfcRTL(NocPktType) + s.recv_from_local_cgra_store_request_pkt = RecvIfcRTL(NocPktType) - s.send_to_tile_load_request_addr = SendIfcRTL(CGRAAddrType) - s.send_to_tile_load_response_data = SendIfcRTL(CGRADataType) - s.send_to_tile_store_request_addr = SendIfcRTL(CGRAAddrType) - s.send_to_tile_store_request_data = SendIfcRTL(CGRADataType) + s.send_to_local_cgra_load_request_addr = SendIfcRTL(CGRAAddrType) + s.send_to_local_cgra_load_response_data = SendIfcRTL(CGRADataType) + s.send_to_local_cgra_store_request_addr = SendIfcRTL(CGRAAddrType) + s.send_to_local_cgra_store_request_data = SendIfcRTL(CGRADataType) # Component - s.recv_from_tile_load_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.recv_from_tile_load_response_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.recv_from_tile_store_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_local_cgra_load_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_local_cgra_load_response_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_local_cgra_store_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.send_to_tile_load_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) - s.send_to_tile_load_response_data_queue = ChannelRTL(CGRADataType, latency = 1) - s.send_to_tile_store_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) - 
s.send_to_tile_store_request_data_queue = ChannelRTL(CGRADataType, latency = 1) + s.send_to_local_cgra_load_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) + s.send_to_local_cgra_load_response_data_queue = ChannelRTL(CGRADataType, latency = 1) + s.send_to_local_cgra_store_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) + s.send_to_local_cgra_store_request_data_queue = ChannelRTL(CGRADataType, latency = 1) # s.recv_from_other_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) - # s.send_to_tile_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) + # s.send_to_local_cgra_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) # s.send_to_other_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) # Crossbar with 3 inports (load and store requests towards remote @@ -102,7 +102,6 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, assert addr2controller_vector[addr_base] == -1, f"address range [{begin_addr}, {end_addr}] overlaps with others." addr2controller_vector[addr_base] = ControllerIdType(src_controller_id) - # What does this do? Connect itself? s.addr2controller_lut[addr_base] //= ControllerIdType(src_controller_id) # Constructs the idTo2d lut. @@ -115,15 +114,15 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, # Connections # Requests towards others, 1 cycle delay to improve timing. 
- s.recv_from_tile_load_request_pkt_queue.recv //= s.recv_from_tile_load_request_pkt - s.recv_from_tile_load_response_pkt_queue.recv //= s.recv_from_tile_load_response_pkt - s.recv_from_tile_store_request_pkt_queue.recv //= s.recv_from_tile_store_request_pkt + s.recv_from_local_cgra_load_request_pkt_queue.recv //= s.recv_from_local_cgra_load_request_pkt + s.recv_from_local_cgra_load_response_pkt_queue.recv //= s.recv_from_local_cgra_load_response_pkt + s.recv_from_local_cgra_store_request_pkt_queue.recv //= s.recv_from_local_cgra_store_request_pkt # Requests towards local from others, 1 cycle delay to improve timing. - s.send_to_tile_load_request_addr_queue.send //= s.send_to_tile_load_request_addr - s.send_to_tile_load_response_data_queue.send //= s.send_to_tile_load_response_data - s.send_to_tile_store_request_addr_queue.send //= s.send_to_tile_store_request_addr - s.send_to_tile_store_request_data_queue.send //= s.send_to_tile_store_request_data + s.send_to_local_cgra_load_request_addr_queue.send //= s.send_to_local_cgra_load_request_addr + s.send_to_local_cgra_load_response_data_queue.send //= s.send_to_local_cgra_load_response_data + s.send_to_local_cgra_store_request_addr_queue.send //= s.send_to_local_cgra_store_request_addr + s.send_to_local_cgra_store_request_data_queue.send //= s.send_to_local_cgra_store_request_data # For control signals delivery from CPU to tiles. # TODO: https://github.com/tancheng/VectorCGRA/issues/11 -- The request needs @@ -133,7 +132,7 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, # format can be in a universal fashion to support both data and config. Later # on, the format can be packet-based or flit-based. s.recv_from_cpu_pkt //= s.recv_pkt_queue.recv - s.recv_pkt_queue.send //= s.send_to_ctrl_ring_pkt + s.recv_pkt_queue.send //= s.send_to_intra_cgra_pkt @update def update_received_msg(): @@ -142,8 +141,8 @@ def update_received_msg(): kStoreRequestInportIdx = 2 # For the load request from local tiles. 
- s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_tile_load_request_pkt_queue.send.val - s.recv_from_tile_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy + s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_local_cgra_load_request_pkt_queue.send.val + s.recv_from_local_cgra_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy s.crossbar.recv[kLoadRequestInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -154,7 +153,7 @@ def update_received_msg(): 0, 0, CMD_LOAD_REQUEST, - s.recv_from_tile_load_request_pkt_queue.send.msg.addr, + s.recv_from_local_cgra_load_request_pkt_queue.send.msg.addr, 0, 1, 0) @@ -162,8 +161,8 @@ def update_received_msg(): # For the store request from local tiles. - s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_tile_store_request_pkt_queue.send.val - s.recv_from_tile_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy + s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_local_cgra_store_request_pkt_queue.send.val + s.recv_from_local_cgra_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy s.crossbar.recv[kStoreRequestInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -174,16 +173,16 @@ def update_received_msg(): 0, 0, CMD_STORE_REQUEST, - s.recv_from_tile_store_request_pkt_queue.send.msg.addr, - s.recv_from_tile_store_request_pkt_queue.send.msg.data, - s.recv_from_tile_store_request_pkt_queue.send.msg.predicate, + s.recv_from_local_cgra_store_request_pkt_queue.send.msg.addr, + s.recv_from_local_cgra_store_request_pkt_queue.send.msg.data, + s.recv_from_local_cgra_store_request_pkt_queue.send.msg.predicate, 0) # For the load response (i.e., the data towards other) from local memory. 
s.crossbar.recv[kLoadResponseInportIdx].val @= \ - s.recv_from_tile_load_response_pkt_queue.send.val - s.recv_from_tile_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy + s.recv_from_local_cgra_load_response_pkt_queue.send.val + s.recv_from_local_cgra_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy s.crossbar.recv[kLoadResponseInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -196,9 +195,9 @@ def update_received_msg(): CMD_LOAD_RESPONSE, # Retrieves the load (from NoC) address from the message. # The addr information is embedded in the message. - s.recv_from_tile_load_response_pkt_queue.send.msg.addr, - s.recv_from_tile_load_response_pkt_queue.send.msg.data, - s.recv_from_tile_load_response_pkt_queue.send.msg.predicate, + s.recv_from_local_cgra_load_response_pkt_queue.send.msg.addr, + s.recv_from_local_cgra_load_response_pkt_queue.send.msg.data, + s.recv_from_local_cgra_load_response_pkt_queue.send.msg.predicate, 0) # TODO: For the other cmd types. @@ -208,43 +207,43 @@ def update_received_msg(): # def update_received_msg_from_noc(): # Initiates the signals. 
- s.send_to_tile_load_request_addr_queue.recv.val @= 0 - s.send_to_tile_store_request_addr_queue.recv.val @= 0 - s.send_to_tile_store_request_data_queue.recv.val @= 0 - s.send_to_tile_load_response_data_queue.recv.val @= 0 - s.send_to_tile_load_request_addr_queue.recv.msg @= CGRAAddrType() - s.send_to_tile_store_request_addr_queue.recv.msg @= CGRAAddrType() - s.send_to_tile_store_request_data_queue.recv.msg @= CGRADataType() - s.send_to_tile_load_response_data_queue.recv.msg @= CGRADataType() + s.send_to_local_cgra_load_request_addr_queue.recv.val @= 0 + s.send_to_local_cgra_store_request_addr_queue.recv.val @= 0 + s.send_to_local_cgra_store_request_data_queue.recv.val @= 0 + s.send_to_local_cgra_load_response_data_queue.recv.val @= 0 + s.send_to_local_cgra_load_request_addr_queue.recv.msg @= CGRAAddrType() + s.send_to_local_cgra_store_request_addr_queue.recv.msg @= CGRAAddrType() + s.send_to_local_cgra_store_request_data_queue.recv.msg @= CGRADataType() + s.send_to_local_cgra_load_response_data_queue.recv.msg @= CGRADataType() s.recv_from_noc.rdy @= 0 # For the load request from NoC. 
received_pkt = s.recv_from_noc.msg if s.recv_from_noc.val: if s.recv_from_noc.msg.cmd == CMD_LOAD_REQUEST: - if s.send_to_tile_load_request_addr_queue.recv.rdy: + if s.send_to_local_cgra_load_request_addr_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_tile_load_request_addr_queue.recv.msg @= \ + s.send_to_local_cgra_load_request_addr_queue.recv.msg @= \ CGRAAddrType(received_pkt.addr) - s.send_to_tile_load_request_addr_queue.recv.val @= 1 + s.send_to_local_cgra_load_request_addr_queue.recv.val @= 1 elif s.recv_from_noc.msg.cmd == CMD_STORE_REQUEST: - if s.send_to_tile_store_request_addr_queue.recv.rdy & \ - s.send_to_tile_store_request_data_queue.recv.rdy: + if s.send_to_local_cgra_store_request_addr_queue.recv.rdy & \ + s.send_to_local_cgra_store_request_data_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_tile_store_request_addr_queue.recv.msg @= \ + s.send_to_local_cgra_store_request_addr_queue.recv.msg @= \ CGRAAddrType(received_pkt.addr) - s.send_to_tile_store_request_data_queue.recv.msg @= \ + s.send_to_local_cgra_store_request_data_queue.recv.msg @= \ CGRADataType(received_pkt.data, received_pkt.predicate, 0, 0) - s.send_to_tile_store_request_addr_queue.recv.val @= 1 - s.send_to_tile_store_request_data_queue.recv.val @= 1 + s.send_to_local_cgra_store_request_addr_queue.recv.val @= 1 + s.send_to_local_cgra_store_request_data_queue.recv.val @= 1 elif s.recv_from_noc.msg.cmd == CMD_LOAD_RESPONSE: - if s.send_to_tile_load_response_data_queue.recv.rdy: + if s.send_to_local_cgra_load_response_data_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_tile_load_response_data_queue.recv.msg @= \ + s.send_to_local_cgra_load_response_data_queue.recv.msg @= \ CGRADataType(received_pkt.data, received_pkt.predicate, 0, 0) - s.send_to_tile_load_response_data_queue.recv.val @= 1 + s.send_to_local_cgra_load_response_data_queue.recv.val @= 1 # else: # # TODO: Handle other cmd types. 
@@ -272,15 +271,15 @@ def update_sending_to_noc_msg(): s.crossbar.send[0].msg.payload) def line_trace(s): - send_to_ctrl_ring_pkt_str = "send_to_ctrl_ring_pkt: " + str(s.send_to_ctrl_ring_pkt.msg) - recv_from_tile_load_request_pkt_str = "recv_from_tile_load_request_pkt: " + str(s.recv_from_tile_load_request_pkt.msg) - recv_from_tile_load_response_pkt_str = "recv_from_tile_load_response_pkt: " + str(s.recv_from_tile_load_response_pkt.msg) - recv_from_tile_store_request_pkt_str = "recv_from_tile_store_request_pkt: " + str(s.recv_from_tile_store_request_pkt.msg) + send_to_intra_cgra_pkt_str = "send_to_intra_cgra_pkt: " + str(s.send_to_intra_cgra_pkt.msg) + recv_from_local_cgra_load_request_pkt_str = "recv_from_local_cgra_load_request_pkt: " + str(s.recv_from_local_cgra_load_request_pkt.msg) + recv_from_local_cgra_load_response_pkt_str = "recv_from_local_cgra_load_response_pkt: " + str(s.recv_from_local_cgra_load_response_pkt.msg) + recv_from_local_cgra_store_request_pkt_str = "recv_from_local_cgra_store_request_pkt: " + str(s.recv_from_local_cgra_store_request_pkt.msg) crossbar_str = "crossbar: {" + s.crossbar.line_trace() + "}" - send_to_tile_load_request_addr_str = "send_to_tile_load_request_addr: " + str(s.send_to_tile_load_request_addr.msg) - send_to_tile_store_request_addr_str = "send_to_tile_store_request_addr: " + str(s.send_to_tile_store_request_addr.msg) - send_to_tile_store_request_data_str = "send_to_tile_store_request_data: " + str(s.send_to_tile_store_request_data.msg) + send_to_local_cgra_load_request_addr_str = "send_to_local_cgra_load_request_addr: " + str(s.send_to_local_cgra_load_request_addr.msg) + send_to_local_cgra_store_request_addr_str = "send_to_local_cgra_store_request_addr: " + str(s.send_to_local_cgra_store_request_addr.msg) + send_to_local_cgra_store_request_data_str = "send_to_local_cgra_store_request_data: " + str(s.send_to_local_cgra_store_request_data.msg) recv_from_noc_str = "recv_from_noc_pkt: " + str(s.recv_from_noc.msg) 
send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_noc.msg) + "; rdy: " + str(s.send_to_noc.rdy) + "; val: " + str(s.send_to_noc.val) - return f'{send_to_ctrl_ring_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_tile_load_request_addr_str} || {send_to_tile_store_request_addr_str} || {send_to_tile_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' + return f'{send_to_intra_cgra_pkt_str} || {recv_from_local_cgra_load_request_pkt_str} || {recv_from_local_cgra_load_response_pkt_str} || {recv_from_local_cgra_store_request_pkt_str} || {crossbar_str} || {send_to_local_cgra_load_request_addr_str} || {send_to_local_cgra_store_request_addr_str} || {send_to_local_cgra_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' From c6c2747f6158848e6e27e1987a7198233d6ec214 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Mon, 20 Jan 2025 23:42:45 +0800 Subject: [PATCH 04/33] [P2] Installation and Data Preloading Issue #11: add new cmd for const --- lib/cmd_type.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/cmd_type.py b/lib/cmd_type.py index 34200b7..4e2338c 100644 --- a/lib/cmd_type.py +++ b/lib/cmd_type.py @@ -19,6 +19,9 @@ CMD_LOAD_REQUEST = 4 CMD_LOAD_RESPONSE = 5 CMD_STORE_REQUEST = 6 +CMD_CONST = 7 +CMD_CONST_CLEAR = 8 + CMD_SYMBOL_DICT = { CMD_LAUNCH: "(LAUNCH_KERNEL)", @@ -28,5 +31,7 @@ CMD_LOAD_REQUEST: "(LOAD_REQUEST)", CMD_LOAD_RESPONSE: "(LOAD_RESPONSE)", CMD_STORE_REQUEST: "(STORE_REQUEST)", + CMD_CONST: "(CONST_DATA)", + CMD_CONST_CLEAR: "(CLEAR_CONST_MEM)" } From 73c154f30adb9257c76a66a0f90cffb0e8906ca1 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Thu, 6 Feb 2025 22:24:32 +0800 Subject: [PATCH 05/33] Extend mk_ring_across_tiles_pkt to have controller_id, data, data_addr --- lib/messages.py | 125 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 11 deletions(-) diff 
--git a/lib/messages.py b/lib/messages.py index e6dd216..a69b72a 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -515,14 +515,117 @@ def str_func(s): namespace = {'__str__': str_func} ) -def mk_cpu_pkt(datatype_id, - # DataType - payload_nbits=16, predicate_nbits=1, bypass_nbits=1, - # CtrlPktType - nrouters = 4, ctrl_actions = 8, ctrl_mem_size = 4, ctrl_operations = 7, ctrl_fu_inports = 4, ctrl_fu_outports = 4, ctrl_tile_inports = 5, ctrl_tile_outports = 5, - prefix="CPUPkt"): - - if datatype_id == 0: - return mk_data(payload_nbits, predicate_nbits, bypass_nbits) - else: - return mk_ring_across_tiles_pkt(nrouters, ctrl_actions, ctrl_mem_size, ctrl_operations, ctrl_fu_inports, ctrl_fu_outports, ctrl_tile_inports, ctrl_tile_outports) \ No newline at end of file + +#========================================================================= +# Ring for delivering ctrl and data signals and commands across CGRAs +#========================================================================= + +def mk_intra_cgra_pkt(nrouters = 4, + cmd_nbits = 4, + cgraId_nbits = 4, + ctrl_actions = 8, + ctrl_mem_size = 16, + ctrl_operations = 64, + ctrl_fu_inports = 2, + ctrl_fu_outports = 2, + ctrl_tile_inports = 4, + ctrl_tile_outports = 4, + addr_nbits = 16, + data_nbits = 16, + predicate_nbits = 1, + prefix="PreloadCGRAsPacket"): + + CgraIdType = mk_bits(cgraId_nbits) + TileIdType = mk_bits(clog2(nrouters)) + opaque_nbits = 8 + OpqType = mk_bits(opaque_nbits) + CtrlActionType = mk_bits(clog2(ctrl_actions)) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + CtrlOperationType = mk_bits(clog2(ctrl_operations)) + CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) + CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) + num_routing_outports = ctrl_tile_outports + ctrl_fu_inports + CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) + CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) + CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) + CtrlPredicateType = 
mk_bits(predicate_nbits) + VcIdType = mk_bits(1) + CmdType = mk_bits(cmd_nbits) + AddrType = mk_bits(addr_nbits) + DataType = mk_bits(data_nbits) + DataPredicateType = mk_bits(predicate_nbits) + + + new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ + f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_"\ + f"{ctrl_fu_outports}_{ctrl_tile_inports}_{ctrl_tile_outports}" + + def str_func(s): + out_str = '(ctrl_operation)' + str(s.ctrl_operation) + out_str += '|(ctrl_fu_in)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_in[i])) + + out_str += '|(ctrl_predicate)' + out_str += str(int(s.ctrl_predicate)) + + out_str += '|(ctrl_routing_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_xbar_outport[i])) + + out_str += '|(ctrl_fu_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_xbar_outport[i])) + + out_str += '|(ctrl_predicate_in)' + for i in range(ctrl_tile_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_predicate_in[i])) + + return f"{s.srcTile}>{s.dstTile}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." \ + f"{out_str}" + + field_dict = {} + field_dict['cgraId'] = CgraIdType + field_dict['srcTile'] = TileIdType + field_dict['dstTile'] = TileIdType + field_dict['opaque'] = OpqType + field_dict['vc_id'] = VcIdType + field_dict['ctrl_action'] = CtrlActionType + field_dict['ctrl_addr'] = CtrlAddrType + field_dict['ctrl_operation'] = CtrlOperationType + # TODO: need fix to pair `predicate` with specific operation. + # The 'predicate' indicates whether the current operation is based on + # the partial predication or not. Note that 'predicate' is different + # from the following 'predicate_in', which contributes to the 'predicate' + # at the next cycle. 
+ field_dict['ctrl_predicate'] = CtrlPredicateType + # The fu_in indicates the input register ID (i.e., operands) for the + # operation. + field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] + + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( + num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( + num_routing_outports)] + # I assume one tile supports single predicate during the entire execution + # time, as it is hard to distinguish predication for different operations + # (we automatically update, i.e., 'or', the predicate stored in the + # predicate register). This should be guaranteed by the compiler. + field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( + ctrl_tile_inports)] + field_dict['cmd'] = CmdType + field_dict['addr'] = AddrType + field_dict['data'] = DataType + field_dict['data_predicate'] = DataPredicateType + + return mk_bitstruct(new_name, field_dict, + namespace = {'__str__': str_func} + ) \ No newline at end of file From c7c038022c4f4f71b786148761728303511323ae Mon Sep 17 00:00:00 2001 From: yuqisun Date: Thu, 6 Feb 2025 22:27:07 +0800 Subject: [PATCH 06/33] s.controller.send_to_ctrl_ring_pkt -> s.controller.send_to_intra_cgra_pkt --- systolic/CgraSystolicArrayRTL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/systolic/CgraSystolicArrayRTL.py b/systolic/CgraSystolicArrayRTL.py index 4dafd65..20ac24e 100644 --- a/systolic/CgraSystolicArrayRTL.py +++ b/systolic/CgraSystolicArrayRTL.py @@ -105,7 +105,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, for i in range(s.num_tiles): s.ctrl_ring.send[i] //= s.tile[i].recv_ctrl_pkt - s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_pkt + s.ctrl_ring.recv[0] //= s.controller.send_to_intra_cgra_pkt for i in range(1, s.num_tiles): s.ctrl_ring.recv[i].val //= 0 s.ctrl_ring.recv[i].msg //= CtrlPktType() From 955033eb4d5c239df582274a7d1e1fb8bd3a74dc 
Mon Sep 17 00:00:00 2001 From: yuqisun Date: Thu, 6 Feb 2025 22:42:15 +0800 Subject: [PATCH 07/33] rename interfaces as per ControllerRTL --- controller/test/ControllerRTL_test.py | 45 ++++++++++++++++----------- lib/messages.py | 2 +- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 19d82a6..5147ab2 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -63,21 +63,21 @@ def construct(s, ControllerIdType, CpuPktType, CmdType, MsgType, idTo2d_map) # Connections - s.src_from_tile_load_request_pkt_en_rdy.send //= s.dut.recv_from_tile_load_request_pkt - s.src_from_tile_load_response_pkt_en_rdy.send //= s.dut.recv_from_tile_load_response_pkt - s.src_from_tile_store_request_pkt_en_rdy.send //= s.dut.recv_from_tile_store_request_pkt + s.src_from_tile_load_request_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_load_request_pkt + s.src_from_tile_load_response_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_load_response_pkt + s.src_from_tile_store_request_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_store_request_pkt - s.dut.send_to_tile_load_request_addr //= s.sink_to_tile_load_request_addr_en_rdy.recv - s.dut.send_to_tile_load_response_data //= s.sink_to_tile_load_response_data_en_rdy.recv - s.dut.send_to_tile_store_request_addr //= s.sink_to_tile_store_request_addr_en_rdy.recv - s.dut.send_to_tile_store_request_data //= s.sink_to_tile_store_request_data_en_rdy.recv + s.dut.send_to_local_cgra_load_request_addr //= s.sink_to_tile_load_request_addr_en_rdy.recv + s.dut.send_to_local_cgra_load_response_data //= s.sink_to_tile_load_response_data_en_rdy.recv + s.dut.send_to_local_cgra_store_request_addr //= s.sink_to_tile_store_request_addr_en_rdy.recv + s.dut.send_to_local_cgra_store_request_data //= s.sink_to_tile_store_request_data_en_rdy.recv s.src_from_noc_val_rdy.send //= s.dut.recv_from_noc s.dut.send_to_noc //= 
s.sink_to_noc_val_rdy.recv s.dut.recv_from_cpu_pkt.val //= 0 s.dut.recv_from_cpu_pkt.msg //= CpuPktType() - s.dut.send_to_ctrl_ring_pkt.rdy //= 0 + s.dut.send_to_intra_cgra_pkt.rdy //= 0 def done(s): return s.src_from_tile_load_request_pkt_en_rdy.done() and \ @@ -141,6 +141,10 @@ def mk_src_pkts(nterminals, lst): DataType = mk_data(data_nbits, predicate_nbits) nterminals = 4 +cmd_nbits = 4 +CmdType = mk_bits(cmd_nbits) +cgraId_nbits = 4 +ControllerIdType = mk_bits(cgraId_nbits) CmdType = mk_bits(4) ControllerIdType = mk_bits(clog2(nterminals)) num_ctrl_actions = 8 @@ -178,17 +182,20 @@ def mk_src_pkts(nterminals, lst): # num_fu_outports, # num_tile_inports, # num_tile_outports) -# CpuPktType = mk_cpu_pkt(1, -# nterminals, -# num_ctrl_actions, -# ctrl_mem_size, -# num_ctrl_operations, -# num_fu_inports, -# num_fu_outports, -# num_tile_inports, -# num_tile_outports) - -CpuPktType = mk_cpu_pkt(0) + +CpuPktType = mk_intra_cgra_pkt(nterminals, + cmd_nbits, + cgraId_nbits, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + addr_nbits, + data_nbits, + predicate_nbits) Pkt = mk_multi_cgra_noc_pkt(nterminals, 1, addr_nbits = addr_nbits, diff --git a/lib/messages.py b/lib/messages.py index a69b72a..0383eb2 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -533,7 +533,7 @@ def mk_intra_cgra_pkt(nrouters = 4, addr_nbits = 16, data_nbits = 16, predicate_nbits = 1, - prefix="PreloadCGRAsPacket"): + prefix="IntraCGRAPacket"): CgraIdType = mk_bits(cgraId_nbits) TileIdType = mk_bits(clog2(nrouters)) From 7a8ca8606f64fabcb2e10f1484970bb655ac0b94 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Fri, 7 Feb 2025 06:37:03 +0800 Subject: [PATCH 08/33] rename prefix of mk_intra_cgra_pkt --- lib/messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/messages.py b/lib/messages.py index 0383eb2..b4679c1 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -533,7 +533,7 @@ def 
mk_intra_cgra_pkt(nrouters = 4, addr_nbits = 16, data_nbits = 16, predicate_nbits = 1, - prefix="IntraCGRAPacket"): + prefix="IntraCgraPacket"): CgraIdType = mk_bits(cgraId_nbits) TileIdType = mk_bits(clog2(nrouters)) From 4bb2047ee68f8670bc3ba6c675242a7050ff89a1 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 8 Feb 2025 12:37:54 +0800 Subject: [PATCH 09/33] feed pkt to ctrl/const mems --- tile/TileRTL.py | 46 ++++++++++++++++++++++++++++++++------- tile/test/TileRTL_test.py | 42 +++++++++++++++++------------------ 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 232e9cd..e722e74 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -15,6 +15,9 @@ """ from pymtl3 import * + +from lib.cmd_type import CMD_CONFIG, CMD_CONST, CMD_CONST_CLEAR +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.AdderRTL import AdderRTL from ..fu.single.BranchRTL import BranchRTL @@ -33,12 +36,12 @@ class TileRTL(Component): - def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, + def construct(s, DataType, PredicateType, CpuPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, num_ctrl, total_steps, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, - MemUnitRTL], const_list = None, id = 0): + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], + const_list = None, id = 0): # Constants. num_routing_xbar_inports = num_tile_inports @@ -57,7 +60,9 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, for _ in range (num_tile_outports)] # Ctrl. - s.recv_ctrl_pkt = RecvIfcRTL(CtrlPktType) + # todo + # Actually it contains both ctrl and const, change name later once component works. + s.recv_ctrl_pkt = RecvIfcRTL(CpuPktType) # Data. 
s.to_mem_raddr = SendIfcRTL(DataAddrType) @@ -69,8 +74,8 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) - s.const_queue = ConstQueueRTL(DataType, const_list \ - if const_list != None else [DataType(0)]) + # s.const_queue = ConstQueueRTL(DataType, const_list \ + # if const_list != None else [DataType(0)]) s.routing_crossbar = CrossbarRTL(DataType, PredicateType, CtrlSignalType, num_routing_xbar_inports, @@ -80,11 +85,12 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_fu_xbar_inports, num_fu_xbar_outports, id, "fu") - s.ctrl_mem = CtrlMemDynamicRTL(CtrlPktType, CtrlSignalType, + s.ctrl_mem = CtrlMemDynamicRTL(CpuPktType, CtrlSignalType, ctrl_mem_size, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, num_ctrl, total_steps) + s.const_mem = ConstQueueDynamicRTL(DataType, data_mem_size) # The `tile_out_channel` indicates the outport channels that are # connected to the next tiles. s.tile_out_channel = [ChannelRTL(DataType, latency = 1) @@ -114,7 +120,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt # Constant queue. 
- s.element.recv_const //= s.const_queue.send_const + s.element.recv_const //= s.const_mem.send_const for i in range(len(FuList)): if FuList[i] == MemUnitRTL: @@ -175,6 +181,30 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.routing_crossbar.send_data[num_tile_outports + i] //= s.fu_in_or_link[i].recv_xbar s.fu_in_or_link[i].send //= s.fu_in_channel[i].recv + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt.msg @= CpuPktType(0) + s.const_mem.recv_const.msg @= DataType(0) + s.ctrl_mem.recv_pkt.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_ctrl_pkt.rdy @= 0 + + if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.cmd_action == CMD_CONFIG): + s.ctrl_mem.recv_pkt.val @= 1 + s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg + s.recv_ctrl_pkt.rdy @= 1 + + elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.cmd_action == CMD_CONST): + s.const_mem.recv_pkt.val @= 1 + s.const_mem.recv_const.msg @= DataType(s.recv_ctrl_pkt.msg.data) + s.recv_ctrl_pkt.rdy @= 1 + + # todo + # Verify: Can reset be used to clear? + elif s.recv_ctrl_pkt.msg.cmd_action == CMD_CONST_CLEAR: + s.const_mem.reset() + + # Updates the configuration memory related signals. 
@update def update_opt(): diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 3da4bc0..ec4cc7e 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -48,25 +48,25 @@ class TestHarness(Component): def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, + CpuPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports, src_data, src_ctrl_pkt, sink_out): + num_tile_outports, src_data, src_cpu_pkt, sink_out): s.num_tile_inports = num_tile_inports s.num_tile_outports = num_tile_outports - s.src_ctrl_pkt = ValRdyTestSrcRTL(CtrlPktType, src_ctrl_pkt) + s.src_cpu_pkt = ValRdyTestSrcRTL(CpuPktType, src_cpu_pkt) s.src_data = [ValRdyTestSrcRTL(DataType, src_data[i]) for i in range(num_tile_inports)] s.sink_out = [ValRdyTestSinkRTL(DataType, sink_out[i]) for i in range(num_tile_outports)] - s.dut = DUT(DataType, PredicateType, CtrlPktType, CtrlSignalType, + s.dut = DUT(DataType, PredicateType, CpuPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, 3, 3, # 3 opts num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, FunctionUnit, FuList) - connect(s.src_ctrl_pkt.send, s.dut.recv_ctrl_pkt) + connect(s.src_cpu_pkt.send, s.dut.recv_ctrl_pkt) for i in range(num_tile_inports): connect(s.src_data[i].send, s.dut.recv_data[i]) @@ -128,43 +128,43 @@ def test_tile_alu(cmdline_opts): # 64-bit to satisfy the default bitwidth of vector FUs. 
DataType = mk_data(64, 1) PredicateType = mk_predicate(1, 1) - CtrlPktType = \ - mk_ring_across_tiles_pkt(num_terminals, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + CpuPktType = \ + mk_intra_cgra_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) CtrlSignalType = \ mk_separate_ctrl(num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports) - src_ctrl_pkt = [ + src_cpu_pkt = [ # src dst vc_id opq cmd_type addr operation predicate - CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, + CpuPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register1, + CpuPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(1), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pick_register1, + CpuPktType(0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), 
pick_register1, + CpuPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -185,10 +185,10 @@ def test_tile_alu(cmdline_opts): [DataType(9, 1), DataType(4, 1)]] th = TestHarness(DUT, FunctionUnit, FuList, DataType, PredicateType, - CtrlPktType, CtrlSignalType, ctrl_mem_size, + CpuPktType, CtrlSignalType, ctrl_mem_size, data_mem_size, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, src_data, - src_ctrl_pkt, sink_out) + src_cpu_pkt, sink_out) th.elaborate() th.dut.set_metadata(VerilogVerilatorImportPass.vl_Wno_list, ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', From a8da0dcdbf45476775c33bc605b8659c457058b1 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 8 Feb 2025 12:50:31 +0800 Subject: [PATCH 10/33] feed pkt to ctrl/const mems --- tile/TileRTL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tile/TileRTL.py b/tile/TileRTL.py index e722e74..937caed 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -16,7 +16,7 @@ from pymtl3 import * -from lib.cmd_type import CMD_CONFIG, CMD_CONST, CMD_CONST_CLEAR +from ..lib.cmd_type import CMD_CONFIG, CMD_CONST, CMD_CONST_CLEAR from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.AdderRTL import AdderRTL From a592b11607bf42dd37a0b246b5d9174c8598c3c9 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 8 Feb 2025 23:34:58 +0800 Subject: [PATCH 11/33] Use ctrl mem and const mem in TileRTL --- lib/messages.py | 9 ++-- tile/TileRTL.py | 12 +++--- tile/test/TileRTL_test.py | 86 +++++++++++++++++++++++---------------- 3 files changed, 61 insertions(+), 46 deletions(-) diff --git a/lib/messages.py b/lib/messages.py index b4679c1..ae5a921 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -611,16 +611,13 @@ def str_func(s): # operation. 
field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] - field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( - num_routing_outports)] - field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( - num_routing_outports)] + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range(num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range(num_routing_outports)] # I assume one tile supports single predicate during the entire execution # time, as it is hard to distinguish predication for different operations # (we automatically update, i.e., 'or', the predicate stored in the # predicate register). This should be guaranteed by the compiler. - field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( - ctrl_tile_inports)] + field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range(ctrl_tile_inports)] field_dict['cmd'] = CmdType field_dict['addr'] = AddrType field_dict['data'] = DataType diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 937caed..b17bf82 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -117,7 +117,7 @@ def construct(s, DataType, PredicateType, CpuPktType, CtrlSignalType, # Connections. # Ctrl. - s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt + # s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt # Constant queue. 
s.element.recv_const //= s.const_mem.send_const @@ -189,20 +189,20 @@ def feed_pkt(): s.const_mem.recv_const.val @= 0 s.recv_ctrl_pkt.rdy @= 0 - if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.cmd_action == CMD_CONFIG): + if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG): s.ctrl_mem.recv_pkt.val @= 1 s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg s.recv_ctrl_pkt.rdy @= 1 - elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.cmd_action == CMD_CONST): - s.const_mem.recv_pkt.val @= 1 + elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): + s.const_mem.recv_const.val @= 1 s.const_mem.recv_const.msg @= DataType(s.recv_ctrl_pkt.msg.data) s.recv_ctrl_pkt.rdy @= 1 # todo # Verify: Can reset be used to clear? - elif s.recv_ctrl_pkt.msg.cmd_action == CMD_CONST_CLEAR: - s.const_mem.reset() + # elif s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST_CLEAR: + # s.const_mem.reset() # Updates the configuration memory related signals. diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index ec4cc7e..25d7516 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -128,49 +128,67 @@ def test_tile_alu(cmdline_opts): # 64-bit to satisfy the default bitwidth of vector FUs. 
DataType = mk_data(64, 1) PredicateType = mk_predicate(1, 1) - CpuPktType = \ - mk_intra_cgra_pkt(num_terminals, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + # mk_intra_cgra_pkt(nrouters = 4, + # cmd_nbits = 4, + # cgraId_nbits = 4, + # ctrl_actions = 8, + # ctrl_mem_size = 16, + # ctrl_operations = 64, + # ctrl_fu_inports = 2, + # ctrl_fu_outports = 2, + # ctrl_tile_inports = 4, + # ctrl_tile_outports = 4, + # addr_nbits = 16, + # data_nbits = 16, + # predicate_nbits = 1, + # prefix = "IntraCgraPacket"): + CpuPktType = mk_intra_cgra_pkt(nrouters=num_terminals, + ctrl_mem_size=ctrl_mem_size, + ctrl_fu_inports=num_fu_inports, + ctrl_fu_outports=num_fu_outports, + ctrl_tile_inports=num_tile_inports, + ctrl_tile_outports=num_tile_outports, + ) CtrlSignalType = \ mk_separate_ctrl(num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports) src_cpu_pkt = [ - # src dst vc_id opq cmd_type addr operation predicate - CpuPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, - # routing_xbar_output + # cgraId, srcTile, dstTile, opaque, vc_id, ctrl_action, ctrl_addr, ctrl_operation, ctrl_predicate, ctrl_fu_in, + CpuPktType( 0, 0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, + # ctrl_routing_xbar_outport [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CpuPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(4), TileInType(1), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CpuPktType(0, 0, 0, 
0, CMD_CONFIG, 2, OPT_SUB, b1(0), pick_register1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CpuPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # ctrl_routing_predicate_in + [b1(1), b1(1), b1(1), b1(1)], + # cmd, addr, data, data_predicate + CMD_CONST, 0, 1, b1(1) + ), + # CpuPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(4), TileInType(1), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + # CpuPktType(0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + # CpuPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + # FuOutType(0), FuOutType(0), FuOutType(0), 
FuOutType(0)]) + ] src_data = [[DataType(3, 1)], [], [DataType(4, 1)], From ecd307b5d5244ff73fc699c24f62bad277cb6115 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Tue, 11 Feb 2025 23:36:16 +0800 Subject: [PATCH 12/33] set max cycle 20 for test --- lib/messages.py | 7 ++++ tile/TileRTL.py | 16 ++++----- tile/test/TileRTL_test.py | 76 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 86 insertions(+), 13 deletions(-) diff --git a/lib/messages.py b/lib/messages.py index ae5a921..4b264fe 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -539,8 +539,10 @@ def mk_intra_cgra_pkt(nrouters = 4, TileIdType = mk_bits(clog2(nrouters)) opaque_nbits = 8 OpqType = mk_bits(opaque_nbits) + # config or data or const CtrlActionType = mk_bits(clog2(ctrl_actions)) CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + # add, sub ... CtrlOperationType = mk_bits(clog2(ctrl_operations)) CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) @@ -550,9 +552,12 @@ def mk_intra_cgra_pkt(nrouters = 4, CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) CtrlPredicateType = mk_bits(predicate_nbits) VcIdType = mk_bits(1) + # todo + # If not for read data/config same time, what is CmdType for? CmdType = mk_bits(cmd_nbits) AddrType = mk_bits(addr_nbits) DataType = mk_bits(data_nbits) + # todo DataPredicateType = mk_bits(predicate_nbits) @@ -621,6 +626,8 @@ def str_func(s): field_dict['cmd'] = CmdType field_dict['addr'] = AddrType field_dict['data'] = DataType + # todo + # predicate 用于控制是否有效? 
field_dict['data_predicate'] = DataPredicateType return mk_bitstruct(new_name, field_dict, diff --git a/tile/TileRTL.py b/tile/TileRTL.py index b17bf82..73ae1bf 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -27,7 +27,6 @@ from ..fu.single.MulRTL import MulRTL from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..mem.const.ConstQueueRTL import ConstQueueRTL from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL from ..noc.CrossbarRTL import CrossbarRTL from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL @@ -108,6 +107,8 @@ def construct(s, DataType, PredicateType, CpuPktType, CtrlSignalType, s.fu_in_or_link = [LinkOrRTL(DataType) for _ in range(num_fu_inports)] # Additional one register for partial predication + # todo + # what is this? s.reg_predicate = RegisterRTL(PredicateType) # Signals indicating whether certain modules already done their jobs. @@ -122,6 +123,7 @@ def construct(s, DataType, PredicateType, CpuPktType, CtrlSignalType, # Constant queue. s.element.recv_const //= s.const_mem.send_const + for i in range(len(FuList)): if FuList[i] == MemUnitRTL: s.to_mem_raddr //= s.element.to_mem_raddr[i] @@ -196,14 +198,11 @@ def feed_pkt(): elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): s.const_mem.recv_const.val @= 1 - s.const_mem.recv_const.msg @= DataType(s.recv_ctrl_pkt.msg.data) + # todo + # input data with 64 bits not work? + s.const_mem.recv_const.msg.payload @= s.recv_ctrl_pkt.msg.data s.recv_ctrl_pkt.rdy @= 1 - # todo - # Verify: Can reset be used to clear? - # elif s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST_CLEAR: - # s.const_mem.reset() - # Updates the configuration memory related signals. 
@update @@ -252,5 +251,6 @@ def line_trace(s): fu_in_channel_send_str = "|".join([str(x.send.msg) for x in s.fu_in_channel]) out_str = "|".join(["(" + str(x.msg.payload) + ", predicate: " + str(x.msg.predicate) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) ctrl_mem = s.ctrl_mem.line_trace() - return f"tile_inports: {recv_str} => [routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || tile_out_channels: {tile_out_channel_recv_str} => {tile_out_channel_send_str} || fu_in_channels: {fu_in_channel_recv_str} => {fu_in_channel_send_str}] => tile_outports: {out_str} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem} ## " + const_mem = s.const_mem.line_trace() + return f"tile_inports: {recv_str} => [routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || tile_out_channels: {tile_out_channel_recv_str} => {tile_out_channel_send_str} || fu_in_channels: {fu_in_channel_recv_str} => {fu_in_channel_send_str}] => tile_outports: {out_str} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 25d7516..bce6962 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -95,6 +95,7 @@ def line_trace(s): return s.dut.line_trace() def test_tile_alu(cmdline_opts): + cmdline_opts['max_cycles'] = 20 num_tile_inports = 4 num_tile_outports = 4 num_fu_inports = 4 @@ -126,7 +127,8 @@ def test_tile_alu(cmdline_opts): VectorMulComboRTL, VectorAdderComboRTL] # 64-bit to satisfy the default bitwidth of vector FUs. 
- DataType = mk_data(64, 1) + data_nbits = 64 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) # mk_intra_cgra_pkt(nrouters = 4, # cmd_nbits = 4, @@ -148,6 +150,7 @@ def test_tile_alu(cmdline_opts): ctrl_fu_outports=num_fu_outports, ctrl_tile_inports=num_tile_inports, ctrl_tile_outports=num_tile_outports, + data_nbits = data_nbits, ) CtrlSignalType = \ mk_separate_ctrl(num_ctrl_operations, num_fu_inports, @@ -157,16 +160,77 @@ def test_tile_alu(cmdline_opts): # cgraId, srcTile, dstTile, opaque, vc_id, ctrl_action, ctrl_addr, ctrl_operation, ctrl_predicate, ctrl_fu_in, CpuPktType( 0, 0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, # ctrl_routing_xbar_outport - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + [# to fu + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # to tile + # why this maps with tile_inports: (0000000000000003.1.0.0, val: 1, rdy: 0)|(0000000000000000.0.0.0, val: 0, rdy: 0)|(0000000000000004? TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], # ctrl_routing_predicate_in [b1(1), b1(1), b1(1), b1(1)], - # cmd, addr, data, data_predicate - CMD_CONST, 0, 1, b1(1) + # cmd, addr, data, data_predicate + # todo + # Shouldn't data use DataType? 
+ 0, 0, 1, b1(1) ), + CpuPktType(0, 0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, + # ctrl_routing_xbar_outport + [ # to fu + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # to tile + TileInType(4), TileInType(3), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # ctrl_routing_predicate_in + [b1(1), b1(1), b1(1), b1(1)], + # cmd, addr, data, data_predicate + 0, 0, 2, b1(1) + ), + CpuPktType(0, 0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register0, + # ctrl_routing_xbar_outport + [ # to fu + TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # to tile + TileInType(4), TileInType(7), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # ctrl_routing_predicate_in + [b1(1), b1(1), b1(1), b1(1)], + # cmd, addr, data, data_predicate + 0, 1, 0, b1(1) + ), + # CpuPktType(0, 0, 0, 0, 0, CMD_CONFIG, 2, OPT_SUB, b1(0), pick_register0, + # # ctrl_routing_xbar_outport + # [ # to fu + # TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # # to tile + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # # ctrl_routing_predicate_in + # [b1(1), b1(1), b1(1), b1(1)], + # # cmd, addr, data, data_predicate + # CMD_CONST, 1, DataType(3, 1), b1(1) + # ), + # CpuPktType(0, 0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register0, + # # ctrl_routing_xbar_outport + # [ # to fu + # TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # # to tile + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + # FuOutType(0), FuOutType(0), FuOutType(0), 
FuOutType(0)], + # # ctrl_routing_predicate_in + # [b1(1), b1(1), b1(1), b1(1)], + # # cmd, addr, data, data_predicate + # CMD_CONST, 2, DataType(4, 1), b1(1) + # ), # CpuPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_ADD, b1(0), pick_register1, # # routing_xbar_output # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), @@ -212,5 +276,7 @@ def test_tile_alu(cmdline_opts): ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) - run_sim(th) + # todo + # cmdline_ops 不放在这里不起作用 + run_sim(th, cmdline_opts) From ea4a36726efebe3dcb9bd7e792f414f4d9ee8705 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Wed, 12 Feb 2025 20:44:56 +0800 Subject: [PATCH 13/33] add comments --- lib/messages.py | 1 + tile/TileRTL.py | 2 +- tile/test/TileRTL_test.py | 10 +++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/messages.py b/lib/messages.py index 4b264fe..055ef9b 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -550,6 +550,7 @@ def mk_intra_cgra_pkt(nrouters = 4, CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) + # 看是否关心传进来的op的 0和1 CtrlPredicateType = mk_bits(predicate_nbits) VcIdType = mk_bits(1) # todo diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 73ae1bf..9c2f943 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -191,11 +191,11 @@ def feed_pkt(): s.const_mem.recv_const.val @= 0 s.recv_ctrl_pkt.rdy @= 0 + if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG): s.ctrl_mem.recv_pkt.val @= 1 s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg s.recv_ctrl_pkt.rdy @= 1 - elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): s.const_mem.recv_const.val @= 1 # todo diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index bce6962..18baf67 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py 
@@ -160,12 +160,16 @@ def test_tile_alu(cmdline_opts): # cgraId, srcTile, dstTile, opaque, vc_id, ctrl_action, ctrl_addr, ctrl_operation, ctrl_predicate, ctrl_fu_in, CpuPktType( 0, 0, 0, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register0, # ctrl_routing_xbar_outport - [# to fu + [ + # to tile + # why this maps with tile_inports: (0000000000000003.1.0.0, val: 1, rdy: 0)|(0000000000000000.0.0.0, val: 0, rdy: 0)|(0000000000000004? + # 第4个inport到 tile out 第一个(routing xbar 第五个) + # 从1开始,0代表没有 inport TileInType(0), TileInType(0), TileInType(0), TileInType(0), - # to tile - # why this maps with tile_inports: (0000000000000003.1.0.0, val: 1, rdy: 0)|(0000000000000000.0.0.0, val: 0, rdy: 0)|(0000000000000004? + # to fu TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output + # fu out 对应的哪个in [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], # ctrl_routing_predicate_in From d05b61ea46c3033ff0ce8aa09e467cdf51b8653e Mon Sep 17 00:00:00 2001 From: yuqisun Date: Wed, 12 Feb 2025 23:46:52 +0800 Subject: [PATCH 14/33] looks test cannot end now --- lib/messages.py | 123 ++++++++++++++++++++++++-------------- tile/TileRTL.py | 33 ++++++++-- tile/test/TileRTL_test.py | 58 ++++++++++++++---- 3 files changed, 151 insertions(+), 63 deletions(-) diff --git a/lib/messages.py b/lib/messages.py index 1d5e013..2015475 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -711,50 +711,43 @@ def str_func(s): #========================================================================= def mk_intra_cgra_pkt(nrouters = 4, - cmd_nbits = 4, - cgraId_nbits = 4, - ctrl_actions = 8, - ctrl_mem_size = 16, - ctrl_operations = 64, - ctrl_fu_inports = 2, - ctrl_fu_outports = 2, - ctrl_tile_inports = 4, - ctrl_tile_outports = 4, - addr_nbits = 16, - data_nbits = 16, - predicate_nbits = 1, - prefix="IntraCgraPacket"): - - CgraIdType = mk_bits(cgraId_nbits) - TileIdType = mk_bits(clog2(nrouters)) - opaque_nbits = 8 + 
ctrl_actions = 8, + ctrl_mem_size = 4, + ctrl_operations = 7, + ctrl_fu_inports = 4, + ctrl_fu_outports = 4, + ctrl_tile_inports = 5, + ctrl_tile_outports = 5, + ctrl_registers_per_reg_bank = 16, + data_nbits = 16, + prefix="IntraCgraPacket"): + + IdType = mk_bits(clog2(nrouters)) + opaque_nbits = 1 OpqType = mk_bits(opaque_nbits) - # config or data or const CtrlActionType = mk_bits(clog2(ctrl_actions)) CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) - # add, sub ... CtrlOperationType = mk_bits(clog2(ctrl_operations)) - CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) + CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) num_routing_outports = ctrl_tile_outports + ctrl_fu_inports CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) - # 看是否关心传进来的op的 0和1 - CtrlPredicateType = mk_bits(predicate_nbits) + CtrlPredicateType = mk_bits(1) + vector_factor_power_nbits = 3 + CtrlVectorFactorPowerType = mk_bits(vector_factor_power_nbits) + + # 3 inports of register file bank. + CtrlRegFromType = mk_bits(2) + CtrlRegIdxType = mk_bits(clog2(ctrl_registers_per_reg_bank)) VcIdType = mk_bits(1) - # todo - # If not for read data/config same time, what is CmdType for? 
- CmdType = mk_bits(cmd_nbits) - AddrType = mk_bits(addr_nbits) DataType = mk_bits(data_nbits) - # todo - DataPredicateType = mk_bits(predicate_nbits) - new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ - f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_"\ - f"{ctrl_fu_outports}_{ctrl_tile_inports}_{ctrl_tile_outports}" + f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_" \ + f"{ctrl_fu_outports}_{ctrl_tile_inports}_" \ + f"{ctrl_tile_outports}_{ctrl_registers_per_reg_bank}" def str_func(s): out_str = '(ctrl_operation)' + str(s.ctrl_operation) @@ -785,13 +778,42 @@ def str_func(s): out_str += '-' out_str += str(int(s.ctrl_routing_predicate_in[i])) - return f"{s.srcTile}>{s.dstTile}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." \ + out_str += '|(ctrl_vector_factor_power)' + out_str += str(int(s.ctrl_vector_factor_power)) + + out_str += '|(ctrl_is_last_ctrl)' + out_str += str(int(s.ctrl_is_last_ctrl)) + + out_str += '|(ctrl_read_reg_from)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_read_reg_from[i])) + + out_str += '|(write_reg_from)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_write_reg_from[i])) + + out_str += '|(write_reg_idx)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_write_reg_idx[i])) + + out_str += '|(read_reg_idx)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_read_reg_idx[i])) + + return f"{s.src}>{s.dst}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." \ f"{out_str}" field_dict = {} - field_dict['cgraId'] = CgraIdType - field_dict['srcTile'] = TileIdType - field_dict['dstTile'] = TileIdType + field_dict['src'] = IdType + field_dict['dst'] = IdType field_dict['opaque'] = OpqType field_dict['vc_id'] = VcIdType field_dict['ctrl_action'] = CtrlActionType @@ -807,19 +829,32 @@ def str_func(s): # operation. 
field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] - field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range(num_routing_outports)] - field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range(num_routing_outports)] + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( + num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( + num_routing_outports)] + + field_dict['data'] = DataType + # I assume one tile supports single predicate during the entire execution # time, as it is hard to distinguish predication for different operations # (we automatically update, i.e., 'or', the predicate stored in the # predicate register). This should be guaranteed by the compiler. - field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range(ctrl_tile_inports)] - field_dict['cmd'] = CmdType - field_dict['addr'] = AddrType - field_dict['data'] = DataType - # todo - # predicate 用于控制是否有效? - field_dict['data_predicate'] = DataPredicateType + field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( + ctrl_tile_inports)] + + field_dict['ctrl_vector_factor_power'] = CtrlVectorFactorPowerType + + field_dict['ctrl_is_last_ctrl'] = b1 + + # Register file related signals. + # Indicates whether to write data into the register bank, and the + # corresponding inport. + field_dict['ctrl_write_reg_from'] = [CtrlRegFromType for _ in range(ctrl_fu_inports)] + field_dict['ctrl_write_reg_idx'] = [CtrlRegIdxType for _ in range(ctrl_fu_inports)] + # Indicates whether to read data from the register bank. 
+ field_dict['ctrl_read_reg_from'] = [b1 for _ in range(ctrl_fu_inports)] + field_dict['ctrl_read_reg_idx'] = [CtrlRegIdxType for _ in range(ctrl_fu_inports)] return mk_bitstruct(new_name, field_dict, namespace = {'__str__': str_func} diff --git a/tile/TileRTL.py b/tile/TileRTL.py index aef2bbc..c395c2f 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -15,6 +15,9 @@ """ from pymtl3 import * + +from ..lib.cmd_type import CMD_CONFIG, CMD_CONST +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.AdderRTL import AdderRTL from ..fu.single.BranchRTL import BranchRTL @@ -71,8 +74,9 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) - s.const_queue = ConstQueueRTL(DataType, const_list \ - if const_list != None else [DataType(0)]) + # s.const_queue = ConstQueueRTL(DataType, const_list \ + # if const_list != None else [DataType(0)]) + s.const_mem = ConstQueueDynamicRTL(DataType, data_mem_size) s.routing_crossbar = CrossbarRTL(DataType, PredicateType, CtrlSignalType, num_routing_xbar_inports, @@ -110,11 +114,10 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, # Connections. # Ctrl. - s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt + # s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt # Constant queue. 
- # FIXME: @yuqi, https://github.com/tancheng/VectorCGRA/issues/11 - s.element.recv_const //= s.const_queue.send_const + s.element.recv_const //= s.const_mem.send_const for i in range(len(FuList)): if FuList[i] == MemUnitRTL: @@ -180,6 +183,23 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element.recv_in[i] s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt.msg @= CtrlPktType(0) + s.const_mem.recv_const.msg @= DataType(0) + s.ctrl_mem.recv_pkt.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_ctrl_pkt.rdy @= 0 + + if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG): + s.ctrl_mem.recv_pkt.val @= 1 + s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg + s.recv_ctrl_pkt.rdy @= 1 + elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): + s.const_mem.recv_const.val @= 1 + s.const_mem.recv_const.msg.payload @= s.recv_ctrl_pkt.msg.data + s.recv_ctrl_pkt.rdy @= 1 + # Updates the configuration memory related signals. 
@update def update_opt(): @@ -224,5 +244,6 @@ def line_trace(s): tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) out_str = "|".join(["(" + str(x.msg.payload) + ", predicate: " + str(x.msg.predicate) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) ctrl_mem = s.ctrl_mem.line_trace() - return f"tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem} ## " + const_mem = s.const_mem.line_trace() + return f"tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 902e19f..efcdbd4 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -131,15 +131,19 @@ def test_tile_alu(cmdline_opts): # 64-bit to satisfy the default bitwidth of vector FUs. 
DataType = mk_data(64, 1) PredicateType = mk_predicate(1, 1) + data_nbits = 64 + CtrlPktType = \ - mk_ring_across_tiles_pkt(num_terminals, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, @@ -176,32 +180,60 @@ def test_tile_alu(cmdline_opts): # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] src_ctrl_pkt = [ # src dst vc_id opq cmd_type addr operation predicate - CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pick_register0, + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pick_register0, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + 0), CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(1), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + 0), + # for const: 5, 0, 7 + # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), 
FuOutType(0), FuOutType(0), FuOutType(0), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # 5), + # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # 0), + # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, + # # routing_xbar_output + # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # # fu_xbar_output + # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + # 7), CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + 0)] src_data = [[DataType(3, 1)], [], [DataType(4, 1)], [DataType(5, 1), DataType(7, 1)]] - src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] + # src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] sink_out = [ # 7 - 3 = 4. [DataType(4, 1)], From 9e1a87f472eb643a2fcc993a6f6ac138af537a89 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Thu, 13 Feb 2025 08:15:09 +0800 Subject: [PATCH 15/33] Update done() method. 
--- tile/test/TileRTL_test.py | 53 ++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index efcdbd4..b7e48a2 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -87,9 +87,11 @@ def done(s): if not s.src_data[i].done(): return False - for i in range(s.num_tile_outports): - if not s.sink_out[i].done(): - return False + # for i in range(s.num_tile_outports): + # print(f'>>>$$$ i: {i}') + # if not s.sink_out[i].done(): + # print(f'$$$ i: {i}') + # return False return True @@ -196,31 +198,23 @@ def test_tile_alu(cmdline_opts): [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], 0), - # for const: 5, 0, 7 - # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, - # # routing_xbar_output - # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # # fu_xbar_output - # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - # 5), - # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, - # # routing_xbar_output - # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # # fu_xbar_output - # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - # 0), - # CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, - # # routing_xbar_output - # [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - # TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # # fu_xbar_output - # [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - # 7), + # for const: 5, 7 + CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, 
OPT_NAH, b1(0), pick_register1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + 5), + CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, + # routing_xbar_output + [TileInType(0), TileInType(0), TileInType(0), TileInType(0), + TileInType(0), TileInType(0), TileInType(0), TileInType(0)], + # fu_xbar_output + [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], + 7), CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), @@ -253,5 +247,6 @@ def test_tile_alu(cmdline_opts): ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) - run_sim(th) + cmdline_opts['max_cycles'] = 20 + run_sim(th, cmdline_opts) From 04d3818118dc01ab01d8a81c3d5ec058688ff277 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Thu, 13 Feb 2025 08:17:38 +0800 Subject: [PATCH 16/33] align comment and value --- tile/test/TileRTL_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index b7e48a2..754c5a4 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -182,7 +182,7 @@ def test_tile_alu(cmdline_opts): # FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] src_ctrl_pkt = [ # src dst vc_id opq cmd_type addr operation predicate - CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pick_register0, + CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_ADD, b1(0), pick_register0, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(3), TileInType(0), TileInType(0)], 
From 8fc12c7707d1df1b2fcc1d373d2832c3cb9a4cbf Mon Sep 17 00:00:00 2001 From: yuqisun Date: Fri, 14 Feb 2025 07:42:20 +0800 Subject: [PATCH 17/33] Add LAUNCH condition to transfer to CtrlMemDynamicRTL to start iterate. --- tile/TileRTL.py | 10 ++++++---- tile/test/TileRTL_test.py | 8 +++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tile/TileRTL.py b/tile/TileRTL.py index c395c2f..d605430 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -16,6 +16,7 @@ from pymtl3 import * +from lib.cmd_type import CMD_LAUNCH from ..lib.cmd_type import CMD_CONFIG, CMD_CONST from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL @@ -191,14 +192,14 @@ def feed_pkt(): s.const_mem.recv_const.val @= 0 s.recv_ctrl_pkt.rdy @= 0 - if s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG): + if s.recv_ctrl_pkt.val & ((s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG) | (s.recv_ctrl_pkt.msg.ctrl_action == CMD_LAUNCH)): s.ctrl_mem.recv_pkt.val @= 1 s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg - s.recv_ctrl_pkt.rdy @= 1 + s.recv_ctrl_pkt.rdy @= s.ctrl_mem.recv_pkt.rdy elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): s.const_mem.recv_const.val @= 1 s.const_mem.recv_const.msg.payload @= s.recv_ctrl_pkt.msg.data - s.recv_ctrl_pkt.rdy @= 1 + s.recv_ctrl_pkt.rdy @= s.const_mem.recv_const.rdy # Updates the configuration memory related signals. 
@update @@ -239,11 +240,12 @@ def already_done(): # Line trace def line_trace(s): recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) + send_str = "|".join([str(x.msg) for x in s.send_data]) tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) out_str = "|".join(["(" + str(x.msg.payload) + ", predicate: " + str(x.msg.predicate) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) ctrl_mem = s.ctrl_mem.line_trace() const_mem = s.const_mem.line_trace() - return f"tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " + return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 754c5a4..3477147 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -87,11 +87,9 @@ def done(s): if not s.src_data[i].done(): return False - # for i in range(s.num_tile_outports): - # print(f'>>>$$$ i: {i}') - # if not s.sink_out[i].done(): - # print(f'$$$ i: {i}') - # return False + for i in range(s.num_tile_outports): + if not 
s.sink_out[i].done(): + return False return True From 1688ee1e9c75e17cf80607adc014449dc5a0c53e Mon Sep 17 00:00:00 2001 From: yuqisun Date: Fri, 14 Feb 2025 22:34:38 +0800 Subject: [PATCH 18/33] format code --- tile/TileRTL.py | 27 ++++++++----------------- tile/test/TileRTL_test.py | 42 +++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/tile/TileRTL.py b/tile/TileRTL.py index d605430..5b6c57c 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -14,27 +14,24 @@ Date : Nov 26, 2024 """ -from pymtl3 import * - -from lib.cmd_type import CMD_LAUNCH -from ..lib.cmd_type import CMD_CONFIG, CMD_CONST -from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.AdderRTL import AdderRTL from ..fu.single.BranchRTL import BranchRTL -from ..fu.single.PhiRTL import PhiRTL from ..fu.single.CompRTL import CompRTL from ..fu.single.MemUnitRTL import MemUnitRTL from ..fu.single.MulRTL import MulRTL +from ..fu.single.PhiRTL import PhiRTL from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..mem.const.ConstQueueRTL import ConstQueueRTL +from ..lib.cmd_type import * +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL from ..noc.CrossbarRTL import CrossbarRTL -from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL from ..noc.LinkOrRTL import LinkOrRTL +from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL from ..rf.RegisterRTL import RegisterRTL -from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL + class TileRTL(Component): @@ -43,8 +40,8 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, 
num_registers_per_reg_bank = 16, Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, - MemUnitRTL], const_list = None, id = 0): + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], + const_list = None, id = 0): # Constants. num_routing_xbar_inports = num_tile_inports @@ -75,8 +72,6 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) - # s.const_queue = ConstQueueRTL(DataType, const_list \ - # if const_list != None else [DataType(0)]) s.const_mem = ConstQueueDynamicRTL(DataType, data_mem_size) s.routing_crossbar = CrossbarRTL(DataType, PredicateType, CtrlSignalType, @@ -113,10 +108,6 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.fu_crossbar_done = Wire(1) s.routing_crossbar_done = Wire(1) - # Connections. - # Ctrl. - # s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt - # Constant queue. s.element.recv_const //= s.const_mem.send_const @@ -175,8 +166,6 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.fu_crossbar.send_data[num_tile_outports + i] //= \ s.register_cluster.recv_data_from_fu_crossbar[i] - # FIXME: @yuqi, https://github.com/tancheng/VectorCGRA/issues/11 - # The const can be delivered here. 
s.register_cluster.recv_data_from_const[i].msg //= DataType() s.register_cluster.recv_data_from_const[i].val //= 0 diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 3477147..445ef11 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -10,36 +10,30 @@ Date : Nov 26, 2024 """ -from pymtl3 import * +from pymtl3.passes.backends.verilog import (VerilogVerilatorImportPass) from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import (VerilogTranslationPass, - VerilogVerilatorImportPass) + from ..TileRTL import TileRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.BranchRTL import BranchRTL +from ...fu.single.CompRTL import CompRTL +from ...fu.single.LogicRTL import LogicRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.MulRTL import MulRTL +from ...fu.single.PhiRTL import PhiRTL +from ...fu.single.SelRTL import SelRTL +from ...fu.single.ShifterRTL import ShifterRTL from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.vector.VectorMulComboRTL import VectorMulComboRTL -from ...fu.vector.VectorAdderComboRTL import VectorAdderComboRTL -from ...fu.vector.VectorAllReduceRTL import VectorAllReduceRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.MulRTL import MulRTL -from ...fu.single.SelRTL import SelRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...fu.single.LogicRTL import LogicRTL -from ...fu.single.PhiRTL import PhiRTL -from ...fu.single.CompRTL import CompRTL -from ...fu.single.BranchRTL import BranchRTL -from ...fu.single.NahRTL import NahRTL -from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from 
...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL +from ...fu.vector.VectorAdderComboRTL import VectorAdderComboRTL +from ...fu.vector.VectorMulComboRTL import VectorMulComboRTL from ...lib.basic.val_rdy.SinkRTL import SinkRTL as ValRdyTestSinkRTL -from ...lib.messages import * +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL from ...lib.cmd_type import * +from ...lib.messages import * from ...lib.opt_type import * -from ...mem.ctrl.CtrlMemRTL import CtrlMemRTL + #------------------------------------------------------------------------- # Test harness @@ -225,7 +219,7 @@ def test_tile_alu(cmdline_opts): [], [DataType(4, 1)], [DataType(5, 1), DataType(7, 1)]] - # src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] + sink_out = [ # 7 - 3 = 4. [DataType(4, 1)], From c6594ac6f9e5870e142743a25c3d368f7d740d68 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Fri, 14 Feb 2025 22:47:56 +0800 Subject: [PATCH 19/33] revert controller rtl, yufei will add code for this --- controller/ControllerRTL.py | 141 +++++++++++++------------- controller/test/ControllerRTL_test.py | 61 ++++------- lib/cmd_type.py | 4 +- 3 files changed, 92 insertions(+), 114 deletions(-) diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index 850bc86..bdf953d 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -20,11 +20,11 @@ class ControllerRTL(Component): - def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, + def construct(s, ControllerIdType, CmdType, CtrlPktType, NocPktType, CGRADataType, CGRAAddrType, multi_cgra_rows, multi_cgra_columns, controller_id, controller2addr_map, idTo2d_map): - # Checks for ring network. + assert(multi_cgra_columns >= multi_cgra_rows) # Used for calculating the x/y position. 
@@ -36,31 +36,31 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, s.recv_from_noc = RecvIfcRTL(NocPktType) s.send_to_noc = SendIfcRTL(NocPktType) - s.recv_from_cpu_pkt = RecvIfcRTL(CpuPktType) - s.send_to_intra_cgra_pkt = SendIfcRTL(CpuPktType) + s.recv_from_cpu_ctrl_pkt = RecvIfcRTL(CtrlPktType) + s.send_to_ctrl_ring_ctrl_pkt = SendIfcRTL(CtrlPktType) # Request from/to tiles. - s.recv_from_local_cgra_load_request_pkt = RecvIfcRTL(NocPktType) - s.recv_from_local_cgra_load_response_pkt = RecvIfcRTL(NocPktType) - s.recv_from_local_cgra_store_request_pkt = RecvIfcRTL(NocPktType) + s.recv_from_tile_load_request_pkt = RecvIfcRTL(NocPktType) + s.recv_from_tile_load_response_pkt = RecvIfcRTL(NocPktType) + s.recv_from_tile_store_request_pkt = RecvIfcRTL(NocPktType) - s.send_to_local_cgra_load_request_addr = SendIfcRTL(CGRAAddrType) - s.send_to_local_cgra_load_response_data = SendIfcRTL(CGRADataType) - s.send_to_local_cgra_store_request_addr = SendIfcRTL(CGRAAddrType) - s.send_to_local_cgra_store_request_data = SendIfcRTL(CGRADataType) + s.send_to_tile_load_request_addr = SendIfcRTL(CGRAAddrType) + s.send_to_tile_load_response_data = SendIfcRTL(CGRADataType) + s.send_to_tile_store_request_addr = SendIfcRTL(CGRAAddrType) + s.send_to_tile_store_request_data = SendIfcRTL(CGRADataType) # Component - s.recv_from_local_cgra_load_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.recv_from_local_cgra_load_response_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.recv_from_local_cgra_store_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_tile_load_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_tile_load_response_pkt_queue = ChannelRTL(NocPktType, latency = 1) + s.recv_from_tile_store_request_pkt_queue = ChannelRTL(NocPktType, latency = 1) - s.send_to_local_cgra_load_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) - s.send_to_local_cgra_load_response_data_queue = ChannelRTL(CGRADataType, latency 
= 1) - s.send_to_local_cgra_store_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) - s.send_to_local_cgra_store_request_data_queue = ChannelRTL(CGRADataType, latency = 1) + s.send_to_tile_load_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) + s.send_to_tile_load_response_data_queue = ChannelRTL(CGRADataType, latency = 1) + s.send_to_tile_store_request_addr_queue = ChannelRTL(CGRAAddrType, latency = 1) + s.send_to_tile_store_request_data_queue = ChannelRTL(CGRADataType, latency = 1) # s.recv_from_other_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) - # s.send_to_local_cgra_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) + # s.send_to_tile_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) # s.send_to_other_cmd_queue = ChannelRTL(CmdType, latency = 1, num_entries = 2) # Crossbar with 3 inports (load and store requests towards remote @@ -70,7 +70,7 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, # termination). s.crossbar = XbarBypassQueueRTL(NocPktType, 3, 1) - s.recv_pkt_queue = NormalQueueRTL(CpuPktType) + s.recv_ctrl_pkt_queue = NormalQueueRTL(CtrlPktType) # # TODO: below ifcs should be connected through another NoC within # # one CGRA, instead of per-tile and performing like a bus. @@ -114,15 +114,15 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, # Connections # Requests towards others, 1 cycle delay to improve timing. 
- s.recv_from_local_cgra_load_request_pkt_queue.recv //= s.recv_from_local_cgra_load_request_pkt - s.recv_from_local_cgra_load_response_pkt_queue.recv //= s.recv_from_local_cgra_load_response_pkt - s.recv_from_local_cgra_store_request_pkt_queue.recv //= s.recv_from_local_cgra_store_request_pkt + s.recv_from_tile_load_request_pkt_queue.recv //= s.recv_from_tile_load_request_pkt + s.recv_from_tile_load_response_pkt_queue.recv //= s.recv_from_tile_load_response_pkt + s.recv_from_tile_store_request_pkt_queue.recv //= s.recv_from_tile_store_request_pkt # Requests towards local from others, 1 cycle delay to improve timing. - s.send_to_local_cgra_load_request_addr_queue.send //= s.send_to_local_cgra_load_request_addr - s.send_to_local_cgra_load_response_data_queue.send //= s.send_to_local_cgra_load_response_data - s.send_to_local_cgra_store_request_addr_queue.send //= s.send_to_local_cgra_store_request_addr - s.send_to_local_cgra_store_request_data_queue.send //= s.send_to_local_cgra_store_request_data + s.send_to_tile_load_request_addr_queue.send //= s.send_to_tile_load_request_addr + s.send_to_tile_load_response_data_queue.send //= s.send_to_tile_load_response_data + s.send_to_tile_store_request_addr_queue.send //= s.send_to_tile_store_request_addr + s.send_to_tile_store_request_data_queue.send //= s.send_to_tile_store_request_data # For control signals delivery from CPU to tiles. # TODO: https://github.com/tancheng/VectorCGRA/issues/11 -- The request needs @@ -131,8 +131,8 @@ def construct(s, ControllerIdType, CmdType, CpuPktType, NocPktType, # other CGRAs can be delivered via the NoC across CGRAs. Note that the packet # format can be in a universal fashion to support both data and config. Later # on, the format can be packet-based or flit-based. 
- s.recv_from_cpu_pkt //= s.recv_pkt_queue.recv - s.recv_pkt_queue.send //= s.send_to_intra_cgra_pkt + s.recv_from_cpu_ctrl_pkt //= s.recv_ctrl_pkt_queue.recv + s.recv_ctrl_pkt_queue.send //= s.send_to_ctrl_ring_ctrl_pkt @update def update_received_msg(): @@ -141,8 +141,8 @@ def update_received_msg(): kStoreRequestInportIdx = 2 # For the load request from local tiles. - s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_local_cgra_load_request_pkt_queue.send.val - s.recv_from_local_cgra_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy + s.crossbar.recv[kLoadRequestInportIdx].val @= s.recv_from_tile_load_request_pkt_queue.send.val + s.recv_from_tile_load_request_pkt_queue.send.rdy @= s.crossbar.recv[kLoadRequestInportIdx].rdy s.crossbar.recv[kLoadRequestInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -153,7 +153,7 @@ def update_received_msg(): 0, 0, CMD_LOAD_REQUEST, - s.recv_from_local_cgra_load_request_pkt_queue.send.msg.addr, + s.recv_from_tile_load_request_pkt_queue.send.msg.addr, 0, 1, 0) @@ -161,8 +161,8 @@ def update_received_msg(): # For the store request from local tiles. 
- s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_local_cgra_store_request_pkt_queue.send.val - s.recv_from_local_cgra_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy + s.crossbar.recv[kStoreRequestInportIdx].val @= s.recv_from_tile_store_request_pkt_queue.send.val + s.recv_from_tile_store_request_pkt_queue.send.rdy @= s.crossbar.recv[kStoreRequestInportIdx].rdy s.crossbar.recv[kStoreRequestInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -173,16 +173,16 @@ def update_received_msg(): 0, 0, CMD_STORE_REQUEST, - s.recv_from_local_cgra_store_request_pkt_queue.send.msg.addr, - s.recv_from_local_cgra_store_request_pkt_queue.send.msg.data, - s.recv_from_local_cgra_store_request_pkt_queue.send.msg.predicate, + s.recv_from_tile_store_request_pkt_queue.send.msg.addr, + s.recv_from_tile_store_request_pkt_queue.send.msg.data, + s.recv_from_tile_store_request_pkt_queue.send.msg.predicate, 0) # For the load response (i.e., the data towards other) from local memory. s.crossbar.recv[kLoadResponseInportIdx].val @= \ - s.recv_from_local_cgra_load_response_pkt_queue.send.val - s.recv_from_local_cgra_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy + s.recv_from_tile_load_response_pkt_queue.send.val + s.recv_from_tile_load_response_pkt_queue.send.rdy @= s.crossbar.recv[kLoadResponseInportIdx].rdy s.crossbar.recv[kLoadResponseInportIdx].msg @= \ NocPktType(controller_id, 0, @@ -195,9 +195,9 @@ def update_received_msg(): CMD_LOAD_RESPONSE, # Retrieves the load (from NoC) address from the message. # The addr information is embedded in the message. 
- s.recv_from_local_cgra_load_response_pkt_queue.send.msg.addr, - s.recv_from_local_cgra_load_response_pkt_queue.send.msg.data, - s.recv_from_local_cgra_load_response_pkt_queue.send.msg.predicate, + s.recv_from_tile_load_response_pkt_queue.send.msg.addr, + s.recv_from_tile_load_response_pkt_queue.send.msg.data, + s.recv_from_tile_load_response_pkt_queue.send.msg.predicate, 0) # TODO: For the other cmd types. @@ -207,43 +207,43 @@ def update_received_msg(): # def update_received_msg_from_noc(): # Initiates the signals. - s.send_to_local_cgra_load_request_addr_queue.recv.val @= 0 - s.send_to_local_cgra_store_request_addr_queue.recv.val @= 0 - s.send_to_local_cgra_store_request_data_queue.recv.val @= 0 - s.send_to_local_cgra_load_response_data_queue.recv.val @= 0 - s.send_to_local_cgra_load_request_addr_queue.recv.msg @= CGRAAddrType() - s.send_to_local_cgra_store_request_addr_queue.recv.msg @= CGRAAddrType() - s.send_to_local_cgra_store_request_data_queue.recv.msg @= CGRADataType() - s.send_to_local_cgra_load_response_data_queue.recv.msg @= CGRADataType() + s.send_to_tile_load_request_addr_queue.recv.val @= 0 + s.send_to_tile_store_request_addr_queue.recv.val @= 0 + s.send_to_tile_store_request_data_queue.recv.val @= 0 + s.send_to_tile_load_response_data_queue.recv.val @= 0 + s.send_to_tile_load_request_addr_queue.recv.msg @= CGRAAddrType() + s.send_to_tile_store_request_addr_queue.recv.msg @= CGRAAddrType() + s.send_to_tile_store_request_data_queue.recv.msg @= CGRADataType() + s.send_to_tile_load_response_data_queue.recv.msg @= CGRADataType() s.recv_from_noc.rdy @= 0 # For the load request from NoC. 
received_pkt = s.recv_from_noc.msg if s.recv_from_noc.val: if s.recv_from_noc.msg.cmd == CMD_LOAD_REQUEST: - if s.send_to_local_cgra_load_request_addr_queue.recv.rdy: + if s.send_to_tile_load_request_addr_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_local_cgra_load_request_addr_queue.recv.msg @= \ + s.send_to_tile_load_request_addr_queue.recv.msg @= \ CGRAAddrType(received_pkt.addr) - s.send_to_local_cgra_load_request_addr_queue.recv.val @= 1 + s.send_to_tile_load_request_addr_queue.recv.val @= 1 elif s.recv_from_noc.msg.cmd == CMD_STORE_REQUEST: - if s.send_to_local_cgra_store_request_addr_queue.recv.rdy & \ - s.send_to_local_cgra_store_request_data_queue.recv.rdy: + if s.send_to_tile_store_request_addr_queue.recv.rdy & \ + s.send_to_tile_store_request_data_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_local_cgra_store_request_addr_queue.recv.msg @= \ + s.send_to_tile_store_request_addr_queue.recv.msg @= \ CGRAAddrType(received_pkt.addr) - s.send_to_local_cgra_store_request_data_queue.recv.msg @= \ + s.send_to_tile_store_request_data_queue.recv.msg @= \ CGRADataType(received_pkt.data, received_pkt.predicate, 0, 0) - s.send_to_local_cgra_store_request_addr_queue.recv.val @= 1 - s.send_to_local_cgra_store_request_data_queue.recv.val @= 1 + s.send_to_tile_store_request_addr_queue.recv.val @= 1 + s.send_to_tile_store_request_data_queue.recv.val @= 1 elif s.recv_from_noc.msg.cmd == CMD_LOAD_RESPONSE: - if s.send_to_local_cgra_load_response_data_queue.recv.rdy: + if s.send_to_tile_load_response_data_queue.recv.rdy: s.recv_from_noc.rdy @= 1 - s.send_to_local_cgra_load_response_data_queue.recv.msg @= \ + s.send_to_tile_load_response_data_queue.recv.msg @= \ CGRADataType(received_pkt.data, received_pkt.predicate, 0, 0) - s.send_to_local_cgra_load_response_data_queue.recv.val @= 1 + s.send_to_tile_load_response_data_queue.recv.val @= 1 # else: # # TODO: Handle other cmd types. 
@@ -271,15 +271,14 @@ def update_sending_to_noc_msg(): s.crossbar.send[0].msg.payload) def line_trace(s): - send_to_intra_cgra_pkt_str = "send_to_intra_cgra_pkt: " + str(s.send_to_intra_cgra_pkt.msg) - recv_from_local_cgra_load_request_pkt_str = "recv_from_local_cgra_load_request_pkt: " + str(s.recv_from_local_cgra_load_request_pkt.msg) - recv_from_local_cgra_load_response_pkt_str = "recv_from_local_cgra_load_response_pkt: " + str(s.recv_from_local_cgra_load_response_pkt.msg) - recv_from_local_cgra_store_request_pkt_str = "recv_from_local_cgra_store_request_pkt: " + str(s.recv_from_local_cgra_store_request_pkt.msg) + send_to_ctrl_ring_ctrl_pkt_str = "send_to_ctrl_ring_ctrl_pkt: " + str(s.send_to_ctrl_ring_ctrl_pkt.msg) + recv_from_tile_load_request_pkt_str = "recv_from_tile_load_request_pkt: " + str(s.recv_from_tile_load_request_pkt.msg) + recv_from_tile_load_response_pkt_str = "recv_from_tile_load_response_pkt: " + str(s.recv_from_tile_load_response_pkt.msg) + recv_from_tile_store_request_pkt_str = "recv_from_tile_store_request_pkt: " + str(s.recv_from_tile_store_request_pkt.msg) crossbar_str = "crossbar: {" + s.crossbar.line_trace() + "}" - send_to_local_cgra_load_request_addr_str = "send_to_local_cgra_load_request_addr: " + str(s.send_to_local_cgra_load_request_addr.msg) - send_to_local_cgra_store_request_addr_str = "send_to_local_cgra_store_request_addr: " + str(s.send_to_local_cgra_store_request_addr.msg) - send_to_local_cgra_store_request_data_str = "send_to_local_cgra_store_request_data: " + str(s.send_to_local_cgra_store_request_data.msg) + send_to_tile_load_request_addr_str = "send_to_tile_load_request_addr: " + str(s.send_to_tile_load_request_addr.msg) + send_to_tile_store_request_addr_str = "send_to_tile_store_request_addr: " + str(s.send_to_tile_store_request_addr.msg) + send_to_tile_store_request_data_str = "send_to_tile_store_request_data: " + str(s.send_to_tile_store_request_data.msg) recv_from_noc_str = "recv_from_noc_pkt: " + 
str(s.recv_from_noc.msg) send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_noc.msg) + "; rdy: " + str(s.send_to_noc.rdy) + "; val: " + str(s.send_to_noc.val) - return f'{send_to_intra_cgra_pkt_str} || {recv_from_local_cgra_load_request_pkt_str} || {recv_from_local_cgra_load_response_pkt_str} || {recv_from_local_cgra_store_request_pkt_str} || {crossbar_str} || {send_to_local_cgra_load_request_addr_str} || {send_to_local_cgra_store_request_addr_str} || {send_to_local_cgra_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' - + return f'{send_to_ctrl_ring_ctrl_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_tile_load_request_addr_str} || {send_to_tile_store_request_addr_str} || {send_to_tile_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 5147ab2..78be90b 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -26,7 +26,7 @@ class TestHarness(Component): - def construct(s, ControllerIdType, CpuPktType, CmdType, MsgType, + def construct(s, ControllerIdType, CtrlPktType, CmdType, MsgType, AddrType, PktType, controller_id, from_tile_load_request_pkt_msgs, from_tile_load_response_pkt_msgs, @@ -54,7 +54,7 @@ def construct(s, ControllerIdType, CpuPktType, CmdType, MsgType, s.src_from_noc_val_rdy = TestSrcRTL(PktType, from_noc_pkts) s.sink_to_noc_val_rdy = TestNetSinkRTL(PktType, expected_to_noc_pkts, cmp_fn = cmp_func) - s.dut = ControllerRTL(ControllerIdType, CmdType, CpuPktType, + s.dut = ControllerRTL(ControllerIdType, CmdType, CtrlPktType, PktType, MsgType, AddrType, # Number of controllers globally (x/y dimension). 
1, num_terminals, @@ -63,21 +63,21 @@ def construct(s, ControllerIdType, CpuPktType, CmdType, MsgType, idTo2d_map) # Connections - s.src_from_tile_load_request_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_load_request_pkt - s.src_from_tile_load_response_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_load_response_pkt - s.src_from_tile_store_request_pkt_en_rdy.send //= s.dut.recv_from_local_cgra_store_request_pkt + s.src_from_tile_load_request_pkt_en_rdy.send //= s.dut.recv_from_tile_load_request_pkt + s.src_from_tile_load_response_pkt_en_rdy.send //= s.dut.recv_from_tile_load_response_pkt + s.src_from_tile_store_request_pkt_en_rdy.send //= s.dut.recv_from_tile_store_request_pkt - s.dut.send_to_local_cgra_load_request_addr //= s.sink_to_tile_load_request_addr_en_rdy.recv - s.dut.send_to_local_cgra_load_response_data //= s.sink_to_tile_load_response_data_en_rdy.recv - s.dut.send_to_local_cgra_store_request_addr //= s.sink_to_tile_store_request_addr_en_rdy.recv - s.dut.send_to_local_cgra_store_request_data //= s.sink_to_tile_store_request_data_en_rdy.recv + s.dut.send_to_tile_load_request_addr //= s.sink_to_tile_load_request_addr_en_rdy.recv + s.dut.send_to_tile_load_response_data //= s.sink_to_tile_load_response_data_en_rdy.recv + s.dut.send_to_tile_store_request_addr //= s.sink_to_tile_store_request_addr_en_rdy.recv + s.dut.send_to_tile_store_request_data //= s.sink_to_tile_store_request_data_en_rdy.recv s.src_from_noc_val_rdy.send //= s.dut.recv_from_noc s.dut.send_to_noc //= s.sink_to_noc_val_rdy.recv - s.dut.recv_from_cpu_pkt.val //= 0 - s.dut.recv_from_cpu_pkt.msg //= CpuPktType() - s.dut.send_to_intra_cgra_pkt.rdy //= 0 + s.dut.recv_from_cpu_ctrl_pkt.val //= 0 + s.dut.recv_from_cpu_ctrl_pkt.msg //= CtrlPktType() + s.dut.send_to_ctrl_ring_ctrl_pkt.rdy //= 0 def done(s): return s.src_from_tile_load_request_pkt_en_rdy.done() and \ @@ -141,10 +141,6 @@ def mk_src_pkts(nterminals, lst): DataType = mk_data(data_nbits, predicate_nbits) nterminals = 4 -cmd_nbits = 4 
-CmdType = mk_bits(cmd_nbits) -cgraId_nbits = 4 -ControllerIdType = mk_bits(cgraId_nbits) CmdType = mk_bits(4) ControllerIdType = mk_bits(clog2(nterminals)) num_ctrl_actions = 8 @@ -174,28 +170,14 @@ def mk_src_pkts(nterminals, lst): 3: [12, 15], } -# CtrlPktType = mk_ring_across_tiles_pkt(nterminals, -# num_ctrl_actions, -# ctrl_mem_size, -# num_ctrl_operations, -# num_fu_inports, -# num_fu_outports, -# num_tile_inports, -# num_tile_outports) - -CpuPktType = mk_intra_cgra_pkt(nterminals, - cmd_nbits, - cgraId_nbits, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports, - addr_nbits, - data_nbits, - predicate_nbits) +CtrlPktType = mk_ring_across_tiles_pkt(nterminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) Pkt = mk_multi_cgra_noc_pkt(nterminals, 1, addr_nbits = addr_nbits, @@ -254,7 +236,7 @@ def mk_src_pkts(nterminals, lst): def test_simple(): print("controller2addr_map: ", controller2addr_map) - th = TestHarness(ControllerIdType, CpuPktType, + th = TestHarness(ControllerIdType, CtrlPktType, CmdType, DataType, AddrType, Pkt, controller_id, from_tile_load_request_pkts, @@ -272,4 +254,3 @@ def test_simple(): controller2addr_map, idTo2d_map, nterminals) run_sim(th) - diff --git a/lib/cmd_type.py b/lib/cmd_type.py index 4e2338c..41400db 100644 --- a/lib/cmd_type.py +++ b/lib/cmd_type.py @@ -20,7 +20,6 @@ CMD_LOAD_RESPONSE = 5 CMD_STORE_REQUEST = 6 CMD_CONST = 7 -CMD_CONST_CLEAR = 8 CMD_SYMBOL_DICT = { @@ -31,7 +30,6 @@ CMD_LOAD_REQUEST: "(LOAD_REQUEST)", CMD_LOAD_RESPONSE: "(LOAD_RESPONSE)", CMD_STORE_REQUEST: "(STORE_REQUEST)", - CMD_CONST: "(CONST_DATA)", - CMD_CONST_CLEAR: "(CLEAR_CONST_MEM)" + CMD_CONST: "(CONST_DATA)" } From 296f5f0f3a40adb151c221a4b626508bf3700b1f Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 08:02:59 +0800 Subject: [PATCH 20/33] replace 
mk_ring_across_tiles_pkt with mk_intra_cgra_pkt --- cgra/test/CgraRTL_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cgra/test/CgraRTL_test.py b/cgra/test/CgraRTL_test.py index 1499f2c..5f7aad7 100644 --- a/cgra/test/CgraRTL_test.py +++ b/cgra/test/CgraRTL_test.py @@ -144,14 +144,17 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL], data_bitwidth = 32): } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_bitwidth + ) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, From 674bb2853ef5f159645c801c6d844eaaffbe6ad1 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 10:48:17 +0800 Subject: [PATCH 21/33] replace mk_ring_across_tiles_pkt with mk_intra_cgra_pkt --- cgra/test/CgraRTL_test.py | 5 +--- cgra/test/CgraTemplateRTL_test.py | 16 ++++++------- scale_out/test/MeshMultiCgraRTL_test.py | 2 +- scale_out/test/RingMultiCgraRTL_test.py | 2 +- systolic/CgraSystolicArrayRTL.py | 7 +++--- ...3x3MemRightAndBottomRTL_matmul_2x2_test.py | 24 ++++++++++--------- 6 files changed, 27 insertions(+), 29 deletions(-) diff --git a/cgra/test/CgraRTL_test.py b/cgra/test/CgraRTL_test.py index 5f7aad7..bc68ca7 100644 --- a/cgra/test/CgraRTL_test.py +++ b/cgra/test/CgraRTL_test.py @@ -151,10 +151,7 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL], data_bitwidth = 32): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports, - num_registers_per_reg_bank, - data_bitwidth - ) + num_tile_outports) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, diff --git a/cgra/test/CgraTemplateRTL_test.py b/cgra/test/CgraTemplateRTL_test.py index a73ae7d..2534808 100644 --- a/cgra/test/CgraTemplateRTL_test.py +++ b/cgra/test/CgraTemplateRTL_test.py 
@@ -220,14 +220,14 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, diff --git a/scale_out/test/MeshMultiCgraRTL_test.py b/scale_out/test/MeshMultiCgraRTL_test.py index 750adcd..97e9278 100644 --- a/scale_out/test/MeshMultiCgraRTL_test.py +++ b/scale_out/test/MeshMultiCgraRTL_test.py @@ -96,7 +96,7 @@ def test_homo_2x2(cmdline_opts): 3: [24, 31], } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, diff --git a/scale_out/test/RingMultiCgraRTL_test.py b/scale_out/test/RingMultiCgraRTL_test.py index b50a290..ef87a7c 100644 --- a/scale_out/test/RingMultiCgraRTL_test.py +++ b/scale_out/test/RingMultiCgraRTL_test.py @@ -96,7 +96,7 @@ def test_homo_2x2(cmdline_opts): 3: [24, 31], } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, diff --git a/systolic/CgraSystolicArrayRTL.py b/systolic/CgraSystolicArrayRTL.py index 3ee0ece..8117181 100644 --- a/systolic/CgraSystolicArrayRTL.py +++ b/systolic/CgraSystolicArrayRTL.py @@ -46,7 +46,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_global) # Interfaces - s.recv_from_cpu_pkt = RecvIfcRTL(CtrlPktType) + s.recv_from_cpu_ctrl_pkt = RecvIfcRTL(CtrlPktType) s.recv_from_noc = RecvIfcRTL(NocPktType) s.send_to_noc = SendIfcRTL(NocPktType) @@ -101,13 +101,13 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_to_noc //= 
s.controller.send_to_noc # Connects the ctrl interface between CPU and controller. - s.recv_from_cpu_pkt //= s.controller.recv_from_cpu_pkt + s.recv_from_cpu_ctrl_pkt //= s.controller.recv_from_cpu_ctrl_pkt # Connects ring with each control memory. for i in range(s.num_tiles): s.ctrl_ring.send[i] //= s.tile[i].recv_ctrl_pkt - s.ctrl_ring.recv[0] //= s.controller.send_to_intra_cgra_pkt + s.ctrl_ring.recv[0] //= s.controller.send_to_ctrl_ring_ctrl_pkt for i in range(1, s.num_tiles): s.ctrl_ring.recv[i].val //= 0 s.ctrl_ring.recv[i].msg //= CtrlPktType() @@ -178,4 +178,3 @@ def line_trace(s): for (i,x) in enumerate(s.tile)]) res += "\nData Memory: [" + s.data_mem.line_trace() + "] \n" return res - diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 34ec953..62edf9e 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -60,7 +60,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, controller2addr_map, preload_data, preload_const) # Connections. 
- s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_pkt + s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt s.dut.send_to_noc.rdy //= 0 s.dut.recv_from_noc.val //= 0 @@ -176,15 +176,18 @@ def test_CGRA_systolic(cmdline_opts): 1: [16, 31], } + data_nbits = 64 CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -366,7 +369,7 @@ def test_CGRA_systolic(cmdline_opts): # The third column (except the bottom one) is used to store the # accumulated results. [DataType(12, 1), DataType(13, 1), DataType(0, 0)]] - + """ 1 3 2 6 14 20 x = @@ -397,4 +400,3 @@ def test_CGRA_systolic(cmdline_opts): cmdline_opts['dump_vcd'] or \ cmdline_opts['dump_vtb']) run_sim(th, enable_verification_pymtl) - From 04c76dc3d28b54cd740fb00e7f128f82a40780c4 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 10:54:00 +0800 Subject: [PATCH 22/33] replace mk_ring_across_tiles_pkt with mk_intra_cgra_pkt --- systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 62edf9e..6c5505e 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -176,7 +176,6 @@ def test_CGRA_systolic(cmdline_opts): 1: [16, 31], } - data_nbits = 64 CtrlPktType = \ mk_intra_cgra_pkt(width * height, num_ctrl_actions, @@ -185,9 +184,7 @@ def test_CGRA_systolic(cmdline_opts): num_fu_inports, 
num_fu_outports, num_tile_inports, - num_tile_outports, - num_registers_per_reg_bank, - data_nbits) + num_tile_outports) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, From 757c09c1360e255b4f630ee7fb8638c60bf64d17 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 11:19:26 +0800 Subject: [PATCH 23/33] make data_nbits consistent --- .../test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 6c5505e..3923943 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -155,6 +155,7 @@ def test_CGRA_systolic(cmdline_opts): num_ctrl_actions = 6 num_ctrl_operations = 64 num_registers_per_reg_bank = 16 + data_nbits = 32 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) @@ -184,7 +185,9 @@ def test_CGRA_systolic(cmdline_opts): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -196,7 +199,7 @@ def test_CGRA_systolic(cmdline_opts): NocPktType = mk_multi_cgra_noc_pkt(ncols = 1, nrows = 1, addr_nbits = addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pick_register = [FuInType(x + 1) for x in range(num_fu_inports)] From aad30ed524c3442d5fba578ba42eb2ffc15727c5 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 11:37:19 +0800 Subject: [PATCH 24/33] make data_nbits, num_registers_per_reg_bank consistent --- cgra/test/CgraRTL_test.py | 4 +++- cgra/test/CgraTemplateRTL_test.py | 9 ++++++--- scale_out/test/MeshMultiCgraRTL_test.py | 10 +++++++--- 
scale_out/test/RingMultiCgraRTL_test.py | 10 +++++++--- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cgra/test/CgraRTL_test.py b/cgra/test/CgraRTL_test.py index bc68ca7..c789439 100644 --- a/cgra/test/CgraRTL_test.py +++ b/cgra/test/CgraRTL_test.py @@ -151,7 +151,9 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL], data_bitwidth = 32): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_bitwidth) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, diff --git a/cgra/test/CgraTemplateRTL_test.py b/cgra/test/CgraTemplateRTL_test.py index 2534808..3235449 100644 --- a/cgra/test/CgraTemplateRTL_test.py +++ b/cgra/test/CgraTemplateRTL_test.py @@ -199,7 +199,8 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): FunctionUnit = FlexibleFuRTL # FuList = [MemUnitRTL, AdderRTL] FuList = [PhiRTL, AdderRTL, ShifterRTL, MemUnitRTL, SelRTL, CompRTL, SeqMulAdderRTL, RetRTL, MulRTL, LogicRTL, BranchRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) CmdType = mk_bits(4) @@ -227,7 +228,9 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -239,7 +242,7 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): NocPktType = mk_multi_cgra_noc_pkt(ncols = num_terminals, nrows = 1, addr_nbits = addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pick_register = [FuInType(x + 1) for x in range(num_fu_inports)] tile_in_code = [TileInType(max(4 - x, 0)) for x in range(num_routing_outports)] diff --git a/scale_out/test/MeshMultiCgraRTL_test.py b/scale_out/test/MeshMultiCgraRTL_test.py index 97e9278..b3e21bf 100644 --- 
a/scale_out/test/MeshMultiCgraRTL_test.py +++ b/scale_out/test/MeshMultiCgraRTL_test.py @@ -84,7 +84,8 @@ def test_homo_2x2(cmdline_opts): DUT = MeshMultiCgraRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) cmd_nbits = 5 num_registers_per_reg_bank = 16 @@ -103,7 +104,10 @@ def test_homo_2x2(cmdline_opts): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits + ) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -115,7 +119,7 @@ def test_homo_2x2(cmdline_opts): nrows = cgra_rows, cmd_nbits = cmd_nbits, addr_nbits = data_addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] src_opt_per_tile = [[ diff --git a/scale_out/test/RingMultiCgraRTL_test.py b/scale_out/test/RingMultiCgraRTL_test.py index ef87a7c..d2f5b45 100644 --- a/scale_out/test/RingMultiCgraRTL_test.py +++ b/scale_out/test/RingMultiCgraRTL_test.py @@ -84,7 +84,8 @@ def test_homo_2x2(cmdline_opts): DUT = RingMultiCgraRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) cmd_nbits = 5 num_registers_per_reg_bank = 16 @@ -103,7 +104,10 @@ def test_homo_2x2(cmdline_opts): num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits + ) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -115,7 +119,7 @@ def test_homo_2x2(cmdline_opts): nrows = 1, addr_nbits = data_addr_nbits, cmd_nbits = cmd_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] src_opt_per_tile = [[ From 
2bedd94fa15de025ee6a8efe6b73e203460a1479 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 11:50:15 +0800 Subject: [PATCH 25/33] use null op in launch cmd --- tile/test/TileRTL_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 445ef11..78b8d5d 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -207,7 +207,7 @@ def test_tile_alu(cmdline_opts): [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], 7), - CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_NAH, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], From 753be2d13c4948176856ebd15f45c7556b8f6d9a Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 14:50:26 +0800 Subject: [PATCH 26/33] Replace const_list with data from cpu pkt. --- cgra/CgraRTL.py | 8 +-- cgra/CgraTemplateRTL.py | 7 +- scale_out/MeshMultiCgraRTL.py | 5 +- scale_out/RingMultiCgraRTL.py | 5 +- systolic/CgraSystolicArrayRTL.py | 6 +- ...3x3MemRightAndBottomRTL_matmul_2x2_test.py | 69 ++++++++++++++++--- tile/TileRTL.py | 3 +- tile/test/TileRTL_test.py | 20 ++---- 8 files changed, 75 insertions(+), 48 deletions(-) diff --git a/cgra/CgraRTL.py b/cgra/CgraRTL.py index eae44ba..9e5f495 100644 --- a/cgra/CgraRTL.py +++ b/cgra/CgraRTL.py @@ -30,8 +30,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, cgra_topology, - controller2addr_map, idTo2d_map, preload_data = None, - preload_const = None): + controller2addr_map, idTo2d_map, preload_data = None): # Other topology can simply modify the tiles connections, or # leverage the template for modeling. 
@@ -66,15 +65,12 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, - const_list = preload_const[i]) + FuList = FuList) for i in range(s.num_tiles)] s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, data_mem_size_global, diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index 218a32b..576dfda 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -31,7 +31,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, TileList, LinkList, dataSPM, controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None): + preload_data = None): s.num_mesh_ports = 8 s.num_tiles = len(TileList) @@ -60,16 +60,13 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, # s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, - const_list = preload_const[i]) + FuList = FuList) for i in range(s.num_tiles)] # FIXME: Need to enrish data-SPM-related user-controlled parameters, e.g., number of banks. 
s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, diff --git a/scale_out/MeshMultiCgraRTL.py b/scale_out/MeshMultiCgraRTL.py index 3b48da3..868da0e 100644 --- a/scale_out/MeshMultiCgraRTL.py +++ b/scale_out/MeshMultiCgraRTL.py @@ -24,8 +24,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, - controller2addr_map, preload_data = None, - preload_const = None): + controller2addr_map, preload_data = None): # Constant s.num_terminals = cgra_rows * cgra_columns @@ -55,7 +54,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, "Mesh", controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None) + preload_data = None) for terminal_id in range(s.num_terminals)] # Latency is 1. s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) diff --git a/scale_out/RingMultiCgraRTL.py b/scale_out/RingMultiCgraRTL.py index 000a69b..4b6fb07 100644 --- a/scale_out/RingMultiCgraRTL.py +++ b/scale_out/RingMultiCgraRTL.py @@ -24,8 +24,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, - controller2addr_map, preload_data = None, - preload_const = None): + controller2addr_map, preload_data = None): # Constant idTo2d_map = {} @@ -55,7 +54,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, "Mesh", controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None) + preload_data = None) for terminal_id in range(s.num_terminals)] s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_terminals, 1) diff --git a/systolic/CgraSystolicArrayRTL.py b/systolic/CgraSystolicArrayRTL.py index 
8117181..e7a0177 100644 --- a/systolic/CgraSystolicArrayRTL.py +++ b/systolic/CgraSystolicArrayRTL.py @@ -29,7 +29,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None): + preload_data = None): # Other topology can simply modify the tiles connections, or # leverage the template for modeling. @@ -62,14 +62,12 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, const_list = preload_const[i], + FuList = FuList, id = i) for i in range(s.num_tiles)] s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 3923943..7630137 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -44,7 +44,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, src_ctrl_pkt, ctrl_steps, controller2addr_map, - preload_data, preload_const, expected_out): + preload_data, expected_out): s.DataType = DataType s.num_tiles = width * height @@ -57,7 +57,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, 1, kMaxCycles, FunctionUnit, FuList, - controller2addr_map, 
preload_data, preload_const) + controller2addr_map, preload_data) # Connections. s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt @@ -89,8 +89,10 @@ def check_parity(s): for i in range(len(s.expected_out)): for j in range(len(s.expected_out[i])): # Outputs are stored in bank 2 and bank 3. + print(f"out 1: {s.dut.data_mem.reg_file[2+i].regs[j]} <-> out 2: {s.expected_out[i][j]}") if s.dut.data_mem.reg_file[2+i].regs[j] != s.expected_out[i][j]: - return False + print(f"xxxxxxxxxxxxxxxxxxx out 1: {s.dut.data_mem.reg_file[2 + i].regs[j]} <-> out 2: {s.expected_out[i][j]}") + return False return True def done(s): @@ -205,8 +207,13 @@ def test_CGRA_systolic(cmdline_opts): src_opt_per_tile = [ # On tile 0 ([0, 0]). - # src dst vc_id opq cmd_type addr operation predicate [ + # Const + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 1), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 0), + + # src dst vc_id opq cmd_type addr operation predicate CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_LD_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -219,8 +226,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 1 ([0, 1]). 
- # src dst vc_id opq cmd_type addr operation predicate [ + # Const + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 5), + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 0), + + # src dst vc_id opq cmd_type addr operation predicate CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 0, OPT_LD_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -233,7 +245,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 2 ([0, 2]). - [CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register, + [ + # Const + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0), @@ -245,7 +263,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 3 ([1, 0]). - [CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, + [ + # Const + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, [TileInType(2), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(0), TileInType(0)], [FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (1), @@ -258,6 +282,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 4 ([1, 1]). 
[ + # Const + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST_ADD, b1(0), pick_register, [TileInType(2), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(3), TileInType(0)], @@ -271,6 +300,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 5 ([1, 2]). [ + # Const + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 9), + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 5, 0, 0, CMD_CONFIG, 0, OPT_STR_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(3), TileInType(0), TileInType(0), TileInType(0)], @@ -284,6 +318,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 6 ([2, 0]). [ + # Const + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(0), TileInType(0)], @@ -297,6 +336,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 7 ([2, 1]). [ + # Const + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST_ADD, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(3), TileInType(0)], @@ -310,6 +354,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 8 ([2, 2]). 
[ + # Const + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 12), + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 13), + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 8, 0, 0, CMD_CONFIG, 0, OPT_STR_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(3), TileInType(0), TileInType(0), TileInType(0)], @@ -339,7 +388,7 @@ def test_CGRA_systolic(cmdline_opts): for i in range(data_mem_size_per_bank)] for j in range(num_banks_per_cgra)] - preload_const = [ + '''preload_const = [ # The offset address used for loading input activation. # We use a shared data memory here, indicating global address # space. Users can make each tile has its own address space. @@ -368,7 +417,7 @@ def test_CGRA_systolic(cmdline_opts): [DataType(8, 1), DataType(8, 1), DataType(8, 1)], # The third column (except the bottom one) is used to store the # accumulated results. - [DataType(12, 1), DataType(13, 1), DataType(0, 0)]] + [DataType(12, 1), DataType(13, 1), DataType(0, 0)]]''' """ 1 3 2 6 14 20 @@ -389,7 +438,7 @@ def test_CGRA_systolic(cmdline_opts): num_registers_per_reg_bank, src_ctrl_pkt, ctrl_mem_size, controller2addr_map, preload_data_per_bank, - preload_const, expected_out) + expected_out) th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 5b6c57c..6d0023c 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -41,7 +41,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_tile_outports, num_registers_per_reg_bank = 16, Fu = FlexibleFuRTL, FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], - const_list = None, id = 0): + id = 0): # Constants. 
num_routing_xbar_inports = num_tile_inports @@ -188,6 +188,7 @@ def feed_pkt(): elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): s.const_mem.recv_const.val @= 1 s.const_mem.recv_const.msg.payload @= s.recv_ctrl_pkt.msg.data + s.const_mem.recv_const.msg.predicate @= 1 s.recv_ctrl_pkt.rdy @= s.const_mem.recv_const.rdy # Updates the configuration memory related signals. diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 78b8d5d..cefbf2c 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -190,23 +190,11 @@ def test_tile_alu(cmdline_opts): [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], 0), + # for const: 5, 7 - CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - 5), - CtrlPktType(0, 0, 0, 0, CMD_CONST, 0, OPT_NAH, b1(0), pick_register1, - # routing_xbar_output - [TileInType(0), TileInType(0), TileInType(0), TileInType(0), - TileInType(0), TileInType(0), TileInType(0), TileInType(0)], - # fu_xbar_output - [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - 7), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 5), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 7), + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_NAH, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), From 9f6354880dd2df4b8bd28d19fd31fbc8271e4f09 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 17:25:43 +0800 Subject: [PATCH 27/33] Set all fields. 
--- tile/TileRTL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tile/TileRTL.py b/tile/TileRTL.py index 6d0023c..015281b 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -175,8 +175,8 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, @update def feed_pkt(): - s.ctrl_mem.recv_pkt.msg @= CtrlPktType(0) - s.const_mem.recv_const.msg @= DataType(0) + s.ctrl_mem.recv_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) s.ctrl_mem.recv_pkt.val @= 0 s.const_mem.recv_const.val @= 0 s.recv_ctrl_pkt.rdy @= 0 From cc904cbcaec11c5bbc19af090f3555d45779ddfb Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 21:40:00 +0800 Subject: [PATCH 28/33] Ignore data for non-data CtrlPktType. --- tile/test/TileRTL_test.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index cefbf2c..439512c 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -180,16 +180,14 @@ def test_tile_alu(cmdline_opts): TileInType(4), TileInType(3), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - 0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 1, OPT_SUB, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(4), TileInType(1), TileInType(0), TileInType(0)], # fu_xbar_output [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - 0), + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), # for const: 5, 7 CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 5), @@ -201,8 +199,7 @@ def test_tile_alu(cmdline_opts): TileInType(0), TileInType(0), TileInType(0), TileInType(0)], # 
fu_xbar_output [FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0), - FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)], - 0)] + FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)])] src_data = [[DataType(3, 1)], [], [DataType(4, 1)], From 7a75f4338b9a3d8986eaaaadce3996288142d3c3 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 21:43:08 +0800 Subject: [PATCH 29/33] For checking if error same as local. --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d20680a..416e8b4 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -62,7 +62,7 @@ jobs: # Simulation across all tests. pytest .. -v --tb=short # Tile translation. - pytest ../tile/test/TileRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd + pytest ../tile/test/TileRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd --tb=short # CGRA template translation. pytest ../cgra/test/CgraTemplateRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd # TODO: Need to check vector/heterogneous modules exist in the generated Verilog. From 4dc42641b047fd7dd77488d634eb4395b97487ed Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 22:25:15 +0800 Subject: [PATCH 30/33] Update namespace for mk_intra_cgra_pkt. --- lib/messages.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/messages.py b/lib/messages.py index 2015475..02ca455 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -772,6 +772,8 @@ def str_func(s): out_str += '-' out_str += str(int(s.ctrl_fu_xbar_outport[i])) + out_str += '|(data)' + str(s.data) + out_str += '|(ctrl_predicate_in)' for i in range(ctrl_tile_inports): if i != 0: From b5eadd43b809b98ae22c1e3d8a06e565cdb056bb Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 22:58:56 +0800 Subject: [PATCH 31/33] Fix unhashable type: 'list' error.
--- tile/test/TileRTL_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 439512c..cbd4188 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -224,6 +224,5 @@ def test_tile_alu(cmdline_opts): ['UNSIGNED', 'UNOPTFLAT', 'WIDTH', 'WIDTHCONCAT', 'ALWCOMBORDER']) th = config_model_with_cmdline_opts(th, cmdline_opts, duts = ['dut']) - cmdline_opts['max_cycles'] = 20 - run_sim(th, cmdline_opts) + run_sim(th) From b53cb069712d2d1b333481c99075d82a0a8f5ce5 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 23:09:55 +0800 Subject: [PATCH 32/33] Remove trace. --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 416e8b4..d20680a 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -62,7 +62,7 @@ jobs: # Simulation across all tests. pytest .. -v --tb=short # Tile translation. - pytest ../tile/test/TileRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd --tb=short + pytest ../tile/test/TileRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd # CGRA template translation. pytest ../cgra/test/CgraTemplateRTL_test.py -xvs --test-verilog --dump-vtb --dump-vcd # TODO: Need to check vector/heterogneous modules exist in the generated Verilog. From 362bf36892de1e45a9b7cdc4a600f8cdf8cc8fc9 Mon Sep 17 00:00:00 2001 From: yuqisun Date: Sat, 15 Feb 2025 23:18:38 +0800 Subject: [PATCH 33/33] Remove trace. 
--- systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 7630137..2217294 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -89,9 +89,7 @@ def check_parity(s): for i in range(len(s.expected_out)): for j in range(len(s.expected_out[i])): # Outputs are stored in bank 2 and bank 3. - print(f"out 1: {s.dut.data_mem.reg_file[2+i].regs[j]} <-> out 2: {s.expected_out[i][j]}") if s.dut.data_mem.reg_file[2+i].regs[j] != s.expected_out[i][j]: - print(f"xxxxxxxxxxxxxxxxxxx out 1: {s.dut.data_mem.reg_file[2 + i].regs[j]} <-> out 2: {s.expected_out[i][j]}") return False return True