diff --git a/cgra/CgraRTL.py b/cgra/CgraRTL.py index eae44ba..9e5f495 100644 --- a/cgra/CgraRTL.py +++ b/cgra/CgraRTL.py @@ -30,8 +30,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, cgra_topology, - controller2addr_map, idTo2d_map, preload_data = None, - preload_const = None): + controller2addr_map, idTo2d_map, preload_data = None): # Other topology can simply modify the tiles connections, or # leverage the template for modeling. @@ -66,15 +65,12 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, - const_list = preload_const[i]) + FuList = FuList) for i in range(s.num_tiles)] s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, data_mem_size_global, diff --git a/cgra/CgraTemplateRTL.py b/cgra/CgraTemplateRTL.py index 218a32b..576dfda 100644 --- a/cgra/CgraTemplateRTL.py +++ b/cgra/CgraTemplateRTL.py @@ -31,7 +31,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, TileList, LinkList, dataSPM, controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None): + preload_data = None): s.num_mesh_ports = 8 s.num_tiles = len(TileList) @@ -60,16 +60,13 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, # s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in 
range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, - const_list = preload_const[i]) + FuList = FuList) for i in range(s.num_tiles)] # FIXME: Need to enrish data-SPM-related user-controlled parameters, e.g., number of banks. s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, diff --git a/cgra/test/CgraRTL_test.py b/cgra/test/CgraRTL_test.py index 1499f2c..c789439 100644 --- a/cgra/test/CgraRTL_test.py +++ b/cgra/test/CgraRTL_test.py @@ -144,14 +144,16 @@ def init_param(topology, FuList = [MemUnitRTL, AdderRTL], data_bitwidth = 32): } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_bitwidth) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, diff --git a/cgra/test/CgraTemplateRTL_test.py b/cgra/test/CgraTemplateRTL_test.py index a73ae7d..3235449 100644 --- a/cgra/test/CgraTemplateRTL_test.py +++ b/cgra/test/CgraTemplateRTL_test.py @@ -199,7 +199,8 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): FunctionUnit = FlexibleFuRTL # FuList = [MemUnitRTL, AdderRTL] FuList = [PhiRTL, AdderRTL, ShifterRTL, MemUnitRTL, SelRTL, CompRTL, SeqMulAdderRTL, RetRTL, MulRTL, LogicRTL, BranchRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) CmdType = mk_bits(4) @@ -220,14 +221,16 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(width 
* height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -239,7 +242,7 @@ def test_cgra_universal(cmdline_opts, paramCGRA = None): NocPktType = mk_multi_cgra_noc_pkt(ncols = num_terminals, nrows = 1, addr_nbits = addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pick_register = [FuInType(x + 1) for x in range(num_fu_inports)] tile_in_code = [TileInType(max(4 - x, 0)) for x in range(num_routing_outports)] diff --git a/controller/ControllerRTL.py b/controller/ControllerRTL.py index f164f1d..bdf953d 100644 --- a/controller/ControllerRTL.py +++ b/controller/ControllerRTL.py @@ -282,4 +282,3 @@ def line_trace(s): recv_from_noc_str = "recv_from_noc_pkt: " + str(s.recv_from_noc.msg) send_to_noc_str = "send_to_noc_pkt: " + str(s.send_to_noc.msg) + "; rdy: " + str(s.send_to_noc.rdy) + "; val: " + str(s.send_to_noc.val) return f'{send_to_ctrl_ring_ctrl_pkt_str} || {recv_from_tile_load_request_pkt_str} || {recv_from_tile_load_response_pkt_str} || {recv_from_tile_store_request_pkt_str} || {crossbar_str} || {send_to_tile_load_request_addr_str} || {send_to_tile_store_request_addr_str} || {send_to_tile_store_request_data_str} || {recv_from_noc_str} || {send_to_noc_str}\n' - diff --git a/controller/test/ControllerRTL_test.py b/controller/test/ControllerRTL_test.py index 2942ab8..78be90b 100644 --- a/controller/test/ControllerRTL_test.py +++ b/controller/test/ControllerRTL_test.py @@ -254,4 +254,3 @@ def test_simple(): controller2addr_map, idTo2d_map, nterminals) run_sim(th) - diff --git a/lib/cmd_type.py b/lib/cmd_type.py index 34200b7..41400db 100644 --- a/lib/cmd_type.py +++ b/lib/cmd_type.py @@ -19,6 +19,8 @@ CMD_LOAD_REQUEST = 4 CMD_LOAD_RESPONSE = 5 CMD_STORE_REQUEST = 6 +CMD_CONST = 7 + CMD_SYMBOL_DICT = { CMD_LAUNCH: 
"(LAUNCH_KERNEL)", @@ -28,5 +30,6 @@ CMD_LOAD_REQUEST: "(LOAD_REQUEST)", CMD_LOAD_RESPONSE: "(LOAD_RESPONSE)", CMD_STORE_REQUEST: "(STORE_REQUEST)", + CMD_CONST: "(CONST_DATA)" } diff --git a/lib/messages.py b/lib/messages.py index a16cf47..02ca455 100644 --- a/lib/messages.py +++ b/lib/messages.py @@ -705,3 +705,159 @@ def str_func(s): namespace = {'__str__': str_func} ) + +#========================================================================= +# Ring for delivering ctrl and data signals and commands across CGRAs +#========================================================================= + +def mk_intra_cgra_pkt(nrouters = 4, + ctrl_actions = 8, + ctrl_mem_size = 4, + ctrl_operations = 7, + ctrl_fu_inports = 4, + ctrl_fu_outports = 4, + ctrl_tile_inports = 5, + ctrl_tile_outports = 5, + ctrl_registers_per_reg_bank = 16, + data_nbits = 16, + prefix="IntraCgraPacket"): + + IdType = mk_bits(clog2(nrouters)) + opaque_nbits = 1 + OpqType = mk_bits(opaque_nbits) + CtrlActionType = mk_bits(clog2(ctrl_actions)) + CtrlAddrType = mk_bits(clog2(ctrl_mem_size)) + CtrlOperationType = mk_bits(clog2(ctrl_operations)) + CtrlTileInType = mk_bits(clog2(ctrl_tile_inports + 1)) + CtrlTileOutType = mk_bits(clog2(ctrl_tile_outports + 1)) + num_routing_outports = ctrl_tile_outports + ctrl_fu_inports + CtrlRoutingOutType = mk_bits(clog2(num_routing_outports + 1)) + CtrlFuInType = mk_bits(clog2(ctrl_fu_inports + 1)) + CtrlFuOutType = mk_bits(clog2(ctrl_fu_outports + 1)) + CtrlPredicateType = mk_bits(1) + vector_factor_power_nbits = 3 + CtrlVectorFactorPowerType = mk_bits(vector_factor_power_nbits) + + # 3 inports of register file bank. 
+ CtrlRegFromType = mk_bits(2) + CtrlRegIdxType = mk_bits(clog2(ctrl_registers_per_reg_bank)) + VcIdType = mk_bits(1) + DataType = mk_bits(data_nbits) + + new_name = f"{prefix}_{nrouters}_{opaque_nbits}_{ctrl_actions}_" \ + f"{ctrl_mem_size}_{ctrl_operations}_{ctrl_fu_inports}_" \ + f"{ctrl_fu_outports}_{ctrl_tile_inports}_" \ + f"{ctrl_tile_outports}_{ctrl_registers_per_reg_bank}" + + def str_func(s): + out_str = '(ctrl_operation)' + str(s.ctrl_operation) + out_str += '|(ctrl_fu_in)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_in[i])) + + out_str += '|(ctrl_predicate)' + out_str += str(int(s.ctrl_predicate)) + + out_str += '|(ctrl_routing_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_xbar_outport[i])) + + out_str += '|(ctrl_fu_xbar_out)' + for i in range(num_routing_outports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_fu_xbar_outport[i])) + + out_str += '|(data)' + str(s.data) + + out_str += '|(ctrl_predicate_in)' + for i in range(ctrl_tile_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_routing_predicate_in[i])) + + out_str += '|(ctrl_vector_factor_power)' + out_str += str(int(s.ctrl_vector_factor_power)) + + out_str += '|(ctrl_is_last_ctrl)' + out_str += str(int(s.ctrl_is_last_ctrl)) + + out_str += '|(ctrl_read_reg_from)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_read_reg_from[i])) + + out_str += '|(write_reg_from)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_write_reg_from[i])) + + out_str += '|(write_reg_idx)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_write_reg_idx[i])) + + out_str += '|(read_reg_idx)' + for i in range(ctrl_fu_inports): + if i != 0: + out_str += '-' + out_str += str(int(s.ctrl_read_reg_idx[i])) + + return 
f"{s.src}>{s.dst}:{s.opaque}:{s.ctrl_action}.{s.ctrl_addr}." \ + f"{out_str}" + + field_dict = {} + field_dict['src'] = IdType + field_dict['dst'] = IdType + field_dict['opaque'] = OpqType + field_dict['vc_id'] = VcIdType + field_dict['ctrl_action'] = CtrlActionType + field_dict['ctrl_addr'] = CtrlAddrType + field_dict['ctrl_operation'] = CtrlOperationType + # TODO: need fix to pair `predicate` with specific operation. + # The 'predicate' indicates whether the current operation is based on + # the partial predication or not. Note that 'predicate' is different + # from the following 'predicate_in', which contributes to the 'predicate' + # at the next cycle. + field_dict['ctrl_predicate'] = CtrlPredicateType + # The fu_in indicates the input register ID (i.e., operands) for the + # operation. + field_dict['ctrl_fu_in'] = [CtrlFuInType for _ in range(ctrl_fu_inports)] + + field_dict['ctrl_routing_xbar_outport'] = [CtrlTileInType for _ in range( + num_routing_outports)] + field_dict['ctrl_fu_xbar_outport'] = [CtrlFuOutType for _ in range( + num_routing_outports)] + + field_dict['data'] = DataType + + # I assume one tile supports single predicate during the entire execution + # time, as it is hard to distinguish predication for different operations + # (we automatically update, i.e., 'or', the predicate stored in the + # predicate register). This should be guaranteed by the compiler. + field_dict['ctrl_routing_predicate_in'] = [CtrlPredicateType for _ in range( + ctrl_tile_inports)] + + field_dict['ctrl_vector_factor_power'] = CtrlVectorFactorPowerType + + field_dict['ctrl_is_last_ctrl'] = b1 + + # Register file related signals. + # Indicates whether to write data into the register bank, and the + # corresponding inport. + field_dict['ctrl_write_reg_from'] = [CtrlRegFromType for _ in range(ctrl_fu_inports)] + field_dict['ctrl_write_reg_idx'] = [CtrlRegIdxType for _ in range(ctrl_fu_inports)] + # Indicates whether to read data from the register bank. 
+ field_dict['ctrl_read_reg_from'] = [b1 for _ in range(ctrl_fu_inports)] + field_dict['ctrl_read_reg_idx'] = [CtrlRegIdxType for _ in range(ctrl_fu_inports)] + + return mk_bitstruct(new_name, field_dict, + namespace = {'__str__': str_func} + ) \ No newline at end of file diff --git a/scale_out/MeshMultiCgraRTL.py b/scale_out/MeshMultiCgraRTL.py index 3b48da3..868da0e 100644 --- a/scale_out/MeshMultiCgraRTL.py +++ b/scale_out/MeshMultiCgraRTL.py @@ -24,8 +24,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, - controller2addr_map, preload_data = None, - preload_const = None): + controller2addr_map, preload_data = None): # Constant s.num_terminals = cgra_rows * cgra_columns @@ -55,7 +54,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, "Mesh", controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None) + preload_data = None) for terminal_id in range(s.num_terminals)] # Latency is 1. 
s.mesh = MeshNetworkRTL(NocPktType, MeshPos, cgra_columns, cgra_rows, 1) diff --git a/scale_out/RingMultiCgraRTL.py b/scale_out/RingMultiCgraRTL.py index 000a69b..4b6fb07 100644 --- a/scale_out/RingMultiCgraRTL.py +++ b/scale_out/RingMultiCgraRTL.py @@ -24,8 +24,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, data_mem_size_global, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, - controller2addr_map, preload_data = None, - preload_const = None): + controller2addr_map, preload_data = None): # Constant idTo2d_map = {} @@ -55,7 +54,7 @@ def construct(s, CGRADataType, PredicateType, CtrlPktType, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, "Mesh", controller2addr_map, idTo2d_map, - preload_data = None, preload_const = None) + preload_data = None) for terminal_id in range(s.num_terminals)] s.ring = RingNetworkRTL(NocPktType, RingPos, s.num_terminals, 1) diff --git a/scale_out/test/MeshMultiCgraRTL_test.py b/scale_out/test/MeshMultiCgraRTL_test.py index 750adcd..b3e21bf 100644 --- a/scale_out/test/MeshMultiCgraRTL_test.py +++ b/scale_out/test/MeshMultiCgraRTL_test.py @@ -84,7 +84,8 @@ def test_homo_2x2(cmdline_opts): DUT = MeshMultiCgraRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) cmd_nbits = 5 num_registers_per_reg_bank = 16 @@ -96,14 +97,17 @@ def test_homo_2x2(cmdline_opts): 3: [24, 31], } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits + ) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -115,7 +119,7 @@ def test_homo_2x2(cmdline_opts): nrows = cgra_rows, 
cmd_nbits = cmd_nbits, addr_nbits = data_addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] src_opt_per_tile = [[ diff --git a/scale_out/test/RingMultiCgraRTL_test.py b/scale_out/test/RingMultiCgraRTL_test.py index b50a290..d2f5b45 100644 --- a/scale_out/test/RingMultiCgraRTL_test.py +++ b/scale_out/test/RingMultiCgraRTL_test.py @@ -84,7 +84,8 @@ def test_homo_2x2(cmdline_opts): DUT = RingMultiCgraRTL FunctionUnit = FlexibleFuRTL FuList = [MemUnitRTL, AdderRTL] - DataType = mk_data(32, 1) + data_nbits = 32 + DataType = mk_data(data_nbits, 1) PredicateType = mk_predicate(1, 1) cmd_nbits = 5 num_registers_per_reg_bank = 16 @@ -96,14 +97,17 @@ def test_homo_2x2(cmdline_opts): 3: [24, 31], } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, + mk_intra_cgra_pkt(width * height, num_ctrl_actions, ctrl_mem_size, num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, - num_tile_outports) + num_tile_outports, + num_registers_per_reg_bank, + data_nbits + ) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -115,7 +119,7 @@ def test_homo_2x2(cmdline_opts): nrows = 1, addr_nbits = data_addr_nbits, cmd_nbits = cmd_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pickRegister = [FuInType(x + 1) for x in range(num_fu_inports)] src_opt_per_tile = [[ diff --git a/systolic/CgraSystolicArrayRTL.py b/systolic/CgraSystolicArrayRTL.py index 3d172d1..e7a0177 100644 --- a/systolic/CgraSystolicArrayRTL.py +++ b/systolic/CgraSystolicArrayRTL.py @@ -29,7 +29,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, num_ctrl, total_steps, FunctionUnit, FuList, controller2addr_map, - preload_data = None, preload_const = None): + preload_data = None): # Other topology can simply modify the tiles connections, or # leverage the template for 
modeling. @@ -62,14 +62,12 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.send_data_on_boundary_west = [SendIfcRTL(DataType) for _ in range(height)] # Components - if preload_const == None: - preload_const = [[DataType(0, 0)] for _ in range(s.num_tiles)] s.tile = [TileRTL(DataType, PredicateType, CtrlPktType, CtrlSignalType, ctrl_mem_size, data_mem_size_global, num_ctrl, total_steps, 4, 2, s.num_mesh_ports, s.num_mesh_ports, num_registers_per_reg_bank, - FuList = FuList, const_list = preload_const[i], + FuList = FuList, id = i) for i in range(s.num_tiles)] s.data_mem = DataMemWithCrossbarRTL(NocPktType, DataType, @@ -178,4 +176,3 @@ def line_trace(s): for (i,x) in enumerate(s.tile)]) res += "\nData Memory: [" + s.data_mem.line_trace() + "] \n" return res - diff --git a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py index 56d5284..2217294 100644 --- a/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py +++ b/systolic/test/Cgra3x3MemRightAndBottomRTL_matmul_2x2_test.py @@ -44,7 +44,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, src_ctrl_pkt, ctrl_steps, controller2addr_map, - preload_data, preload_const, expected_out): + preload_data, expected_out): s.DataType = DataType s.num_tiles = width * height @@ -57,7 +57,7 @@ def construct(s, DUT, FunctionUnit, FuList, DataType, PredicateType, data_mem_size_per_bank, num_banks_per_cgra, num_registers_per_reg_bank, 1, kMaxCycles, FunctionUnit, FuList, - controller2addr_map, preload_data, preload_const) + controller2addr_map, preload_data) # Connections. s.src_ctrl_pkt.send //= s.dut.recv_from_cpu_ctrl_pkt @@ -90,7 +90,7 @@ def check_parity(s): for j in range(len(s.expected_out[i])): # Outputs are stored in bank 2 and bank 3. 
if s.dut.data_mem.reg_file[2+i].regs[j] != s.expected_out[i][j]: - return False + return False return True def done(s): @@ -155,6 +155,7 @@ def test_CGRA_systolic(cmdline_opts): num_ctrl_actions = 6 num_ctrl_operations = 64 num_registers_per_reg_bank = 16 + data_nbits = 32 TileInType = mk_bits(clog2(num_tile_inports + 1)) FuInType = mk_bits(clog2(num_fu_inports + 1)) FuOutType = mk_bits(clog2(num_fu_outports + 1)) @@ -177,14 +178,16 @@ def test_CGRA_systolic(cmdline_opts): } CtrlPktType = \ - mk_ring_across_tiles_pkt(width * height, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(width * height, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, @@ -196,14 +199,19 @@ def test_CGRA_systolic(cmdline_opts): NocPktType = mk_multi_cgra_noc_pkt(ncols = 1, nrows = 1, addr_nbits = addr_nbits, - data_nbits = 32, + data_nbits = data_nbits, predicate_nbits = 1) pick_register = [FuInType(x + 1) for x in range(num_fu_inports)] src_opt_per_tile = [ # On tile 0 ([0, 0]). - # src dst vc_id opq cmd_type addr operation predicate [ + # Const + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 1), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 0), + + # src dst vc_id opq cmd_type addr operation predicate CtrlPktType(0, 0, 0, 0, CMD_CONFIG, 0, OPT_LD_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -216,8 +224,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 1 ([0, 1]). 
- # src dst vc_id opq cmd_type addr operation predicate [ + # Const + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 5), + CtrlPktType(0, 1, 0, 0, ctrl_action = CMD_CONST, data = 0), + + # src dst vc_id opq cmd_type addr operation predicate CtrlPktType(0, 1, 0, 0, CMD_CONFIG, 0, OPT_LD_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -230,7 +243,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 2 ([0, 2]). - [CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register, + [ + # Const + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 2, 0, 0, ctrl_action = CMD_CONST, data = 0), + + CtrlPktType(0, 2, 0, 0, CMD_CONFIG, 0, OPT_NAH, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], [FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0), @@ -242,7 +261,13 @@ def test_CGRA_systolic(cmdline_opts): FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (0)])], # On tile 3 ([1, 0]). - [CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, + [ + # Const + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + CtrlPktType(0, 3, 0, 0, ctrl_action = CMD_CONST, data = 2), + + CtrlPktType(0, 3, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, [TileInType(2), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(0), TileInType(0)], [FuOutType (0), FuOutType (0), FuOutType (0), FuOutType (1), @@ -255,6 +280,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 4 ([1, 1]). 
[ + # Const + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, ctrl_action = CMD_CONST, data = 4), + CtrlPktType(0, 4, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST_ADD, b1(0), pick_register, [TileInType(2), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(3), TileInType(0)], @@ -268,6 +298,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 5 ([1, 2]). [ + # Const + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 9), + CtrlPktType(0, 5, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 5, 0, 0, CMD_CONFIG, 0, OPT_STR_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(3), TileInType(0), TileInType(0), TileInType(0)], @@ -281,6 +316,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 6 ([2, 0]). [ + # Const + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, ctrl_action = CMD_CONST, data = 6), + CtrlPktType(0, 6, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(0), TileInType(0)], @@ -294,6 +334,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 7 ([2, 1]). [ + # Const + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, ctrl_action = CMD_CONST, data = 8), + CtrlPktType(0, 7, 0, 0, CMD_CONFIG, 0, OPT_MUL_CONST_ADD, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(2), TileInType(0), TileInType(3), TileInType(0)], @@ -307,6 +352,11 @@ def test_CGRA_systolic(cmdline_opts): # On tile 8 ([2, 2]). 
[ + # Const + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 12), + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 13), + CtrlPktType(0, 8, 0, 0, ctrl_action = CMD_CONST, data = 0), + CtrlPktType(0, 8, 0, 0, CMD_CONFIG, 0, OPT_STR_CONST, b1(0), pick_register, [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(3), TileInType(0), TileInType(0), TileInType(0)], @@ -336,7 +386,7 @@ def test_CGRA_systolic(cmdline_opts): for i in range(data_mem_size_per_bank)] for j in range(num_banks_per_cgra)] - preload_const = [ + '''preload_const = [ # The offset address used for loading input activation. # We use a shared data memory here, indicating global address # space. Users can make each tile has its own address space. @@ -365,8 +415,8 @@ def test_CGRA_systolic(cmdline_opts): [DataType(8, 1), DataType(8, 1), DataType(8, 1)], # The third column (except the bottom one) is used to store the # accumulated results. - [DataType(12, 1), DataType(13, 1), DataType(0, 0)]] - + [DataType(12, 1), DataType(13, 1), DataType(0, 0)]]''' + """ 1 3 2 6 14 20 x = @@ -386,7 +436,7 @@ def test_CGRA_systolic(cmdline_opts): num_registers_per_reg_bank, src_ctrl_pkt, ctrl_mem_size, controller2addr_map, preload_data_per_bank, - preload_const, expected_out) + expected_out) th.elaborate() th.dut.set_metadata(VerilogTranslationPass.explicit_module_name, @@ -397,4 +447,3 @@ def test_CGRA_systolic(cmdline_opts): cmdline_opts['dump_vcd'] or \ cmdline_opts['dump_vtb']) run_sim(th, enable_verification_pymtl) - diff --git a/tile/TileRTL.py b/tile/TileRTL.py index aef2bbc..015281b 100644 --- a/tile/TileRTL.py +++ b/tile/TileRTL.py @@ -14,23 +14,24 @@ Date : Nov 26, 2024 """ -from pymtl3 import * from ..fu.flexible.FlexibleFuRTL import FlexibleFuRTL from ..fu.single.AdderRTL import AdderRTL from ..fu.single.BranchRTL import BranchRTL -from ..fu.single.PhiRTL import PhiRTL from ..fu.single.CompRTL import CompRTL from ..fu.single.MemUnitRTL import MemUnitRTL from 
..fu.single.MulRTL import MulRTL +from ..fu.single.PhiRTL import PhiRTL from ..lib.basic.val_rdy.ifcs import ValRdyRecvIfcRTL as RecvIfcRTL from ..lib.basic.val_rdy.ifcs import ValRdySendIfcRTL as SendIfcRTL -from ..mem.const.ConstQueueRTL import ConstQueueRTL +from ..lib.cmd_type import * +from ..mem.const.ConstQueueDynamicRTL import ConstQueueDynamicRTL from ..mem.ctrl.CtrlMemDynamicRTL import CtrlMemDynamicRTL +from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL from ..noc.CrossbarRTL import CrossbarRTL -from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL from ..noc.LinkOrRTL import LinkOrRTL +from ..noc.PyOCN.pymtl3_net.channel.ChannelRTL import ChannelRTL from ..rf.RegisterRTL import RegisterRTL -from ..mem.register_cluster.RegisterClusterRTL import RegisterClusterRTL + class TileRTL(Component): @@ -39,8 +40,8 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, num_fu_inports, num_fu_outports, num_tile_inports, num_tile_outports, num_registers_per_reg_bank = 16, Fu = FlexibleFuRTL, - FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, - MemUnitRTL], const_list = None, id = 0): + FuList = [PhiRTL, AdderRTL, CompRTL, MulRTL, BranchRTL, MemUnitRTL], + id = 0): # Constants. num_routing_xbar_inports = num_tile_inports @@ -71,8 +72,7 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element = FlexibleFuRTL(DataType, PredicateType, CtrlSignalType, num_fu_inports, num_fu_outports, data_mem_size, FuList) - s.const_queue = ConstQueueRTL(DataType, const_list \ - if const_list != None else [DataType(0)]) + s.const_mem = ConstQueueDynamicRTL(DataType, data_mem_size) s.routing_crossbar = CrossbarRTL(DataType, PredicateType, CtrlSignalType, num_routing_xbar_inports, @@ -108,13 +108,8 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.fu_crossbar_done = Wire(1) s.routing_crossbar_done = Wire(1) - # Connections. - # Ctrl. 
- s.ctrl_mem.recv_pkt //= s.recv_ctrl_pkt - # Constant queue. - # FIXME: @yuqi, https://github.com/tancheng/VectorCGRA/issues/11 - s.element.recv_const //= s.const_queue.send_const + s.element.recv_const //= s.const_mem.send_const for i in range(len(FuList)): if FuList[i] == MemUnitRTL: @@ -171,8 +166,6 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.fu_crossbar.send_data[num_tile_outports + i] //= \ s.register_cluster.recv_data_from_fu_crossbar[i] - # FIXME: @yuqi, https://github.com/tancheng/VectorCGRA/issues/11 - # The const can be delivered here. s.register_cluster.recv_data_from_const[i].msg //= DataType() s.register_cluster.recv_data_from_const[i].val //= 0 @@ -180,6 +173,24 @@ def construct(s, DataType, PredicateType, CtrlPktType, CtrlSignalType, s.element.recv_in[i] s.register_cluster.inport_opt //= s.ctrl_mem.send_ctrl.msg + @update + def feed_pkt(): + s.ctrl_mem.recv_pkt.msg @= CtrlPktType(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + s.const_mem.recv_const.msg @= DataType(0, 0, 0, 0) + s.ctrl_mem.recv_pkt.val @= 0 + s.const_mem.recv_const.val @= 0 + s.recv_ctrl_pkt.rdy @= 0 + + if s.recv_ctrl_pkt.val & ((s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONFIG) | (s.recv_ctrl_pkt.msg.ctrl_action == CMD_LAUNCH)): + s.ctrl_mem.recv_pkt.val @= 1 + s.ctrl_mem.recv_pkt.msg @= s.recv_ctrl_pkt.msg + s.recv_ctrl_pkt.rdy @= s.ctrl_mem.recv_pkt.rdy + elif s.recv_ctrl_pkt.val & (s.recv_ctrl_pkt.msg.ctrl_action == CMD_CONST): + s.const_mem.recv_const.val @= 1 + s.const_mem.recv_const.msg.payload @= s.recv_ctrl_pkt.msg.data + s.const_mem.recv_const.msg.predicate @= 1 + s.recv_ctrl_pkt.rdy @= s.const_mem.recv_const.rdy + # Updates the configuration memory related signals. 
@update def update_opt(): @@ -219,10 +230,12 @@ def already_done(): # Line trace def line_trace(s): recv_str = "|".join(["(" + str(x.msg) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.recv_data]) + send_str = "|".join([str(x.msg) for x in s.send_data]) tile_in_channel_recv_str = "|".join([str(x.recv.msg) for x in s.tile_in_channel]) tile_in_channel_send_str = "|".join([str(x.send.msg) for x in s.tile_in_channel]) tile_in_channel_str = "|".join([str(x.line_trace()) for x in s.tile_in_channel]) out_str = "|".join(["(" + str(x.msg.payload) + ", predicate: " + str(x.msg.predicate) + ", val: " + str(x.val) + ", rdy: " + str(x.rdy) + ")" for x in s.send_data]) ctrl_mem = s.ctrl_mem.line_trace() - return f"tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem} ## " + const_mem = s.const_mem.line_trace() + return f"send_str: {send_str}, tile_inports: {recv_str} => [tile_in_channel: {tile_in_channel_str} || routing_crossbar: {s.routing_crossbar.recv_opt.msg} || fu_crossbar: {s.fu_crossbar.recv_opt.msg} || element: {s.element.line_trace()} || s.element_done: {s.element_done}, s.fu_crossbar_done: {s.fu_crossbar_done}, s.routing_crossbar_done: {s.routing_crossbar_done} || ctrl_mem: {ctrl_mem}, const_mem: {const_mem} ## " diff --git a/tile/test/TileRTL_test.py b/tile/test/TileRTL_test.py index 902e19f..cbd4188 100644 --- a/tile/test/TileRTL_test.py +++ b/tile/test/TileRTL_test.py @@ -10,36 +10,30 @@ Date : Nov 26, 2024 """ -from pymtl3 import * +from pymtl3.passes.backends.verilog import (VerilogVerilatorImportPass) from pymtl3.stdlib.test_utils import (run_sim, config_model_with_cmdline_opts) -from pymtl3.passes.backends.verilog import 
(VerilogTranslationPass, - VerilogVerilatorImportPass) + from ..TileRTL import TileRTL +from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL +from ...fu.single.AdderRTL import AdderRTL +from ...fu.single.BranchRTL import BranchRTL +from ...fu.single.CompRTL import CompRTL +from ...fu.single.LogicRTL import LogicRTL +from ...fu.single.MemUnitRTL import MemUnitRTL +from ...fu.single.MulRTL import MulRTL +from ...fu.single.PhiRTL import PhiRTL +from ...fu.single.SelRTL import SelRTL +from ...fu.single.ShifterRTL import ShifterRTL from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...fu.vector.VectorMulComboRTL import VectorMulComboRTL -from ...fu.vector.VectorAdderComboRTL import VectorAdderComboRTL -from ...fu.vector.VectorAllReduceRTL import VectorAllReduceRTL -from ...fu.single.AdderRTL import AdderRTL -from ...fu.single.MemUnitRTL import MemUnitRTL -from ...fu.single.MulRTL import MulRTL -from ...fu.single.SelRTL import SelRTL -from ...fu.single.ShifterRTL import ShifterRTL -from ...fu.single.LogicRTL import LogicRTL -from ...fu.single.PhiRTL import PhiRTL -from ...fu.single.CompRTL import CompRTL -from ...fu.single.BranchRTL import BranchRTL -from ...fu.single.NahRTL import NahRTL -from ...fu.triple.ThreeMulAdderShifterRTL import ThreeMulAdderShifterRTL -from ...fu.flexible.FlexibleFuRTL import FlexibleFuRTL -from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL +from ...fu.vector.VectorAdderComboRTL import VectorAdderComboRTL +from ...fu.vector.VectorMulComboRTL import VectorMulComboRTL from ...lib.basic.val_rdy.SinkRTL import SinkRTL as ValRdyTestSinkRTL -from ...lib.messages import * +from ...lib.basic.val_rdy.SourceRTL import SourceRTL as ValRdyTestSrcRTL from ...lib.cmd_type import * +from ...lib.messages import * from ...lib.opt_type import * -from ...mem.ctrl.CtrlMemRTL import 
CtrlMemRTL + #------------------------------------------------------------------------- # Test harness @@ -131,15 +125,19 @@ def test_tile_alu(cmdline_opts): # 64-bit to satisfy the default bitwidth of vector FUs. DataType = mk_data(64, 1) PredicateType = mk_predicate(1, 1) + data_nbits = 64 + CtrlPktType = \ - mk_ring_across_tiles_pkt(num_terminals, - num_ctrl_actions, - ctrl_mem_size, - num_ctrl_operations, - num_fu_inports, - num_fu_outports, - num_tile_inports, - num_tile_outports) + mk_intra_cgra_pkt(num_terminals, + num_ctrl_actions, + ctrl_mem_size, + num_ctrl_operations, + num_fu_inports, + num_fu_outports, + num_tile_inports, + num_tile_outports, + num_registers_per_reg_bank, + data_nbits) CtrlSignalType = \ mk_separate_reg_ctrl(num_ctrl_operations, num_fu_inports, num_fu_outports, num_tile_inports, @@ -190,7 +188,12 @@ def test_tile_alu(cmdline_opts): # fu_xbar_output [FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(1), FuOutType(0), FuOutType(0), FuOutType(0), FuOutType(0)]), - CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_ADD, b1(0), pick_register1, + + # for const: 5, 7 + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 5), + CtrlPktType(0, 0, 0, 0, ctrl_action = CMD_CONST, data = 7), + + CtrlPktType(0, 0, 0, 0, CMD_LAUNCH, 0, OPT_NAH, b1(0), pick_register1, # routing_xbar_output [TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0), TileInType(0)], @@ -201,7 +204,7 @@ def test_tile_alu(cmdline_opts): [], [DataType(4, 1)], [DataType(5, 1), DataType(7, 1)]] - src_const = [DataType(5, 1), DataType(0, 0), DataType(7, 1)] + sink_out = [ # 7 - 3 = 4. [DataType(4, 1)],