Skip to content

Commit

Permalink
Rework CPU pinning for Android benchmarks (#15478)
Browse files Browse the repository at this point in the history
This change adds the configurations of CPU pinning to the benchmark
definitions.

It also follows up on #15452 by remapping the multi-thread benchmarks onto the 2
homogeneous big cores of the Pixel 6.
  • Loading branch information
Jerry Wu authored Nov 13, 2023
1 parent e8c6432 commit 65912b5
Show file tree
Hide file tree
Showing 18 changed files with 78 additions and 182 deletions.
39 changes: 19 additions & 20 deletions build_tools/benchmarks/run_benchmarks_on_android.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@
)
import common.common_arguments
from e2e_test_artifacts import iree_artifacts
from e2e_test_framework.definitions import common_definitions, iree_definitions
from e2e_test_framework.device_specs import device_parameters
from e2e_test_framework.definitions import iree_definitions

# Root directory to perform benchmarks in on the Android device.
ANDROID_TMPDIR = pathlib.PurePosixPath("/data/local/tmp/iree-benchmarks")
Expand Down Expand Up @@ -308,6 +307,8 @@ def run_benchmark_case(
)

run_config = benchmark_case.run_config
# TODO(#15452): Change to `--task_topology_cpu_ids` once we figure out
# the right mapping.
taskset = self.__deduce_taskset_from_run_config(run_config)
run_args = run_config.materialize_run_flags(inputs_dir=inputs_dir)
run_args.append(f"--module={module_device_path}")
Expand Down Expand Up @@ -440,29 +441,27 @@ def __run_capture(
stdout_redirect = None if self.verbose else subprocess.DEVNULL
execute_cmd(capture_cmd, verbose=self.verbose, stdout=stdout_redirect)

# TODO(#13187): These logics are inherited from the legacy benchmark suites,
# which only work for a few specific phones. We should define the topology
# in their device specs.
def __deduce_taskset_from_run_config(
    self, run_config: iree_definitions.E2EModelRunConfig
) -> str:
    """Deduces the `taskset` CPU mask from the device spec and execution config.

    Args:
        run_config: run config to benchmark. Its target device spec may carry
            `device_parameters.cpu_params.pinned_cores`, a list of CPU core
            ids ordered from the slowest to the fastest.

    Returns:
        The CPU affinity mask as a lowercase hex string, zero-padded to at
        least 4 digits (e.g. "00c0" for cores 6 and 7). When the device spec
        has no CPU parameters, "ffff" is returned so no core is excluded.
    """

    cpu_params = run_config.target_device_spec.device_parameters.cpu_params
    if not cpu_params:
        # Assume the mobile CPUs have <= 16 cores.
        return "ffff"

    exec_config = run_config.module_execution_config
    pinned_cores = cpu_params.pinned_cores
    # Use the fastest core in the spec for single-thread benchmarks. The list
    # is ordered slowest-to-fastest, so the fastest core is the last entry.
    if (
        exec_config.driver == iree_definitions.RuntimeDriver.LOCAL_SYNC
        or "1-thread" in exec_config.tags
    ):
        pinned_cores = pinned_cores[-1:]

    # Build the mask from the (possibly narrowed) selection rather than from
    # the full spec; otherwise single-thread runs would still be allowed on
    # every listed core. OR the bits instead of summing them so a repeated
    # core id cannot carry over into a neighbouring bit.
    cpu_mask = 0
    for core_id in pinned_cores:
        cpu_mask |= 1 << core_id
    return f"{cpu_mask:04x}"

def __check_and_push_file(
self, host_path: pathlib.Path, device_dir: pathlib.PurePosixPath
Expand Down
12 changes: 4 additions & 8 deletions build_tools/benchmarks/run_benchmarks_on_linux.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
)
from common.linux_device_utils import get_linux_device_info
from e2e_test_artifacts import iree_artifacts
from e2e_model_tests import run_module_utils

import common.common_arguments

Expand Down Expand Up @@ -118,17 +117,14 @@ def __build_tool_cmds(
inputs_dir: Optional[pathlib.Path] = None,
) -> List[Any]:
run_config = benchmark_case.run_config
cmds: List[Any] = run_module_utils.build_linux_wrapper_cmds_for_device_spec(
run_config.target_device_spec
)
cmds.append(tool_path)

cmds += [f"--module={module_path}"]
cmds = [tool_path, f"--module={module_path}"]
cmds += run_config.materialize_run_flags(
gpu_id=self.gpu_id,
inputs_dir=inputs_dir,
)

cpu_params = run_config.target_device_spec.device_parameters.cpu_params
if cpu_params:
raise ValueError("CPU pinning is not supported yet.")
return cmds

def __fetch_and_unpack_npy(self, uri: str, dest_dir: pathlib.Path) -> pathlib.Path:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def generate(
module_execution_configs.get_elf_system_scheduling_local_task_config(
thread_num
)
for thread_num in [1, 4]
for thread_num in [1, 2]
]

default_gen_confings = [
Expand Down Expand Up @@ -97,7 +97,7 @@ def generate(
device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
architecture=common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters={"big-cores"},
tags=["big-cores"],
)
)
run_configs = utils.generate_e2e_model_run_configs(
Expand Down
4 changes: 2 additions & 2 deletions build_tools/python/benchmark_suites/iree/vmvx_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ def generate(
]
default_execution_configs = [
module_execution_configs.get_vmvx_system_scheduling_local_task_config(
thread_num=4
thread_num=2
)
]
big_cores_devices = (
device_collections.DEFAULT_DEVICE_COLLECTION.query_device_specs(
architecture=common_definitions.DeviceArchitecture.ARMV8_2_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters={"big-cores"},
tags=["big-cores"],
)
)
run_configs = utils.generate_e2e_model_run_configs(
Expand Down
12 changes: 0 additions & 12 deletions build_tools/python/e2e_model_tests/CMakeLists.txt

This file was deleted.

28 changes: 0 additions & 28 deletions build_tools/python/e2e_model_tests/run_module_utils.py

This file was deleted.

31 changes: 0 additions & 31 deletions build_tools/python/e2e_model_tests/run_module_utils_test.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,23 @@ class ModelSourceType(Enum):
EXPORTED_TFLITE = "exported_tflite"


@serialization.serializable
@dataclass(frozen=True)
class CPUParameters:
    """CPU related parameters of a device.

    Attributes:
        pinned_cores: the CPU cores to pin at, listed in order from the
            slowest core to the fastest core.
    """

    pinned_cores: List[int]


@serialization.serializable
@dataclass(frozen=True)
class DeviceParameters:
    """Parameters that describe a device.

    Attributes:
        cpu_params: the CPU related parameters; defaults to None when the
            device does not specify any.
    """

    cpu_params: Optional[CPUParameters] = None


@serialization.serializable(type_key="device_specs")
@dataclass(frozen=True)
class DeviceSpec(object):
Expand Down Expand Up @@ -138,7 +155,7 @@ class DeviceSpec(object):
# This is for modeling the spec of a heterogeneous processor. Depending on
# which cores you run, the device has a different spec. Benchmark machines use
# these parameters to set up the devices. E.g. set CPU mask.
device_parameters: List[str] = dataclasses.field(default_factory=list)
device_parameters: DeviceParameters

def __str__(self):
return self.name
Expand All @@ -150,7 +167,7 @@ def build(
device_name: str,
host_environment: HostEnvironment,
architecture: DeviceArchitecture,
device_parameters: Sequence[str] = (),
device_parameters: DeviceParameters = DeviceParameters(),
tags: Sequence[str] = (),
):
tag_part = ",".join(tags)
Expand All @@ -163,7 +180,7 @@ def build(
device_name=device_name,
host_environment=host_environment,
architecture=architecture,
device_parameters=list(device_parameters),
device_parameters=device_parameters,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from e2e_test_framework.device_specs import (
gcp_specs,
moto_edge_x30_specs,
pixel_4_specs,
pixel_6_pro_specs,
riscv_specs,
)
Expand All @@ -26,25 +25,26 @@ def query_device_specs(
self,
architecture: common_definitions.DeviceArchitecture,
host_environment: common_definitions.HostEnvironment,
device_parameters: Set[str] = set(),
tags: Sequence[str] = (),
) -> List[common_definitions.DeviceSpec]:
"""Query the device specs.
Args:
architecture: device architecture to match.
platform: device platform to match.
device_parameters: parameters that devices need to have.
tags: tags that devices need to have.
Returns:
List of matched device specs.
"""

matched_device_specs = []
tag_set = set(tags)
for device_spec in self.device_specs:
if device_spec.architecture != architecture:
continue
if device_spec.host_environment != host_environment:
continue
if not device_parameters.issubset(device_spec.device_parameters):
if not tag_set.issubset(device_spec.tags):
continue
matched_device_specs.append(device_spec)

Expand All @@ -53,7 +53,6 @@ def query_device_specs(

ALL_DEVICE_SPECS = [
# Pixel 6 Pro
pixel_6_pro_specs.LITTLE_CORES,
pixel_6_pro_specs.BIG_CORES,
pixel_6_pro_specs.ALL_CORES,
pixel_6_pro_specs.GPU,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,14 @@ def test_query_device_specs(self):
device_name="c",
architecture=common_definitions.DeviceArchitecture.ARMV9_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters=["little-cores"],
tags=[],
tags=["little-cores"],
)
big_cores_device_spec = common_definitions.DeviceSpec.build(
id="android_big",
device_name="d",
architecture=common_definitions.DeviceArchitecture.ARMV9_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters=["big-cores"],
tags=[],
tags=["big-cores"],
)
devices = device_collections.DeviceCollection(
device_specs=[
Expand All @@ -61,12 +59,12 @@ def test_query_device_specs(self):
little_cores_devices = devices.query_device_specs(
architecture=common_definitions.DeviceArchitecture.ARMV9_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters={"little-cores"},
tags=["little-cores"],
)
big_cores_devices = devices.query_device_specs(
architecture=common_definitions.DeviceArchitecture.ARMV9_A_GENERIC,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters={"big-cores"},
tags=["big-cores"],
)
all_arm_devices = devices.query_device_specs(
architecture=common_definitions.DeviceArchitecture.ARMV9_A_GENERIC,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,12 @@

from e2e_test_framework import unique_ids
from e2e_test_framework.definitions import common_definitions
from e2e_test_framework.device_specs import device_parameters

GCP_C2_STANDARD_16 = common_definitions.DeviceSpec.build(
id=unique_ids.DEVICE_SPEC_GCP_C2_STANDARD_16,
device_name="c2-standard-16",
host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
device_parameters=[device_parameters.ALL_CORES],
tags=["cpu"],
)

Expand All @@ -23,7 +21,6 @@
device_name="c2-standard-60",
host_environment=common_definitions.HostEnvironment.LINUX_X86_64,
architecture=common_definitions.DeviceArchitecture.X86_64_CASCADELAKE,
device_parameters=[device_parameters.ALL_CORES],
tags=["cpu"],
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,9 @@
device_name=DEVICE_NAME,
architecture=common_definitions.DeviceArchitecture.QUALCOMM_ADRENO,
host_environment=common_definitions.HostEnvironment.ANDROID_ARMV8_2_A,
device_parameters=common_definitions.DeviceParameters(
# Pin on the fastest CPU core.
cpu_params=common_definitions.CPUParameters(pinned_cores=[7])
),
tags=["gpu"],
)
Loading

0 comments on commit 65912b5

Please sign in to comment.