Update on "allow customized head_dim"

This resolves the ask in this [post](https://fb.workplace.com/groups/pytorch.edge.users/permalink/1574875706716050/). Similar change in HF: huggingface/transformers#32502. Differential Revision: [D65974454](https://our.internmc.facebook.com/intern/diff/D65974454/) [ghstack-poisoned]
pytorch · Nov 25, 2024 · 811ad8b · 811ad8b
2 parents 2a1ace5 + 3f500d5
commit 811ad8b
Show file tree

Hide file tree

Showing 232 changed files with 14,152 additions and 2,226 deletions.
diff --git a/.ci/docker/common/install_cache.sh b/.ci/docker/common/install_cache.sh
@@ -12,6 +12,26 @@ set -ex
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
+install_ubuntu() {
+  echo "Preparing to build sccache from source"
+  apt-get update
+  # libssl-dev will not work because it was upgraded to libssl3 in Ubuntu 22.04.
+  # Instead, use the library and headers from OpenSSL 1.1 installed in `install_openssl.sh`.
+  apt-get install -y cargo
+  echo "Checking out sccache repo"
+  git clone https://github.com/mozilla/sccache -b v0.8.2
+
+  cd sccache
+  echo "Building sccache"
+  cargo build --release
+  cp target/release/sccache /opt/cache/bin
+  echo "Cleaning up"
+  cd ..
+  rm -rf sccache
+  apt-get remove -y cargo rustc
+  apt-get autoclean && apt-get clean
+}
+
 install_binary() {
   echo "Downloading sccache binary from S3 repo"
   curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
@@ -22,15 +42,33 @@ mkdir -p /opt/cache/bin
 sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
 export PATH="/opt/cache/bin:$PATH"
 
-# NB: Install the pre-built binary from S3 as building from source
-# https://github.com/pytorch/sccache has started failing mysteriously
-# in which sccache server couldn't start with the following error:
-#   sccache: error: Invalid argument (os error 22)
-install_binary
+install_ubuntu
 
 function write_sccache_stub() {
   BINARY=$1
-  printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache %s \"\$@\"\nelse\n  exec %s \"\$@\"\nfi" "$(which "${BINARY}")" "$(which "${BINARY}")" > "/opt/cache/bin/${BINARY}"
+  if [ $1 == "gcc" ]; then
+    # Do not call sccache recursively when gcc is run with the preprocessor flag (-E).
+    # For some reason, this is very important for the first cached nvcc invocation.
+    cat >"/opt/cache/bin/$1" <<EOF
+#!/bin/sh
+if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
+  exec $(which $1) "\$@"
+elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
+  exec sccache $(which $1) "\$@"
+else
+  exec $(which $1) "\$@"
+fi
+EOF
+  else
+    cat >"/opt/cache/bin/$1" <<EOF
+#!/bin/sh
+if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
+  exec sccache $(which $1) "\$@"
+else
+  exec $(which $1) "\$@"
+fi
+EOF
+  fi
   chmod a+x "/opt/cache/bin/${BINARY}"
 }
 
@@ -44,7 +82,7 @@ init_sccache() {
 
   # NB: This function is adopted from PyTorch core at
   # https://github.com/pytorch/pytorch/blob/main/.ci/pytorch/common-build.sh
-  as_ci_user sccache --stop-server > /dev/null 2>&1 || true
+  as_ci_user sccache --stop-server >/dev/null 2>&1 || true
   rm -f "${SCCACHE_ERROR_LOG}" || true
 
   # Clear sccache stats before using it

diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile
@@ -57,6 +57,7 @@ COPY ./common/utils.sh utils.sh
 RUN bash ./install_cache.sh && rm install_cache.sh utils.sh
 ENV SCCACHE_BUCKET ossci-compiler-cache-circleci-v2
 ENV SCCACHE_S3_KEY_PREFIX executorch
+ENV SCCACHE_REGION us-east-1
 
 ARG TORCH_VERSION
 COPY ./common/install_pytorch.sh install_pytorch.sh

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
@@ -9,11 +9,41 @@ set -exu
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
-MODEL_NAME=$1 # stories110M
-BUILD_TOOL=$2 # buck2 or cmake
-DTYPE=$3 # fp16, bf16, or fp32
-MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
-UPLOAD_DIR=${5:-}
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -model)
+      MODEL_NAME="$2" # stories110M
+      shift 2
+      ;;
+    -build_tool)
+      BUILD_TOOL="$2" # buck2 or cmake
+      shift 2
+      ;;
+    -dtype)
+      DTYPE="$2" # fp16, bf16, or fp32
+      shift 2
+      ;;
+    -mode)
+      MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
+      shift 2
+      ;;
+    -upload)
+      UPLOAD_DIR="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      usage
+      ;;
+  esac
+done
+
+# Default mode to xnnpack+custom if not set
+MODE=${MODE:-"xnnpack+custom"}
+
+# Default UPLOAD_DIR to empty string if not set
+UPLOAD_DIR="${UPLOAD_DIR:-}"
+
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
     echo "Expecting atleast 4 positional arguments"
     echo "Usage: [...]"
@@ -150,7 +180,7 @@ cleanup_files() {
 }
 
 prepare_artifacts_upload() {
-  if [ -n "$UPLOAD_DIR" ]; then
+  if [ -n "${UPLOAD_DIR}" ]; then
     echo "Preparing for uploading generated artifacs"
     zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
     mkdir -p "${UPLOAD_DIR}"

diff --git a/.github/scripts/check_labels.py b/.github/scripts/check_labels.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""Check whether a PR has required labels."""
+
+import sys
+from typing import Any
+
+from github_utils import gh_delete_comment, gh_post_pr_comment
+from gitutils import get_git_remote_name, get_git_repo_dir, GitRepo
+from label_utils import has_required_labels, is_label_err_comment, LABEL_ERR_MSG
+from trymerge import GitHubPR
+
+
+def delete_all_label_err_comments(pr: "GitHubPR") -> None:
+    for comment in pr.get_comments():
+        if is_label_err_comment(comment):
+            gh_delete_comment(pr.org, pr.project, comment.database_id)
+
+
+def add_label_err_comment(pr: "GitHubPR") -> None:
+    # Only make a comment if one doesn't exist already
+    if not any(is_label_err_comment(comment) for comment in pr.get_comments()):
+        gh_post_pr_comment(pr.org, pr.project, pr.pr_num, LABEL_ERR_MSG)
+
+
+def parse_args() -> Any:
+    from argparse import ArgumentParser
+
+    parser = ArgumentParser("Check PR labels")
+    parser.add_argument("pr_num", type=int)
+    # add a flag to return a non-zero exit code if the PR does not have the required labels
+    parser.add_argument(
+        "--exit-non-zero",
+        action="store_true",
+        help="Return a non-zero exit code if the PR does not have the required labels",
+    )
+
+    return parser.parse_args()
+
+
+def main() -> None:
+    args = parse_args()
+    repo = GitRepo(get_git_repo_dir(), get_git_remote_name())
+    org, project = repo.gh_owner_and_name()
+    pr = GitHubPR(org, project, args.pr_num)
+
+    try:
+        if not has_required_labels(pr):
+            print(LABEL_ERR_MSG, flush=True)
+            add_label_err_comment(pr)
+            if args.exit_non_zero:
+                raise RuntimeError("PR does not have required labels")
+        else:
+            delete_all_label_err_comments(pr)
+    except Exception as e:
+        if args.exit_non_zero:
+            raise RuntimeError(f"Error checking labels: {e}") from e
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/scripts/github_utils.py b/.github/scripts/github_utils.py
@@ -72,10 +72,10 @@ def gh_fetch_url(
     headers: Optional[Dict[str, str]] = None,
     data: Union[Optional[Dict[str, Any]], str] = None,
     method: Optional[str] = None,
-    reader: Callable[[Any], Any] = lambda x: x.read(),
+    reader: Callable[[Any], Any] = json.load,
 ) -> Any:
     return gh_fetch_url_and_headers(
-        url, headers=headers, data=data, reader=json.load, method=method
+        url, headers=headers, data=data, reader=reader, method=method
     )[1]
 
 
@@ -169,7 +169,7 @@ def gh_post_commit_comment(
 
 def gh_delete_comment(org: str, repo: str, comment_id: int) -> None:
     url = f"{GITHUB_API_URL}/repos/{org}/{repo}/issues/comments/{comment_id}"
-    gh_fetch_url(url, method="DELETE")
+    gh_fetch_url(url, method="DELETE", reader=lambda x: x.read())
 
 
 def gh_fetch_merge_base(org: str, repo: str, base: str, head: str) -> str:

diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
@@ -136,7 +136,7 @@ jobs:
       fail-fast: false
     with:
       runner: linux.4xlarge
-      docker-image: executorch-ubuntu-22.04-clang12-android
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models

diff --git a/.github/workflows/check-labels.yml b/.github/workflows/check-labels.yml
@@ -0,0 +1,54 @@
+name: Check Labels
+
+on:
+  # We need pull_request_target to be able to post comments on PRs from forks.
+  # Only allow pull_request_target when merging to main, not some historical branch.
+  #
+  # Make sure not to introduce explicit checkout and installation/execution of
+  # untrusted user code in this workflow!
+  pull_request_target:
+    types: [opened, synchronize, reopened, labeled, unlabeled]
+    branches: [main]
+
+  # To check labels on ghstack PRs.
+  # Note: as pull_request doesn't trigger on PRs targeting main,
+  # to test changes to the workflow itself one needs to create
+  # a PR that targets a gh/**/base branch.
+  pull_request:
+    types: [opened, synchronize, reopened, labeled, unlabeled]
+    branches: [gh/**/base]
+
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to check labels for'
+        required: true
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  check-labels:
+    permissions:
+      contents: read
+      pull-requests: write
+    name: Check labels
+    if: github.repository_owner == 'pytorch'
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      # Not direct dependencies of the script itself, but the script uses trymerge
+      - run: pip install pyyaml==6.0 rockset==1.0.3
+      - name: Check labels
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUM: ${{ github.event.number || github.event.inputs.pr_number }}
+        run: |
+          set -ex
+          python3 .github/scripts/check_labels.py --exit-non-zero "${PR_NUM}"
diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml
@@ -26,6 +26,9 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
   cancel-in-progress: true
 
+env:
+  AWS_DEFAULT_REGION: us-east-1
+
 jobs:
   docker-build:
     runs-on: [self-hosted, linux.2xlarge]

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -117,7 +117,7 @@ jobs:
         # Install requirements for export_llama
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
 
   test-llama-runner-linux-android:
     name: test-llama-runner-linux-android
@@ -393,7 +393,7 @@ jobs:
         # Install requirements for export_llama
         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
         # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
 
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux

diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
@@ -261,7 +261,7 @@ jobs:
         # Install requirements for export_llama
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
         # Test llama2
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M cmake "${DTYPE}" "${MODE}"
+        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
 
   # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
   # test-llava-runner-macos:
@@ -302,7 +302,7 @@ jobs:
       fail-fast: false
     with:
       runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12-android
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 900

diff --git a/.gitmodules b/.gitmodules
@@ -1,9 +1,9 @@
 [submodule "backends/arm/third-party/ethos-u-core-driver"]
 	path = backends/arm/third-party/ethos-u-core-driver
-	url = https://review.mlplatform.org/ml/ethos-u/ethos-u-core-driver
+	url = https://github.com/pytorch-labs/ethos-u-core-driver-mirror
 [submodule "backends/arm/third-party/serialization_lib"]
 	path = backends/arm/third-party/serialization_lib
-	url = https://review.mlplatform.org/tosa/serialization_lib
+	url = https://github.com/pytorch-labs/tosa_serialization_lib-mirror
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -283,10 +283,15 @@ for basics.
    - If the reviewers have requests or questions, follow up with them.
    - The goal of the reviewer is to ensure that the code in the `main` branch of
      the repo is consistent, maintainable, and of high quality.
-1. Once approved, your reviewer will import the PR into Meta's internal system
-   and merge it from there.
-   - If the PR is approved and not merged within a few business days, please
-     comment on the PR to ask about its status.
+1. Once the PR has been approved:
+   - If you have "write permission" in this repo, you can merge it yourself
+     by clicking the "Squash and merge" button once it is green, i.e. all CI
+     signals are passing.
+   - If you don't have "write permission" in this repo, the reviewer will take
+     care of the PR. The reviewer may import the PR into Meta's internal system
+     to validate it against internal CI.
+   - If the PR is approved but not merged within 5 business days, please comment
+     on the PR to ask about its status.
    - Note that if the `main` [CI](#continuous-integration) jobs are broken, we
      will only merge PRs that fix the broken jobs until all critical jobs are
      fixed.