forked from openvinotoolkit/openvino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GPU] Micro sdpa (openvinotoolkit#24656)
### Details:
- Added SDPA impl based on microkernels using internal oneDNN API and related infra
- Current limitations:
  - fused transpose shouldn't change order of innermost dim (head size)
  - is_causal = true is not supported
  - fp16 only
  - num heads dimension must be static
  - no indirect kv support
- Initial version of KV Cache + SDPA func test
- Enabled Transpose+SDPA fusion for static shape too

### Tickets:
- CVS-141761
- Loading branch information
1 parent
a3d2b6a
commit 2918322
Showing
35 changed files
with
2,216 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...gins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/generic_vector_ops.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/******************************************************************************* | ||
* Copyright 2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
#ifndef GPU_INTEL_OCL_GENERIC_VECTOR_OPS_H | ||
#define GPU_INTEL_OCL_GENERIC_VECTOR_OPS_H | ||
|
||
/*
 * One-element vector types built with the ext_vector_type attribute.
 * Values of these types are indexed with [0], like lane 0 of a wider vector.
 * NOTE(review): presumably these exist so that width-generic code can use a
 * vector width of 1 next to the built-in 2/4/8/16-wide types - confirm
 * against the kernels that include this header.
 */
typedef half __attribute__((ext_vector_type(1))) half1;
typedef uint __attribute__((ext_vector_type(1))) uint1;
typedef float __attribute__((ext_vector_type(1))) float1;
|
||
/*
 * vmad(a, b, c): multiply-add wrapper around the OpenCL built-in mad(),
 * overloaded for float vectors of width 1, 2, 4, 8 and 16.
 *
 * Every overload returns the result of mad(a, b, c). The 2/4/8/16-wide
 * overloads forward the whole vector to mad(); the float1 overload passes
 * element 0 of each argument to mad() and stores the result back into
 * element 0 of c, because float1 is the one-element ext_vector_type declared
 * in this header.
 */
float1 __attribute__((overloadable)) vmad(float1 a, float1 b, float1 c) {
    /* c is a by-value copy, so updating it does not affect the caller. */
    c[0] = mad(a[0], b[0], c[0]);
    return c;
}
float2 __attribute__((overloadable)) vmad(float2 a, float2 b, float2 c) {
    return mad(a, b, c);
}
float4 __attribute__((overloadable)) vmad(float4 a, float4 b, float4 c) {
    return mad(a, b, c);
}
float8 __attribute__((overloadable)) vmad(float8 a, float8 b, float8 c) {
    return mad(a, b, c);
}
float16 __attribute__((overloadable)) vmad(float16 a, float16 b, float16 c) {
    return mad(a, b, c);
}
|
||
/*
 * native_vrecip(x): reciprocal wrapper around the OpenCL built-in
 * native_recip(), overloaded for float vectors of width 1, 2, 4, 8 and 16.
 *
 * Every overload returns the result of native_recip(x). The 2/4/8/16-wide
 * overloads forward the whole vector; the float1 overload applies
 * native_recip() to element 0 and returns the updated one-element vector.
 * Accuracy is whatever the device's native_recip() provides.
 */
float1 __attribute__((overloadable)) native_vrecip(float1 x) {
    /* x is a by-value copy, so updating it does not affect the caller. */
    x[0] = native_recip(x[0]);
    return x;
}
float2 __attribute__((overloadable)) native_vrecip(float2 x) {
    return native_recip(x);
}
float4 __attribute__((overloadable)) native_vrecip(float4 x) {
    return native_recip(x);
}
float8 __attribute__((overloadable)) native_vrecip(float8 x) {
    return native_recip(x);
}
float16 __attribute__((overloadable)) native_vrecip(float16 x) {
    return native_recip(x);
}
|
||
/*
 * native_vexp2(x): base-2 exponential wrapper around the OpenCL built-in
 * native_exp2(), overloaded for float vectors of width 1, 2, 4, 8 and 16.
 *
 * Every overload returns the result of native_exp2(x). The 2/4/8/16-wide
 * overloads forward the whole vector; the float1 overload applies
 * native_exp2() to element 0 and returns the updated one-element vector.
 * Accuracy is whatever the device's native_exp2() provides.
 */
float1 __attribute__((overloadable)) native_vexp2(float1 x) {
    /* x is a by-value copy, so updating it does not affect the caller. */
    x[0] = native_exp2(x[0]);
    return x;
}
float2 __attribute__((overloadable)) native_vexp2(float2 x) {
    return native_exp2(x);
}
float4 __attribute__((overloadable)) native_vexp2(float4 x) {
    return native_exp2(x);
}
float8 __attribute__((overloadable)) native_vexp2(float8 x) {
    return native_exp2(x);
}
float16 __attribute__((overloadable)) native_vexp2(float16 x) {
    return native_exp2(x);
}
|
||
#endif |
35 changes: 35 additions & 0 deletions
35
src/plugins/intel_gpu/src/kernel_selector/cl_kernels/include/batch_headers/sdpa_utils.cl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/******************************************************************************* | ||
* Copyright 2024 Intel Corporation | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*******************************************************************************/ | ||
|
||
#ifndef GPU_OCL_SDPA_UTILS_H | ||
#define GPU_OCL_SDPA_UTILS_H | ||
|
||
/*
 * _4D_OFF(tag, x0, x1, x2, x3): linear offset of coordinate (x0, x1, x2, x3)
 * computed from constants whose names are formed by pasting `tag` in front
 * of _B<i>, _SB<i> and _S<i> for i = 0..3.
 *
 * For each dimension i the contribution is
 *     (x<i> % tag_B<i>) * tag_SB<i> + (x<i> / tag_B<i>) * tag_S<i>
 * and the four contributions are summed. tag_B<i> must be non-zero because
 * it is used as a divisor.
 * NOTE(review): the names suggest B = block size, SB = stride inside a block
 * and S = stride between blocks - confirm against the code that defines
 * these constants.
 *
 * Each coordinate argument is expanded twice, so pass only expressions
 * without side effects.
 */
#define _4D_OFF(tag, x0, x1, x2, x3) \
    (((x0) % tag##_B0) * tag##_SB0 + ((x0) / tag##_B0) * tag##_S0 \
            + ((x1) % tag##_B1) * tag##_SB1 + ((x1) / tag##_B1) * tag##_S1 \
            + ((x2) % tag##_B2) * tag##_SB2 + ((x2) / tag##_B2) * tag##_S2 \
            + ((x3) % tag##_B3) * tag##_SB3 + ((x3) / tag##_B3) * tag##_S3)
|
||
/*
 * Per-tensor shorthands for _4D_OFF: each forwards its four coordinates with
 * a fixed tag (QRY, KEY, VAL or MSK). The corresponding <TAG>_B<i>,
 * <TAG>_SB<i> and <TAG>_S<i> constants (i = 0..3) must be defined wherever
 * the macro is expanded. As with _4D_OFF, each coordinate argument is
 * expanded twice.
 */
#define QRY_OFF(x0, x1, x2, x3) _4D_OFF(QRY, x0, x1, x2, x3)
#define KEY_OFF(x0, x1, x2, x3) _4D_OFF(KEY, x0, x1, x2, x3)
#define VAL_OFF(x0, x1, x2, x3) _4D_OFF(VAL, x0, x1, x2, x3)
#define MSK_OFF(x0, x1, x2, x3) _4D_OFF(MSK, x0, x1, x2, x3)
|
||
/*
 * DST_OFF(x0, x1, d, h, w): offset contribution of the first two coordinates
 * using the DST_B<i>, DST_SB<i> and DST_S<i> constants for i = 0 and 1:
 *     (x0 % DST_B0) * DST_SB0 + (x0 / DST_B0) * DST_S0
 *   + (x1 % DST_B1) * DST_SB1 + (x1 / DST_B1) * DST_S1
 *
 * The d, h and w parameters are accepted but do not appear in the expansion,
 * so they do not influence the result.
 * NOTE(review): callers presumably add the remaining dimensions themselves -
 * confirm at the call sites.
 *
 * x0 and x1 are each expanded twice, so pass only expressions without side
 * effects. DST_B0 and DST_B1 must be non-zero because they are divisors.
 */
#define DST_OFF(x0, x1, d, h, w) \
    (((x0) % DST_B0) * DST_SB0 + ((x0) / DST_B0) * DST_S0 \
            + ((x1) % DST_B1) * DST_SB1 + ((x1) / DST_B1) * DST_S1)
|
||
#endif |
Oops, something went wrong.