-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add class for documented Nvtx Ranges #12035
base: branch-25.04
Are you sure you want to change the base?
Changes from all commits
5fbcb3b
74d1044
6936c9b
72b3f01
d34458a
66a62b0
201a832
36d84de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
--- | ||
layout: page | ||
title: Compute Sanitizer | ||
nav_order: 7 | ||
nav_order: 9 | ||
parent: Developer Overview | ||
--- | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
--- | ||
layout: page | ||
title: The Local Replay Framework | ||
nav_order: 13 | ||
nav_order: 15 | ||
parent: Developer Overview | ||
--- | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
--- | ||
layout: page | ||
title: Memory Debugging | ||
nav_order: 10 | ||
nav_order: 12 | ||
parent: Developer Overview | ||
--- | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
--- | ||
layout: page | ||
title: NVTX Ranges | ||
nav_order: 5 | ||
parent: Developer Overview | ||
--- | ||
<!-- Generated by NvtxRangeDocs.help. DO NOT EDIT! --> | ||
# RAPIDS Accelerator for Apache Spark Nvtx Range Glossary | ||
The following is the list of Nvtx ranges that are used throughout | ||
the plugin. To add your own Nvtx range to the code, create an NvtxId | ||
entry in NvtxRangeWithDoc.scala and create an `NvtxRangeWithDoc` in the | ||
code location that you want to cover, passing in the newly created NvtxId. | ||
|
||
See [nvtx_profiling.md](https://nvidia.github.io/spark-rapids/docs/dev/nvtx_profiling.html) for more info. | ||
|
||
|
||
|
||
## Nvtx Ranges | ||
|
||
Name | Description | ||
-----|------------- | ||
Acquire GPU|Time waiting for GPU semaphore to be acquired | ||
Release GPU|Releasing the GPU semaphore |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
--- | ||
layout: page | ||
title: Shim Development | ||
nav_order: 4 | ||
nav_order: 6 | ||
parent: Developer Overview | ||
--- | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,84 @@ | ||||||
/* | ||||||
* Copyright (c) 2025, NVIDIA CORPORATION. | ||||||
* | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
* you may not use this file except in compliance with the License. | ||||||
* You may obtain a copy of the License at | ||||||
* | ||||||
* http://www.apache.org/licenses/LICENSE-2.0 | ||||||
* | ||||||
* Unless required by applicable law or agreed to in writing, software | ||||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
* See the License for the specific language governing permissions and | ||||||
* limitations under the License. | ||||||
*/ | ||||||
|
||||||
package com.nvidia.spark.rapids | ||||||
|
||||||
import ai.rapids.cudf.{NvtxColor, NvtxRange} | ||||||
import java.io.{File, FileOutputStream} | ||||||
import scala.collection.mutable.ListBuffer | ||||||
|
||||||
/**
 * Identifier for a documented NVTX range: a display name plus a one-line description.
 *
 * The constructor is private, so instances can only be created from the companion object.
 *
 * @param name label of the range
 * @param doc  human-readable description of what the range covers
 */
sealed class NvtxId private(val name: String, val doc: String) {
  /** Prints this id as one `name|doc` table row on standard output. */
  def help(): Unit = {
    val row = name + "|" + doc
    println(row)
  }
}
|
||||||
/**
 * Companion of the NvtxId class: declares the documented range ids and records each one,
 * in creation order, in `registeredRanges`.
 */
object NvtxId { | ||||||
// Every id created through the private `apply` below is appended to this buffer.
// NvtxRangeDocs.main iterates it to print one table row per id.
val registeredRanges = new ListBuffer[NvtxId]() | ||||||
|
||||||
// Appends the id to registeredRanges. No check for duplicate names is performed here.
private def register(nvtxId: NvtxId): Unit = registeredRanges += nvtxId | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would make it a Map and detect collisions |
||||||
|
||||||
// Private factory: builds the id, registers it, and returns it, so an id cannot be
// created through this path without also being listed in registeredRanges.
private def apply(name: String, doc: String): NvtxId = { | ||||||
val ret = new NvtxId(name, doc) | ||||||
register(ret) | ||||||
ret | ||||||
} | ||||||
|
||||||
// Id with the name "Acquire GPU".
val ACQUIRE_GPU: NvtxId = NvtxId(name = "Acquire GPU", doc = "Time waiting for GPU semaphore " + | ||||||
"to be acquired") | ||||||
|
||||||
// Id with the name "Release GPU".
val RELEASE_GPU: NvtxId = NvtxId(name = "Release GPU", doc = "Releasing the GPU semaphore") | ||||||
} | ||||||
|
||||||
/**
 * Generator for the "NVTX Ranges" markdown page: prints a fixed header followed by one
 * `name|doc` row for every id in `NvtxId.registeredRanges`.
 */
object NvtxRangeDocs {
  /**
   * Prints the page front matter, the "generated file" marker, the introductory text and
   * the table header to the current `Console.out`.
   */
  def helpCommon(): Unit = {
    println("---")
    println("layout: page")
    println("title: NVTX Ranges")
    println("nav_order: 5")
    println("parent: Developer Overview")
    println("---")
    println("<!-- Generated by NvtxRangeDocs.help. DO NOT EDIT! -->")
    // scalastyle:off line.size.limit
    println("""# RAPIDS Accelerator for Apache Spark Nvtx Range Glossary
      |The following is the list of Nvtx ranges that are used throughout
      |the plugin. To add your own Nvtx range to the code, create an NvtxId
      |entry in NvtxRangeWithDoc.scala and create an `NvtxRangeWithDoc` in the
      |code location that you want to cover, passing in the newly created NvtxId.
      |
      |See [nvtx_profiling.md](https://nvidia.github.io/spark-rapids/docs/dev/nvtx_profiling.html) for more info.
      |
      |""".stripMargin)
    // scalastyle:on line.size.limit
    println("\n## Nvtx Ranges\n")
    println("Name | Description")
    println("-----|-------------")
  }

  /**
   * Writes the generated page to the file named by the first argument.
   *
   * Both `Console.out` and `Console.err` are redirected to that file while the page is
   * being produced.
   *
   * @param args `args(0)` is the path of the output file
   */
  def main(args: Array[String]): Unit = {
    val configs = new FileOutputStream(new File(args(0)))
    // Close the file even when generation throws; the stream was previously never closed.
    try {
      Console.withOut(configs) {
        Console.withErr(configs) {
          helpCommon()
          NvtxId.registeredRanges.foreach(_.help())
        }
      }
    } finally {
      configs.close()
    }
  }
}
|
||||||
/**
 * AutoCloseable wrapper around a cudf NvtxRange whose label comes from a documented NvtxId.
 *
 * @param id    the documented id; its `name` is passed to the underlying NvtxRange
 * @param color the color passed to the underlying NvtxRange
 */
class NvtxRangeWithDoc(val id: NvtxId, color: NvtxColor) extends AutoCloseable { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we please change the API? The Then we can have each of the
You might have guessed from this that I would like the color to be a part of the |
||||||
// The underlying range is constructed eagerly, as part of constructing this wrapper.
private val nvtxRange: NvtxRange = new NvtxRange(id.name, color) | ||||||
|
||||||
// Closing the wrapper closes the underlying range.
override def close(): Unit = nvtxRange.close() | ||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,5 +1,5 @@ | ||||||
/* | ||||||
* Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||||||
* Copyright (c) 2023-2025, NVIDIA CORPORATION. | ||||||
* | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
* you may not use this file except in compliance with the License. | ||||||
|
@@ -24,6 +24,7 @@ import java.util.concurrent.TimeUnit | |||||
import scala.collection.mutable | ||||||
|
||||||
import ai.rapids.cudf.{NvtxColor, NvtxRange} | ||||||
import com.nvidia.spark.rapids.{NvtxId, NvtxRangeWithDoc} | ||||||
import com.nvidia.spark.rapids.Arm.withResource | ||||||
import com.nvidia.spark.rapids.ScalableTaskCompletion.onTaskCompletion | ||||||
import com.nvidia.spark.rapids.jni.RmmSpark | ||||||
|
@@ -289,11 +290,26 @@ class GpuTaskMetrics extends Serializable { | |||||
} | ||||||
} | ||||||
|
||||||
/**
 * Evaluates `f` inside an NvtxRangeWithDoc and adds the elapsed nanoseconds to `timer`.
 *
 * @param timer accumulator that receives the elapsed time
 * @param range documented id used to create the NvtxRangeWithDoc
 * @param color color passed to the NvtxRangeWithDoc
 * @param f     by-name body to evaluate; its result is returned
 */
private def timeIt[A](timer: NanoSecondAccumulator, | ||||||
range: NvtxId, | ||||||
color: NvtxColor, | ||||||
f: => A): A = { | ||||||
// The start time is taken before the range is constructed, so the recorded duration
// includes the construction of the NvtxRangeWithDoc.
val start = System.nanoTime() | ||||||
// NOTE(review): withResource comes from Arm; presumably it closes the range when the
// block exits - confirm against Arm.withResource.
withResource(new NvtxRangeWithDoc(range, color)) { _ => | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: if you make it a case class or add a companion with factory apply manually
Suggested change
|
||||||
try { | ||||||
f | ||||||
} finally { | ||||||
// Runs whether f returns or throws, so the time is recorded in both cases.
timer.add(System.nanoTime() - start) | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
/** Adds `duration` to the `semaphoreHoldingTime` accumulator. */
def addSemaphoreHoldingTime(duration: Long): Unit = semaphoreHoldingTime.add(duration) | ||||||
|
||||||
/** Returns the current value read from `semWaitTimeNs`. */
def getSemWaitTime(): Long = semWaitTimeNs.value.value | ||||||
|
||||||
def semWaitTime[A](f: => A): A = timeIt(semWaitTimeNs, "Acquire GPU", NvtxColor.RED, f) | ||||||
/**
 * Evaluates `f` through `timeIt`, accumulating into `semWaitTimeNs` under the
 * documented `NvtxId.ACQUIRE_GPU` range with `NvtxColor.RED`.
 */
def semWaitTime[A](f: => A): A = timeIt(semWaitTimeNs, NvtxId.ACQUIRE_GPU, | ||||||
NvtxColor.RED, f) | ||||||
|
||||||
def spillToHostTime[A](f: => A): A = { | ||||||
timeIt(spillToHostTimeNs, "spillToHostTime", NvtxColor.RED, f) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are adding a new page to the developer docs. Additionally, I noticed that there were two existing pages with the same `nav_order`, which I am also fixing, so the end result is that all the later pages get moved back by 2.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is a separate concern worth its own PR