[SPARK-49491][SQL] Replace AnyRefMap with HashMap
### What changes were proposed in this pull request?

Replace AnyRefMap with HashMap.

### Why are the changes needed?

HashMap has better performance than AnyRefMap in Scala 2.13: https://issues.apache.org/jira/browse/SPARK-49491.
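
For intuition only, here is a rough, self-contained timing sketch comparing lookups on the two map types. Everything in it (the object name, key count, and ad-hoc timer) is made up for illustration; the numbers this change actually relies on come from DateTimeRebaseBenchmark and the JIRA above.

```scala
import scala.collection.mutable

// Illustrative only: no JMH warm-up or forking, so treat the printed numbers
// as a rough signal rather than a benchmark result.
object MapLookupSketch {
  private def time(label: String)(body: => Unit): Unit = {
    val start = System.nanoTime()
    body
    println(f"$label%-20s ${(System.nanoTime() - start) / 1e6}%.1f ms")
  }

  def main(args: Array[String]): Unit = {
    val keys = (0 until 500000).map(i => s"tz-$i")

    // Pre-size both maps, mirroring what the patch does with sizeHint.
    val anyRefMap = new mutable.AnyRefMap[String, String](keys.size)
    val hashMap = new mutable.HashMap[String, String]
    hashMap.sizeHint(keys.size)
    keys.foreach { k => anyRefMap.update(k, k); hashMap.update(k, k) }

    time("AnyRefMap lookups")(keys.foreach(k => anyRefMap.getOrNull(k)))
    time("HashMap lookups")(keys.foreach(k => hashMap.get(k).orNull))
  }
}
```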

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Existing Unit Tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#48128 from George314159/SPARK-49491.

Authored-by: George314159 <hua16732@gmail.com>
Signed-off-by: Yuming Wang <yumwang@ebay.com>
George314159 authored and wangyum committed Jan 1, 2025
1 parent 5334494 commit 1c79b54
Showing 3 changed files with 216 additions and 216 deletions.
RebaseDateTime.scala

```diff
@@ -22,7 +22,7 @@ import java.time.temporal.ChronoField
 import java.util.{Calendar, TimeZone}
 import java.util.Calendar.{DAY_OF_MONTH, DST_OFFSET, ERA, HOUR_OF_DAY, MINUTE, MONTH, SECOND, YEAR, ZONE_OFFSET}
 
-import scala.collection.mutable.AnyRefMap
+import scala.collection.mutable.HashMap
 
 import com.fasterxml.jackson.databind.ObjectMapper
 import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule}
@@ -285,12 +285,12 @@ object RebaseDateTime {
   }
 
   // Loads rebasing info from an JSON file. JSON records in the files should conform to
-  // `JsonRebaseRecord`. AnyRefMap is used here instead of Scala's immutable map because
-  // it is 2 times faster in DateTimeRebaseBenchmark.
-  private[sql] def loadRebaseRecords(fileName: String): AnyRefMap[String, RebaseInfo] = {
+  // `JsonRebaseRecord`. Mutable HashMap is used here instead of AnyRefMap due to SPARK-49491.
+  private[sql] def loadRebaseRecords(fileName: String): HashMap[String, RebaseInfo] = {
     val file = SparkClassUtils.getSparkClassLoader.getResource(fileName)
     val jsonRebaseRecords = mapper.readValue[Seq[JsonRebaseRecord]](file)
-    val anyRefMap = new AnyRefMap[String, RebaseInfo]((3 * jsonRebaseRecords.size) / 2)
+    val hashMap = new HashMap[String, RebaseInfo]
+    hashMap.sizeHint(jsonRebaseRecords.size)
     jsonRebaseRecords.foreach { jsonRecord =>
       val rebaseInfo = RebaseInfo(jsonRecord.switches, jsonRecord.diffs)
       var i = 0
@@ -299,9 +299,9 @@
         rebaseInfo.diffs(i) = rebaseInfo.diffs(i) * MICROS_PER_SECOND
         i += 1
       }
-      anyRefMap.update(jsonRecord.tz, rebaseInfo)
+      hashMap.update(jsonRecord.tz, rebaseInfo)
     }
-    anyRefMap
+    hashMap
   }
 
   /**
@@ -313,7 +313,7 @@
    */
   private val gregJulianRebaseMap = loadRebaseRecords("gregorian-julian-rebase-micros.json")
 
-  private def getLastSwitchTs(rebaseMap: AnyRefMap[String, RebaseInfo]): Long = {
+  private def getLastSwitchTs(rebaseMap: HashMap[String, RebaseInfo]): Long = {
     val latestTs = rebaseMap.values.map(_.switches.last).max
     require(
       rebaseMap.values.forall(_.diffs.last == 0),
@@ -404,7 +404,7 @@
     if (micros >= lastSwitchGregorianTs) {
       micros
     } else {
-      val rebaseRecord = gregJulianRebaseMap.getOrNull(timeZoneId)
+      val rebaseRecord = gregJulianRebaseMap.get(timeZoneId).orNull
       if (rebaseRecord == null || micros < rebaseRecord.switches(0)) {
         rebaseGregorianToJulianMicros(TimeZone.getTimeZone(timeZoneId), micros)
       } else {
@@ -526,7 +526,7 @@
     if (micros >= lastSwitchJulianTs) {
       micros
     } else {
-      val rebaseRecord = julianGregRebaseMap.getOrNull(timeZoneId)
+      val rebaseRecord = julianGregRebaseMap.get(timeZoneId).orNull
       if (rebaseRecord == null || micros < rebaseRecord.switches(0)) {
         rebaseJulianToGregorianMicros(TimeZone.getTimeZone(timeZoneId), micros)
       } else {
```
(The diffs for the remaining two changed files in this commit are not rendered on this page.)
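
One API detail from the diff worth noting: mutable.HashMap has no getOrNull, so the lookups switch to get(timeZoneId).orNull, which still returns null for a missing time zone ID and therefore leaves the existing rebaseRecord == null branches untouched. Below is a minimal sketch of that equivalence, with RebaseInfo simplified to a stand-in case class rather than the real one in RebaseDateTime.

```scala
import scala.collection.mutable

object NullLookupSketch {
  // Simplified stand-in for RebaseDateTime.RebaseInfo, just to give the maps a value type.
  case class RebaseInfo(switches: Array[Long], diffs: Array[Long])

  def main(args: Array[String]): Unit = {
    val anyRefMap = new mutable.AnyRefMap[String, RebaseInfo]
    val hashMap = new mutable.HashMap[String, RebaseInfo]

    // Both lookups yield null for a missing time zone id, so callers that
    // check `rebaseRecord == null` behave the same after the swap.
    assert(anyRefMap.getOrNull("Europe/Paris") == null)
    assert(hashMap.get("Europe/Paris").orNull == null)
  }
}
```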
