From a0c9d05b20a854ea59e7837d4544935ed1863e1d Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Mon, 20 Jun 2022 15:54:13 -0500 Subject: [PATCH] Add support for new serialization format TZJFile (#380) * Sync up with TZFile changes * Write tests * Update TZJFile to use get_designation * Add comment for future version test --- src/TimeZones.jl | 1 + src/tzjfile/TZJFile.jl | 13 ++++ src/tzjfile/read.jl | 82 ++++++++++++++++++++++ src/tzjfile/utils.jl | 9 +++ src/tzjfile/write.jl | 113 +++++++++++++++++++++++++++++++ test/runtests.jl | 2 + test/tzjfile/data/Europe/Moscow | Bin 0 -> 848 bytes test/tzjfile/data/Europe/Warsaw | Bin 0 -> 1588 bytes test/tzjfile/data/Future_Version | 1 + test/tzjfile/data/UTC | Bin 0 -> 37 bytes test/tzjfile/read.jl | 53 +++++++++++++++ test/tzjfile/write.jl | 45 ++++++++++++ 12 files changed, 319 insertions(+) create mode 100644 src/tzjfile/TZJFile.jl create mode 100644 src/tzjfile/read.jl create mode 100644 src/tzjfile/utils.jl create mode 100644 src/tzjfile/write.jl create mode 100644 test/tzjfile/data/Europe/Moscow create mode 100644 test/tzjfile/data/Europe/Warsaw create mode 100644 test/tzjfile/data/Future_Version create mode 100644 test/tzjfile/data/UTC create mode 100644 test/tzjfile/read.jl create mode 100644 test/tzjfile/write.jl diff --git a/src/TimeZones.jl b/src/TimeZones.jl index e0bd92988..7c81e2d96 100644 --- a/src/TimeZones.jl +++ b/src/TimeZones.jl @@ -63,6 +63,7 @@ include(joinpath("types", "fixedtimezone.jl")) include(joinpath("types", "variabletimezone.jl")) include(joinpath("types", "zoneddatetime.jl")) include(joinpath("tzfile", "TZFile.jl")) +include(joinpath("tzjfile", "TZJFile.jl")) include("exceptions.jl") include(joinpath("tzdata", "TZData.jl")) Sys.iswindows() && include(joinpath("winzone", "WindowsTimeZoneIDs.jl")) diff --git a/src/tzjfile/TZJFile.jl b/src/tzjfile/TZJFile.jl new file mode 100644 index 000000000..35fca98da --- /dev/null +++ b/src/tzjfile/TZJFile.jl @@ -0,0 +1,13 @@ +module TZJFile + +using Dates: Dates, DateTime, Second, datetime2unix, unix2datetime +using ...TimeZones: FixedTimeZone, VariableTimeZone, Class, Transition +using ...TimeZones.TZFile: combine_designations, get_designation, timestamp_min + +const DEFAULT_VERSION = 1 + +include("utils.jl") +include("read.jl") +include("write.jl") + +end diff --git a/src/tzjfile/read.jl b/src/tzjfile/read.jl new file mode 100644 index 000000000..8e94b9d95 --- /dev/null +++ b/src/tzjfile/read.jl @@ -0,0 +1,82 @@ +struct TZJTransition + utc_offset::Int32 # Resolution in seconds + dst_offset::Int16 # Resolution in seconds + designation_index::UInt8 +end + +function read(io::IO) + read_signature(io) + version = read_version(io) + return read_content(io, Val(version)) +end + +function read_signature(io::IO) + magic = Base.read(io, 4) # Read the 4 byte magic identifier + magic == b"TZjf" || throw(ArgumentError("Magic file identifier \"TZjf\" not found.")) + return magic +end + +read_version(io::IO) = Int(ntoh(Base.read(io, UInt8))) + +function read_content(io::IO, version::Val{1}) + tzh_timecnt = ntoh(Base.read(io, Int32)) # Number of transition dates + tzh_typecnt = ntoh(Base.read(io, Int32)) # Number of transition types (must be > 0) + tzh_charcnt = ntoh(Base.read(io, Int32)) # Number of time zone designation characters + class = Class(ntoh(Base.read(io, UInt8))) + + transition_times = Vector{Int64}(undef, tzh_timecnt) + for i in eachindex(transition_times) + transition_times[i] = ntoh(Base.read(io, Int64)) + end + cutoff_time = ntoh(Base.read(io, Int64)) + + transition_indices = Vector{UInt8}(undef, tzh_timecnt) + for i in eachindex(transition_indices) + transition_indices[i] = ntoh(Base.read(io, UInt8)) + 1 # Julia uses 1 indexing + end + + transition_types = Vector{TZJTransition}(undef, tzh_typecnt) + for i in eachindex(transition_types) + transition_types[i] = TZJTransition( + ntoh(Base.read(io, Int32)), + ntoh(Base.read(io, Int16)), + ntoh(Base.read(io, UInt8)) + 1 # Julia uses 1 indexing + ) + end + combined_designations = Vector{UInt8}(undef, tzh_charcnt) + for i in eachindex(combined_designations) + combined_designations[i] = ntoh(Base.read(io, UInt8)) + end + + # Now build the time zone transitions + tz_constructor = if tzh_timecnt == 0 || (tzh_timecnt == 1 && transition_types[1] == TIMESTAMP_MIN) + tzj_info = transition_types[1] + name -> (FixedTimeZone(name, tzj_info.utc_offset, tzj_info.dst_offset), class) + else + transitions = Transition[] + cutoff = timestamp2datetime(cutoff_time, nothing) + + prev_zone = nothing + for i in eachindex(transition_times) + timestamp = transition_times[i] + tzj_info = transition_types[transition_indices[i]] + + # Sometimes tzfiles save on storage by having multiple names in one for example: + # "WSST\0" at index 1 turns into "WSST" where as index 2 results in "SST" + # for "Pacific/Apia". + name = get_designation(combined_designations, tzj_info.designation_index) + zone = FixedTimeZone(name, tzj_info.utc_offset, tzj_info.dst_offset) + + if zone != prev_zone + utc_datetime = timestamp2datetime(timestamp, typemin(DateTime)) + push!(transitions, Transition(utc_datetime, zone)) + end + + prev_zone = zone + end + + name -> (VariableTimeZone(name, transitions, cutoff), class) + end + + return tz_constructor +end diff --git a/src/tzjfile/utils.jl b/src/tzjfile/utils.jl new file mode 100644 index 000000000..ed981194b --- /dev/null +++ b/src/tzjfile/utils.jl @@ -0,0 +1,9 @@ +const TIMESTAMP_MIN = timestamp_min(Int64) + +function datetime2timestamp(x, sentinel) + return x != sentinel ? convert(Int64, datetime2unix(x)) : TIMESTAMP_MIN +end + +function timestamp2datetime(x::Int64, sentinel) + return x != TIMESTAMP_MIN ? unix2datetime(x) : sentinel +end diff --git a/src/tzjfile/write.jl b/src/tzjfile/write.jl new file mode 100644 index 000000000..d1560bdfd --- /dev/null +++ b/src/tzjfile/write.jl @@ -0,0 +1,113 @@ +function write(io::IO, tz::VariableTimeZone; class::Class, version::Integer=DEFAULT_VERSION) + combined_designation, designation_indices = combine_designations(t.zone.name for t in tz.transitions) + + # TODO: Sorting provides us a way to avoid checking for the sentinel on each loop + transition_times = map(tz.transitions) do t + datetime2timestamp(t.utc_datetime, typemin(DateTime)) + end + transition_types = map(enumerate(tz.transitions)) do (i, t) + TZJTransition( + Dates.value(Second(t.zone.offset.std)), + Dates.value(Second(t.zone.offset.dst)), + designation_indices[i] + ) + end + + cutoff = datetime2timestamp(tz.cutoff, nothing) + + write_signature(io) + write_version(io; version) + write_content( + io, + version; + class=class.val, + transition_times, + transition_types, + cutoff, + combined_designation, + ) +end + +function write(io::IO, tz::FixedTimeZone; class::Class, version::Integer=DEFAULT_VERSION) + combined_designation, designation_indices = combine_designations([tz.name]) + + transition_times = Vector{Int64}() + + transition_types = [ + TZJTransition( + Dates.value(Second(tz.offset.std)), + Dates.value(Second(tz.offset.dst)), + designation_indices[1], + ) + ] + + cutoff = datetime2timestamp(nothing, nothing) + + write_signature(io) + write_version(io; version) + write_content( + io, + version; + class=class.val, + transition_times, + transition_types, + cutoff, + combined_designation, + ) +end + +write_signature(io::IO) = Base.write(io, b"TZjf") +write_version(io::IO; version::Integer) = Base.write(io, hton(UInt8(version))) + +function write_content(io::IO, version::Integer; kwargs...) + return write_content(io, Val(Int(version)); kwargs...) +end + +function write_content( + io::IO, + version::Val{1}; + class::UInt8, + transition_times::Vector{Int64}, + transition_types::Vector{TZJTransition}, + cutoff::Int64, + combined_designation::AbstractString, +) + if length(transition_times) > 0 + unique_transition_types = unique(transition_types) + transition_indices = indexin(transition_types, unique_transition_types) + transition_types = unique_transition_types + + @assert length(transition_times) == length(transition_indices) + else + transition_indices = Vector{Int}() + transition_types = unique(transition_types) + end + + # Three four-byte integer values + Base.write(io, hton(Int32(length(transition_times)))) # tzh_timecnt + Base.write(io, hton(Int32(length(transition_types)))) # tzh_typecnt + Base.write(io, hton(Int32(length(combined_designation)))) # tzh_charcnt + Base.write(io, hton(class)) + + for timestamp in transition_times + Base.write(io, hton(timestamp)) + end + Base.write(io, hton(cutoff)) + + for index in transition_indices + Base.write(io, hton(UInt8(index - 1))) # Convert 1-indexing to 0-indexing + end + + # tzh_typecnt ttinfo entries + for tzj_info in transition_types + Base.write(io, hton(Int32(tzj_info.utc_offset))) + Base.write(io, hton(Int16(tzj_info.dst_offset))) + Base.write(io, hton(UInt8(tzj_info.designation_index - 1))) + end + + for char in combined_designation + Base.write(io, hton(UInt8(char))) + end + + return nothing +end diff --git a/test/runtests.jl b/test/runtests.jl index d1569a393..65265ddd0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -56,6 +56,8 @@ include("helpers.jl") include("io.jl") include(joinpath("tzfile", "read.jl")) include(joinpath("tzfile", "write.jl")) + include(joinpath("tzjfile", "read.jl")) + include(joinpath("tzjfile", "write.jl")) include("adjusters.jl") include("conversions.jl") include("ranges.jl") diff --git a/test/tzjfile/data/Europe/Moscow b/test/tzjfile/data/Europe/Moscow new file mode 100644 index 0000000000000000000000000000000000000000..1014b09b5cc00a982dfef0d5a536ebee588a0f58 GIT binary patch literal 848 zcmZwFUr1A79LDk2ZD-nq(+Jcp&yLGEXUodgKbM;w+m2%HkmRYriljs;j79}1bTMLh zRe>0EVcDXo7nXIQ-DovLh{&RgE`k{Z(S-#vf~b_T_xBvSvNPWE+4p_V-i`6Cz%)I;0YPtssGkXJ>*~f1%Cw3kav?u(5d&`3)J^mFabj& z3YoT_%fN8pF*#gx+AwU!{HUz9()?uF*+Ji3&|RM zp{qyHVjfnfU&GBepTRB3M{w(<6s+m&hue-%=V-sc zge`>tnc%} zy@L!YUHPyfDdlLted+=_a_SE{>V1V)BOlR?!b5bEX9nH0_!2go(}r)x;Qmj2ux0!@ zjLqGT)Bf6x^JqQh>iBo=Sf0sjvY0J`Rmc~t1y=5=*P8!NuYS;GWy}*~%w`~RE;9y1 z`#LJg&3}i#Mzj|b{3sEU+rkeKUy%vXF7Urr;cHs7O=qggZ!t}08V&wlYm4&H!WVS* E7b+6tk^lez literal 0 HcmV?d00001 diff --git a/test/tzjfile/data/Europe/Warsaw b/test/tzjfile/data/Europe/Warsaw new file mode 100644 index 0000000000000000000000000000000000000000..5b45c79e231aa9e2654711e9a8d1265d105b149d GIT binary patch literal 1588 zcmd7S|4-C)9Ki9n;H5F54xFx*?C4o0Z8|cwN4novzYd)bhNK5vZJM;%dK$m^Uc-Zy zz1Xs^gE_4Q=g?MF$@#9bpA>nn`%xO+II3sAAojeOt#%VB(cs}Tn# zH1hqxnN#@3F)I%4+k=1J&BdV|pW+jH68@E&gToCG_%v}Lj#MSq$lv)pB%HtPE7q(U z`QZ1JFa4@z9<7wdpuc$%2B-|(G?(K@3h z_eGDyW6W<+80!qisaM9~oA&1zXYI#n4G(a-G zXvDeU&6qH|+~a69&JS(S*)pmmeGmljP$}Ev*{#TJ|=f2;#;+ganb%Te7n$q zi!BQjIq#hm9c?s+((gt+qD{t8Iz@MlPBr{Yrw;y(?|EPF=;*@tAAXBzZO1UZ`@AOS zXY8({mmF`UmlhY(%j$O0%UA8f6-5P@Ij;!S^i<5!FTs^@u^ykz#O$CHMb6K@A3*1f z#L&4NPwCZ8A9_uTKjzs7aBcN4<~KNSUA`07S3TC`ybX)Kqd&;&rp=K}bU|_(T{!+U zei*KK?AwbQM@w*1R|RhFRdLIa)mU_OnwlK(64m4-!rMXYLT#?LFPSiQR% zYjSEeF~-YVe#Yv?dLI6lcOe4Xh4As<=l&0*2oWHW4 TZJFile.write_version(io, version=1)) == [ntoh(UInt8(1))] + @test bprint(io -> TZJFile.write_version(io, version=255)) == [ntoh(UInt8(255))] + @test_throws InexactError bprint(io -> TZJFile.write_version(io, version=256)) +end + +@testset "write" begin + # Tests the basic `FixedTimeZone` code path + @testset "UTC" begin + utc, class = FixedTimeZone("UTC", 0), Class(:FIXED) + io = IOBuffer() + TZJFile.write(io, utc; class) + tzj_utc, tzj_class = TZJFile.read(seekstart(io))("UTC") + + @test tzj_utc == utc + @test tzj_class == class + end + + # Tests the basic `VariableTimeZone` code path + @testset "Europe/Warsaw" begin + warsaw, class = compile("Europe/Warsaw", tzdata["europe"]) + io = IOBuffer() + TZJFile.write(io, warsaw; class) + tzj_warsaw, tzj_class = TZJFile.read(seekstart(io))("Europe/Warsaw") + + @test tzj_warsaw == warsaw + @test tzj_class == class + end + + @testset "Europe/Moscow" begin + moscow, class = compile("Europe/Moscow", tzdata["europe"]) + io = IOBuffer() + TZJFile.write(io, moscow; class) + tzj_moscow, tzj_class = TZJFile.read(seekstart(io))("Europe/Moscow") + + @test tzj_moscow == moscow + @test tzj_class == class + end +end