Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for reading tzfiles #8

Merged
merged 14 commits into from
Oct 13, 2015
Merged
1 change: 1 addition & 0 deletions src/TimeZones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ include("timezones/types.jl")
include("timezones/accessors.jl")
include("timezones/arithmetic.jl")
include("timezones/io.jl")
include("timezones/tzfile.jl")
include("timezones/adjusters.jl")
include("timezones/Olson.jl")
include("timezones/conversions.jl")
Expand Down
132 changes: 132 additions & 0 deletions src/timezones/tzfile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Parsing tzfiles references:
# - http://man7.org/linux/man-pages/man5/tzfile.5.html
# - ftp://ftp.iana.org/tz/code/tzfile.5.txt

# A single "ttinfo" record read from a tzfile. Each field is annotated with the name of
# the corresponding member in the tzfile(5) reference listed above.
immutable TransitionTimeInfo
    # tt_gmtoff
    gmtoff::Int32
    # tt_isdst
    isdst::Int8
    # tt_abbrind (the reader stores this already shifted to Julia's 1-based indexing)
    abbrindex::UInt8
end

# Return the NUL-terminated abbreviation that starts at the 1-based position `offset`
# within `chars`, the block of abbreviation bytes read from a tzfile.
#
# The terminator is located by scanning the array itself. Handing a raw pointer to a
# temporary slice over to the C-string reader is unsafe: the slice is not rooted for the
# duration of the call, and a missing terminator makes the read run past the buffer.
#
# Throws an ArgumentError when no NUL byte is present at or after `offset`.
function abbreviation(chars::Array{UInt8}, offset::Integer=1)
    for i in offset:length(chars)
        if chars[i] == 0x00
            # Everything before the terminator; empty when `offset` is itself the NUL
            return ascii(chars[offset:i-1])
        end
    end
    throw(ArgumentError("Abbreviation starting at index $offset is not NUL terminated."))
end

# Read a time zone called `name` from the tzfile stream `io`.
#
# The first data block is always parsed. When its header reports a version other than
# '\0' a second block follows it in the stream, and the time zone built from that
# second block is the one returned.
function read_tzfile(io::IO, name::AbstractString)
    header_version, timezone = read_tzfile_internal(io, name)

    # Version '\0' files hold nothing beyond the block that was just read
    header_version == '\0' && return timezone

    # Another even better transition table after this first one
    return read_tzfile_internal(io, name, header_version)[2]
end

# Parse one tzfile data block (header followed by its tables) from `io`.
#
# Arguments:
# - io: stream positioned at the start of a "TZif" header.
# - name: name given to the resulting time zone when it has transitions.
# - force_version: '\0' reads transition and leap second times as Int32. Any other value
#   reads them as Int64 and also consumes the POSIX TZ string that follows the tables.
#
# Returns the tuple `(version, timezone)`: `version` is the version byte found in the
# header and `timezone` is a FixedTimeZone when the block contains no transition times,
# otherwise a VariableTimeZone.
#
# Fails with an AssertionError when the stream does not begin with "TZif".
function read_tzfile_internal(io::IO, name::AbstractString, force_version::Char='\0')
    magic = readbytes(io, 4)
    @assert magic == b"TZif" "Magic file identifier \"TZif\" not found."

    # A byte indicating the version of the file's format: '\0', '2', '3'
    version = Char(read(io, UInt8))
    readbytes(io, 15)  # Fifteen bytes reserved for future use

    tzh_ttisgmtcnt = ntoh(read(io, Int32))  # Number of UTC/local indicators
    tzh_ttisstdcnt = ntoh(read(io, Int32))  # Number of standard/wall indicators
    tzh_leapcnt = ntoh(read(io, Int32))  # Number of leap seconds
    tzh_timecnt = ntoh(read(io, Int32))  # Number of transition dates
    tzh_typecnt = ntoh(read(io, Int32))  # Number of TransitionTimeInfos (must be > 0)
    tzh_charcnt = ntoh(read(io, Int32))  # Number of timezone abbreviation characters

    time_type = force_version == '\0' ? Int32 : Int64

    # Transition time that represents negative infinity
    initial_epoch = time_type == Int64 ? -Int64(2)^59 : typemin(Int32)

    transition_times = Array{time_type}(tzh_timecnt)
    for i in eachindex(transition_times)
        transition_times[i] = ntoh(read(io, time_type))
    end

    # Held as Int rather than UInt8: the stored index is shifted by one for Julia's
    # 1-based indexing, and a stored value of 255 would then no longer fit in a UInt8.
    lindexes = Array{Int}(tzh_timecnt)
    for i in eachindex(lindexes)
        lindexes[i] = Int(ntoh(read(io, UInt8))) + 1  # Julia uses 1 indexing
    end

    ttinfo = Array{TransitionTimeInfo}(tzh_typecnt)
    for i in eachindex(ttinfo)
        ttinfo[i] = TransitionTimeInfo(
            ntoh(read(io, Int32)),
            ntoh(read(io, Int8)),
            ntoh(read(io, UInt8)) + 1  # Julia uses 1 indexing
        )
    end

    abbrs = Array{UInt8}(tzh_charcnt)
    for i in eachindex(abbrs)
        abbrs[i] = ntoh(read(io, UInt8))
    end

    # leap seconds (unused, but they must be read to advance the stream)
    leapseconds_time = Array{time_type}(tzh_leapcnt)
    leapseconds_seconds = Array{Int32}(tzh_leapcnt)
    for i in eachindex(leapseconds_time)
        leapseconds_time[i] = ntoh(read(io, time_type))
        leapseconds_seconds[i] = ntoh(read(io, Int32))
    end

    # standard/wall and UTC/local indicators (unused, read to advance the stream)
    isstd = Array{Int8}(tzh_ttisstdcnt)
    for i in eachindex(isstd)
        isstd[i] = ntoh(read(io, Int8))
    end
    isgmt = Array{Int8}(tzh_ttisgmtcnt)
    for i in eachindex(isgmt)
        isgmt[i] = ntoh(read(io, Int8))
    end

    # POSIX TZ variable string used for transitions after the last ttinfo (unused).
    # Two lines are consumed: whatever remains of the current line, then the line
    # carrying the TZ string itself.
    if force_version != '\0'
        readline(io)
        readline(io)
    end

    # Now build the timezone transitions
    if tzh_timecnt == 0
        abbr = abbreviation(abbrs, ttinfo[1].abbrindex)
        timezone = FixedTimeZone(Symbol(abbr), Offset(ttinfo[1].gmtoff))
    else
        # Calculate transition info
        transitions = Transition[]
        utc = dst = 0
        for i in eachindex(transition_times)
            info = ttinfo[lindexes[i]]

            # Since the tzfile does not contain the DST offset we need to
            # attempt to calculate it.
            if info.isdst == 0
                utc = info.gmtoff
                dst = 0
            elseif dst == 0
                # isdst == true and the last DST offset was 0:
                # assume that only the DST offset has changed
                dst = info.gmtoff - utc
            else
                # isdst == true and the last DST offset was not 0:
                # assume that only the GMT offset has changed
                utc = info.gmtoff - dst
            end

            # Sometimes tzfiles save on storage by having multiple names in one for example
            # "WSST\0" at index 1 turns into "WSST" where as index 2 results in "SST".
            abbr = abbreviation(abbrs, info.abbrindex)
            tz = FixedTimeZone(abbr, utc, dst)

            # Only record a transition when the zone actually changes
            if isempty(transitions) || last(transitions).zone != tz
                if transition_times[i] == initial_epoch
                    utc_datetime = typemin(DateTime)
                else
                    utc_datetime = unix2datetime(Int64(transition_times[i]))
                end

                push!(transitions, Transition(utc_datetime, tz))
            end
        end
        timezone = VariableTimeZone(Symbol(name), transitions)
    end

    return version, timezone
end
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ using Base.Test
import TimeZones: TZDATA_DIR
import TimeZones.Olson: ZoneDict, RuleDict, tzparse, resolve

const TZFILE_DIR = normpath(joinpath(dirname(@__FILE__), "tzfile"))

# For testing we'll reparse the tzdata every time instead of using the serialized data.
# This should make the development/testing cycle simpler since you won't be forced to
# re-build the cache every time you make a change.
Expand All @@ -21,6 +23,7 @@ include("timezones/Olson.jl")
include("timezones/accessors.jl")
include("timezones/arithmetic.jl")
include("timezones/io.jl")
include("timezones/tzfile.jl")
include("timezones/adjusters.jl")
include("timezones/conversions.jl")
include("timezone_names.jl")
138 changes: 138 additions & 0 deletions test/timezones/tzfile.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import TimeZones: Transition, Offset

# Trims two Transition arrays down to the span of time covered by both of them and
# returns the trimmed arrays as a tuple, in the same order as the arguments.
function overlap(a::Array{Transition}, b::Array{Transition})
    lower = max(first(a).utc_datetime, first(b).utc_datetime)
    upper = min(last(a).utc_datetime, last(b).utc_datetime)

    # The lower bound is exclusive: the initial transition date is arbitrary, so the
    # starting entries may not be equal
    inspan(t) = lower < t.utc_datetime <= upper
    trim(transitions) = transitions[find(inspan, transitions)]

    return trim(a), trim(b)
end

# Two transitions are similar when they are equal, or when they share the same UTC
# instant and zone name and their zone offsets are themselves similar.
function issimilar(x::Transition, y::Transition)
    x == y && return true
    x.utc_datetime == y.utc_datetime || return false
    x.zone.name == y.zone.name || return false
    return issimilar(x.zone.offset, y.zone.offset)
end

# Two offsets are similar when they are equal, or when they convert to the same number
# of seconds and their DST parts either match or are both greater than zero.
function issimilar(x::Offset, y::Offset)
    x == y && return true
    Second(x) == Second(y) || return false
    x.dst == y.dst && return true
    return x.dst > Second(0) && y.dst > Second(0)
end

# An empty stream has no "TZif" magic identifier and must be rejected
@test_throws AssertionError TimeZones.read_tzfile(IOBuffer(), "")

# Compare tzfile transitions with those we resolved directly from the Olson zones/rules

# A tzfile without transitions should come back as a FixedTimeZone with the right data
utc = FixedTimeZone("UTC", 0)
open(joinpath(TZFILE_DIR, "Etc", "UTC")) do io
    parsed = TimeZones.read_tzfile(io, "UTC")
    @test parsed == utc
end


warsaw = resolve("Europe/Warsaw", tzdata["europe"]...)

# The plain "Warsaw" fixture
open(joinpath(TZFILE_DIR, "Europe", "Warsaw")) do io
    parsed = TimeZones.read_tzfile(io, "Europe/Warsaw")
    @test string(parsed) == "Europe/Warsaw"
    @test first(parsed.transitions).utc_datetime == DateTime(1915,8,4,22,36)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, warsaw.transitions)...)
end

# Read only the version 1 compatible data at the front of a version 2 file
open(joinpath(TZFILE_DIR, "Europe", "Warsaw (Version 2)")) do io
    version, parsed = TimeZones.read_tzfile_internal(io, "Europe/Warsaw")
    @test version == '2'
    @test string(parsed) == "Europe/Warsaw"
    @test first(parsed.transitions).utc_datetime == typemin(DateTime)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, warsaw.transitions[2:end])...)
end

# Read the version 2 data
open(joinpath(TZFILE_DIR, "Europe", "Warsaw (Version 2)")) do io
    parsed = TimeZones.read_tzfile(io, "Europe/Warsaw")
    @test string(parsed) == "Europe/Warsaw"
    @test first(parsed.transitions).utc_datetime == typemin(DateTime)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, warsaw.transitions)...)
end


godthab = resolve("America/Godthab", tzdata["europe"]...)

# The plain "Godthab" fixture
open(joinpath(TZFILE_DIR, "America", "Godthab")) do io
    parsed = TimeZones.read_tzfile(io, "America/Godthab")
    @test string(parsed) == "America/Godthab"
    @test first(parsed.transitions).utc_datetime == DateTime(1916,7,28,3,26,56)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, godthab.transitions)...)
end

# Read only the version 1 compatible data at the front of a version 3 file
open(joinpath(TZFILE_DIR, "America", "Godthab (Version 3)")) do io
    version, parsed = TimeZones.read_tzfile_internal(io, "America/Godthab")
    @test version == '3'
    @test string(parsed) == "America/Godthab"
    @test first(parsed.transitions).utc_datetime == typemin(DateTime)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, godthab.transitions)...)
end

# Read the version 3 data
open(joinpath(TZFILE_DIR, "America", "Godthab (Version 3)")) do io
    parsed = TimeZones.read_tzfile(io, "America/Godthab")
    @test string(parsed) == "America/Godthab"
    @test first(parsed.transitions).utc_datetime == typemin(DateTime)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)
    @test ==(overlap(parsed.transitions, godthab.transitions)...)
end


# "Pacific/Apia" was the timezone I was thinking could be an issue for the
# DST calculation. The entire day of 2011/12/30 was skipped when they changed from a
# -11:00 GMT offset to 13:00 GMT offset
apia = resolve("Pacific/Apia", tzdata["australasia"]...)
open(joinpath(TZFILE_DIR, "Pacific", "Apia")) do io
    parsed = TimeZones.read_tzfile(io, "Pacific/Apia")
    @test string(parsed) == "Pacific/Apia"
    @test first(parsed.transitions).utc_datetime == DateTime(1911,1,1,11,26,56)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,9,26,14)
    @test ==(overlap(parsed.transitions, apia.transitions)...)
end

# Because tzfiles only store a single offset, if both utc and dst change at the same
# time then the resulting utc and dst might not be quite right. Most notably, during
# Midsummer time back in the 1940's there were 2 different dst one after another, so we
# get a different utc and dst than Olson.
paris = resolve("Europe/Paris", tzdata["europe"]...)
open(joinpath(TZFILE_DIR, "Europe", "Paris")) do io
    parsed = TimeZones.read_tzfile(io, "Europe/Paris")
    @test string(parsed) == "Europe/Paris"
    @test first(parsed.transitions).utc_datetime == DateTime(1911,3,10,23,51,39)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)

    from_file, from_olson = overlap(parsed.transitions, paris.transitions)

    # Indices 56:2:58 don't match due to issues with Midsummer time.
    matches = from_file .== from_olson
    differs = !matches
    @test sum(differs) == 2
    @test from_file[matches] == from_olson[matches]
    @test all(map(issimilar, from_file[differs], from_olson[differs]))
end

madrid = resolve("Europe/Madrid", tzdata["europe"]...)
open(joinpath(TZFILE_DIR, "Europe", "Madrid")) do io
    parsed = TimeZones.read_tzfile(io, "Europe/Madrid")
    @test string(parsed) == "Europe/Madrid"
    @test first(parsed.transitions).utc_datetime == DateTime(1917,5,5,23)
    @test last(parsed.transitions).utc_datetime == DateTime(2037,10,25,1)

    from_file, from_olson = overlap(parsed.transitions, madrid.transitions)

    # Indices 24:2:32 don't match due to issues with Midsummer time.
    matches = from_file .== from_olson
    differs = !matches
    @test sum(differs) == 5
    @test from_file[matches] == from_olson[matches]
    @test all(map(issimilar, from_file[differs], from_olson[differs]))
end
Binary file added test/tzfile/America/Godthab
Binary file not shown.
Binary file added test/tzfile/America/Godthab (Version 3)
Binary file not shown.
Binary file added test/tzfile/Etc/UTC
Binary file not shown.
Binary file added test/tzfile/Europe/Madrid
Binary file not shown.
Binary file added test/tzfile/Europe/Paris
Binary file not shown.
Binary file added test/tzfile/Europe/Warsaw
Binary file not shown.
Binary file added test/tzfile/Europe/Warsaw (Version 2)
Binary file not shown.
Binary file added test/tzfile/Pacific/Apia
Binary file not shown.