diff --git a/src/lib.rs b/src/lib.rs index ae3f210..1004d3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,32 @@ +//! Rust library for reading the text files comprising the [zoneinfo +//! database][w], which records time zone changes and offsets across the world +//! from multiple sources. +//! +//! The zoneinfo database is distributed in one of two formats: a raw text +//! format with one file per continent, and a compiled binary format with one +//! file per time zone. This crate deals with the former; for the latter, see +//! the [`zoneinfo_compiled` crate][zc] instead. +//! +//! The database itself is maintained by IANA. For more information, see +//! [IANA’s page on the time zone database][iana]. You can also find the text +//! files themselves in [the tz repository][tz]. +//! +//! [iana]: https://www.iana.org/time-zones +//! [tz]: https://github.com/eggert/tz +//! [w]: https://en.wikipedia.org/wiki/Tz_database +//! [zc]: https://github.com/rust-datetime/zoneinfo-compiled +//! +//! ## Outline +//! +//! Reading a zoneinfo text file is split into three stages: +//! +//! - **Parsing** individual lines of text into `Line`s is done by the `line` +//! module; +//! - **Interpreting** these lines into a complete `Table` is done by the +//! `table` module; +//! - **Calculating transitions** from this table is done by the `transitions` +//! module. + extern crate regex; pub mod line; diff --git a/src/line.rs b/src/line.rs index e638857..98aa495 100644 --- a/src/line.rs +++ b/src/line.rs @@ -1,3 +1,73 @@ +//! Parsing zoneinfo data files, line-by-line. +//! +//! This module provides functions that take a line of input from a zoneinfo +//! data file and attempt to parse it, returning the details of the line if +//! it gets parsed successfully. It classifies them as `Rule`, `Link`, +//! `Zone`, or `Continuation` lines. +//! +//! `Line` is the type that parses and holds zoneinfo line data. To try to +//! parse a string, use the `LineParser::parse_str` method. 
(This isn’t the +//! `FromStr` trait, so you can’t use `parse` on a string. Sorry!) +//! +//! ## Examples +//! +//! Parsing a `Rule` line: +//! +//! ``` +//! use parse_zoneinfo::line::*; +//! +//! let parser = LineParser::new(); +//! let line = parser.parse_str("Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S"); +//! +//! assert_eq!(line, Ok(Line::Rule(Rule { +//! name: "EU", +//! from_year: Year::Number(1977), +//! to_year: Some(Year::Number(1980)), +//! month: Month::April, +//! day: DaySpec::FirstOnOrAfter(Weekday::Sunday, 1), +//! time: TimeSpec::HoursMinutes(1, 0).with_type(TimeType::UTC), +//! time_to_add: TimeSpec::HoursMinutes(1, 0), +//! letters: Some("S"), +//! }))); +//! ``` +//! +//! Parsing a `Zone` line: +//! +//! ``` +//! use parse_zoneinfo::line::*; +//! +//! let parser = LineParser::new(); +//! let line = parser.parse_str("Zone Australia/Adelaide 9:30 Aus AC%sT 1971 Oct 31 2:00:00"); +//! +//! assert_eq!(line, Ok(Line::Zone(Zone { +//! name: "Australia/Adelaide", +//! info: ZoneInfo { +//! utc_offset: TimeSpec::HoursMinutes(9, 30), +//! saving: Saving::Multiple("Aus"), +//! format: "AC%sT", +//! time: Some(ChangeTime::UntilTime( +//! Year::Number(1971), +//! Month::October, +//! DaySpec::Ordinal(31), +//! TimeSpec::HoursMinutesSeconds(2, 0, 0).with_type(TimeType::Wall)) +//! ), +//! }, +//! }))); +//! ``` +//! +//! Parsing a `Link` line: +//! +//! ``` +//! use parse_zoneinfo::line::*; +//! +//! let parser = LineParser::new(); +//! let line = parser.parse_str("Link Europe/Istanbul Asia/Istanbul"); +//! assert_eq!(line, Ok(Line::Link(Link { +//! existing: "Europe/Istanbul", +//! new: "Asia/Istanbul", +//! }))); +//! ``` + use std::fmt; use std::str::FromStr; // we still support rust that doesn't have the inherent methods @@ -64,7 +134,12 @@ impl std::error::Error for Error {} impl Default for LineParser { fn default() -> Self { + // A set of regexes to test against. 
+ // + // All of these regexes use the `(?x)` flag, which means they support + // comments and whitespace directly in the regex string! LineParser { + // Format of a Rule line: one capturing group per field. rule_line: Regex::new( r##"(?x) ^ Rule \s+ @@ -82,6 +157,7 @@ impl Default for LineParser { ) .unwrap(), + // Format of a day specification. day_field: Regex::new( r##"(?x) ^ ( ?P \w+ ) @@ -91,6 +167,7 @@ impl Default for LineParser { ) .unwrap(), + // Format of an hour and a minute specification. hm_field: Regex::new( r##"(?x) ^ ( ?P -? ) @@ -100,6 +177,7 @@ impl Default for LineParser { ) .unwrap(), + // Format of an hour, a minute, and a second specification. hms_field: Regex::new( r##"(?x) ^ ( ?P -? ) @@ -109,6 +187,9 @@ impl Default for LineParser { ) .unwrap(), + // ^ those two could be done with the same regex, but... they aren’t. + + // Format of a Zone line, with one capturing group per field. zone_line: Regex::new( r##"(?x) ^ Zone \s+ @@ -125,6 +206,8 @@ impl Default for LineParser { ) .unwrap(), + // Format of a Continuation Zone line, which is the same as the opening + // Zone line except the first two fields are replaced by whitespace. continuation_line: Regex::new( r##"(?x) ^ \s+ @@ -140,6 +223,7 @@ impl Default for LineParser { ) .unwrap(), + // Format of a Link line, with one capturing group per field. link_line: Regex::new( r##"(?x) ^ Link \s+ @@ -150,6 +234,7 @@ impl Default for LineParser { ) .unwrap(), + // Format of an empty line, which contains only comments. empty_line: Regex::new( r##"(?x) ^ \s* @@ -161,10 +246,22 @@ impl Default for LineParser { } } +/// A **year** definition field. 
+/// +/// A year has one of the following representations in a file: +/// +/// - `min` or `minimum`, the minimum year possible, for when a rule needs to +/// apply up until the first rule with a specific year; +/// - `max` or `maximum`, the maximum year possible, for when a rule needs to +/// apply after the last rule with a specific year; +/// - a year number, referring to a specific year. #[derive(PartialEq, Debug, Copy, Clone)] pub enum Year { + /// The minimum year possible: `min` or `minimum`. Minimum, + /// The maximum year possible: `max` or `maximum`. Maximum, + /// A specific year number. Number(i64), } @@ -610,15 +707,27 @@ impl TimeSpec { } } +/// The time at which the rules change for a location. +/// +/// This is described with as few units as possible: a change that occurs at +/// the beginning of the year lists only the year, a change that occurs on a +/// particular day has to list the year, month, and day, and one that occurs +/// at a particular second has to list everything. #[derive(PartialEq, Debug, Copy, Clone)] pub enum ChangeTime { + /// The earliest point in a particular **year**. UntilYear(Year), + /// The earliest point in a particular **month**. UntilMonth(Year, Month), + /// The earliest point in a particular **day**. UntilDay(Year, Month, DaySpec), + /// The earliest point in a particular **hour, minute, or second**. UntilTime(Year, Month, DaySpec, TimeSpecAndType), } impl ChangeTime { + /// Convert this change time to an absolute timestamp, as the number of + /// seconds since the Unix epoch that the change occurs at. pub fn to_timestamp(&self) -> i64 { fn seconds_in_year(year: i64) -> i64 { if is_leap(year) { @@ -744,21 +853,50 @@ fn to_timestamp() { assert_eq!(time.to_timestamp(), 951642000); } +/// The information contained in both zone lines *and* zone continuation lines. 
#[derive(PartialEq, Debug, Copy, Clone)] pub struct ZoneInfo<'a> { + /// The amount of time that needs to be added to UTC to get the standard + /// time in this zone. pub utc_offset: TimeSpec, + /// The name of all the rules that should apply in the time zone, or the + /// amount of time to add. pub saving: Saving<'a>, + /// The format for time zone abbreviations, with `%s` as the string marker. pub format: &'a str, + /// The time at which the rules change for this location, or `None` if + /// these rules are in effect until the end of time (!). pub time: Option, } +/// The amount of daylight saving time (DST) to apply to this timespan. This +/// is a special type for a certain field in a zone line, which can hold +/// different types of value. #[derive(PartialEq, Debug, Copy, Clone)] pub enum Saving<'a> { + /// Just stick to the base offset. NoSaving, + /// This amount of time should be saved while this timespan is in effect. + /// (This is equivalent to there being a single one-off rule with the + /// given amount of time to save). OneOff(TimeSpec), + /// All rules with the given name should apply while this timespan is in + /// effect. Multiple(&'a str), } +/// A **rule** definition line. +/// +/// According to the `zic(8)` man page, a rule line has this form, along with +/// an example: +/// +/// ```text +/// Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +/// Rule US 1967 1973 - Apr lastSun 2:00 1:00 D +/// ``` +/// +/// Apart from the opening `Rule` to specify which kind of line this is, and +/// the `type` column, every column in the line has a field in this struct. #[derive(PartialEq, Debug, Copy, Clone)] pub struct Rule<'a> { pub name: &'a str, @@ -771,6 +909,25 @@ pub struct Rule<'a> { pub letters: Option<&'a str>, } +/// A **zone** definition line. 
+/// +/// According to the `zic(8)` man page, a zone line has this form, along with +/// an example: +/// +/// ```text +/// Zone NAME GMTOFF RULES/SAVE FORMAT [UNTILYEAR [MONTH [DAY [TIME]]]] +/// Zone Australia/Adelaide 9:30 Aus AC%sT 1971 Oct 31 2:00 +/// ``` +/// +/// The opening `Zone` identifier is ignored, and the last four columns are +/// all optional, with their variants consolidated into a `ChangeTime`. +/// +/// The `Rules/Save` column, if it contains a value, *either* contains the +/// name of the rules to use for this zone, *or* contains a one-off period of +/// time to save. +/// +/// A continuation zone line contains all the same fields apart from the +/// `Name` column and the opening `Zone` identifier. #[derive(PartialEq, Debug, Copy, Clone)] pub struct Zone<'a> { pub name: &'a str, @@ -948,6 +1105,9 @@ impl LineParser { let saving = self.saving_from_str(caps.name("rulessave").unwrap().as_str())?; let format = caps.name("format").unwrap().as_str(); + // The year, month, day, and time fields are all optional but nested, meaning + // that it should be impossible to, say, have a defined month but not + // a defined year. let time = match ( caps.name("year"), caps.name("month"), diff --git a/src/table.rs b/src/table.rs index a74f9b4..fbb42ac 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1,3 +1,49 @@ +//! Collecting parsed zoneinfo data lines into a set of time zone data. +//! +//! This module provides the `Table` struct, which is able to take parsed +//! lines of input from the `line` module and coalesce them into a single +//! set of data. +//! +//! It’s not as simple as it seems, because the zoneinfo data lines refer to +//! each other through strings: lines of the form “link zone A to B” could be +//! *parsed* successfully but still fail to be *interpreted* successfully if +//! “B” doesn’t exist. So it has to check every step of the way—nothing wrong +//! with this, it’s just a consequence of reading data from a text file. +//! 
This module only deals with constructing a table from data: any analysis +//! of the data is done elsewhere. +//! +//! +//! ## Example +//! +//! ``` +//! use parse_zoneinfo::line::{Zone, Line, LineParser, Link}; +//! use parse_zoneinfo::table::{TableBuilder}; +//! +//! let parser = LineParser::new(); +//! let mut builder = TableBuilder::new(); +//! +//! let zone = "Zone Pacific/Auckland 11:39:04 - LMT 1868 Nov 2"; +//! let link = "Link Pacific/Auckland Antarctica/McMurdo"; +//! +//! for line in [zone, link] { +//! match parser.parse_str(&line)? { +//! Line::Zone(zone) => builder.add_zone_line(zone).unwrap(), +//! Line::Continuation(cont) => builder.add_continuation_line(cont).unwrap(), +//! Line::Rule(rule) => builder.add_rule_line(rule).unwrap(), +//! Line::Link(link) => builder.add_link_line(link).unwrap(), +//! Line::Space => {} +//! } +//! } +//! +//! let table = builder.build(); +//! +//! assert!(table.get_zoneset("Pacific/Auckland").is_some()); +//! assert!(table.get_zoneset("Antarctica/McMurdo").is_some()); +//! assert!(table.get_zoneset("UTC").is_none()); +//! # Ok::<(), parse_zoneinfo::line::Error>(()) +//! ``` + use std::collections::hash_map::{Entry, HashMap}; use std::fmt;