From 98ebe687ce608c985a5bce2d3e9410fa234a931a Mon Sep 17 00:00:00 2001 From: jtmoon79 <815261+jtmoon79@users.noreply.github.com> Date: Mon, 1 May 2023 23:15:33 -0700 Subject: [PATCH] datetime.rs use compile-time map timezone names to values Use PhfMap compile-time map from timezone names, e.g. "PST", to timezone values, e.g. "-08:00" Issue #84 --- Cargo.lock | 49 ++ Cargo.toml | 1 + src/data/datetime.rs | 994 +++++++++++++++++------------------- src/tests/datetime_tests.rs | 87 ++-- 4 files changed, 565 insertions(+), 566 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bff39edc..36a00157 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,6 +1366,48 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6448add382c60bbbc64f9dab41309a12ec530c05191601042f911356ac09758c" +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" +dependencies = [ + "phf_shared", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92aacdc5f16768709a569e913f7451034034178b05bdc8acda226659a3dccc66" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "phf_shared" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-utils" version = "0.1.0" @@ -1769,6 +1811,12 @@ dependencies = [ "time 0.3.20", ] +[[package]] +name = 
"siphasher" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" + [[package]] name = "skeptic" version = "0.13.7" @@ -1840,6 +1888,7 @@ dependencies = [ "more-asserts", "mut_static", "nix", + "phf", "rand 0.8.5", "rangemap", "regex", diff --git a/Cargo.toml b/Cargo.toml index c6b07363..4c77c828 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ min-max = "0.1.8" more-asserts = "0.3.1" mut_static = "5.0.0" nix = "0.26.2" +phf = { version = "0.11", features = ["macros"] } rand = "0.8.5" rangemap = "1.3.0" regex = "1.7.1" diff --git a/src/data/datetime.rs b/src/data/datetime.rs index 12200215..7a7a3869 100644 --- a/src/data/datetime.rs +++ b/src/data/datetime.rs @@ -38,14 +38,12 @@ #![allow(non_camel_case_types)] #![allow(non_upper_case_globals)] +use crate::debug_panic; #[cfg(any(debug_assertions, test))] use crate::debug::printers::{buffer_to_String_noraw, str_to_String_noraw}; #[doc(hidden)] pub use crate::data::line::{LineIndex, RangeLineIndex}; -use std::collections::BTreeMap; -#[cfg(any(debug_assertions, test))] -use std::collections::HashMap; use std::convert::TryFrom; // for passing array slices as references use std::fmt; #[doc(hidden)] @@ -69,6 +67,8 @@ use ::const_format::concatcp; use ::const_str::to_byte_array; use ::lazy_static::lazy_static; use ::more_asserts::{debug_assert_ge, debug_assert_le, debug_assert_lt}; +use ::phf::phf_map; +use ::phf::Map as PhfMap; use ::regex::bytes::Regex; #[allow(unused_imports)] use ::si_trace_print::{defn, defo, defx, defñ, den, deo, dex, deñ}; @@ -1685,70 +1685,14 @@ const CGP_TZz: &CaptureGroupPattern = r"(?P[\+\-−][012][[:digit:]]{3})"; const CGP_TZzc: &CaptureGroupPattern = r"(?P[\+\-−][012][[:digit:]]:[[:digit:]]{2})"; /// `strftime` specifier `%#z` e.g. `"+09"` const CGP_TZzp: &CaptureGroupPattern = r"(?P[\+\-−][012][[:digit:]])"; -/// `strftime` specifier `%Z` e.g. 
`"ACST"`, all lowercase also allowed +/// `strftime` specifier `%Z` e.g. `"ACST"`, lowercase also allowed +/// ordering is important for more complete matches, +/// e.g. `"PETT"` should occur before `"PET"` pub(crate) const CGP_TZZ: &CaptureGroupPattern = "(?P\ ACDT|ACST|ACT|ACWST|ADT|AEDT|AEST|AET|AFT|AKDT|AKST|ALMT|AMST|AMT|ANAT|AQTT|ART|AST|AWST|AZOST|AZOT|AZT|BIOT|BIT|BNT|BOT|BRST|BRT|BST|BTT|CAT|CCT|CDT|CEST|CET|CHADT|CHAST|CHOST|CHOT|CHST|CHUT|CIST|CKT|CLST|CLT|COST|COT|CST|CT|CVT|CWST|CXT|DAVT|DDUT|DFT|EASST|EAST|EAT|ECT|EDT|EEST|EET|EGST|EGT|EST|ET|FET|FJT|FKST|FKT|FNT|GALT|GAMT|GET|GFT|GILT|GIT|GMT|GST|GYT|HAEC|HDT|HKT|HMT|HOVST|HOVT|HST|ICT|IDLW|IDT|IOT|IRDT|IRKT|IRST|IST|JST|KALT|KGT|KOST|KRAT|KST|LHST|LINT|MAGT|MART|MAWT|MDT|MEST|MET|MHT|MIST|MIT|MMT|MSK|MST|MUT|MVT|MYT|NCT|NDT|NFT|NOVT|NPT|NST|NT|NUT|NZDT|NZST|OMST|ORAT|PDT|PETT|PET|PGT|PHOT|PHST|PHT|PKT|PMDT|PMST|PONT|PST|PWT|PYST|PYT|RET|ROTT|SAKT|SAMT|SAST|SBT|SCT|SDT|SGT|SLST|SRET|SRT|SST|SYOT|TAHT|TFT|THA|TJT|TKT|TLT|TMT|TOT|TRT|TVT|ULAST|ULAT|UTC|UT|UYST|UYT|UZT|VET|VLAT|VOLT|VOST|VUT|WAKT|WAST|WAT|WEST|WET|WGST|WGT|WIB|WITA|WIT|WST|YAKT|YEKT|ZULU|Z|\ 
acdt|acst|act|acwst|adt|aedt|aest|aet|aft|akdt|akst|almt|amst|amt|anat|aqtt|art|ast|awst|azost|azot|azt|biot|bit|bnt|bot|brst|brt|bst|btt|cat|cct|cdt|cest|cet|chadt|chast|chost|chot|chst|chut|cist|ckt|clst|clt|cost|cot|cst|ct|cvt|cwst|cxt|davt|ddut|dft|easst|east|eat|ect|edt|eest|eet|egst|egt|est|et|fet|fjt|fkst|fkt|fnt|galt|gamt|get|gft|gilt|git|gmt|gst|gyt|haec|hdt|hkt|hmt|hovst|hovt|hst|ict|idlw|idt|iot|irdt|irkt|irst|ist|jst|kalt|kgt|kost|krat|kst|lhst|lint|magt|mart|mawt|mdt|mest|met|mht|mist|mit|mmt|msk|mst|mut|mvt|myt|nct|ndt|nft|novt|npt|nst|nt|nut|nzdt|nzst|omst|orat|pdt|pett|pet|pgt|phot|phst|pht|pkt|pmdt|pmst|pont|pst|pwt|pyst|pyt|ret|rott|sakt|samt|sast|sbt|sct|sdt|sgt|slst|sret|srt|sst|syot|taht|tft|tha|tjt|tkt|tlt|tmt|tot|trt|tvt|ulast|ulat|utc|ut|uyst|uyt|uzt|vet|vlat|volt|vost|vut|wakt|wast|wat|west|wet|wgst|wgt|wib|wita|wit|wst|yakt|yekt|zulu|z\ )"; -/// hardcoded listing of named timezone abbreviations -#[cfg(any(debug_assertions, test))] -pub(crate) const TZZ_LIST_UPPER: &[&str] = &[ - "ACDT", "ACST", "ACT", "ACWST", "ADT", "AEDT", "AEST", "AET", "AFT", "AKDT", "AKST", "ALMT", "AMST", - "AMT", "ANAT", "AQTT", "ART", "AST", "AWST", "AZOST", "AZOT", "AZT", "BIOT", "BIT", "BNT", "BOT", "BRST", - "BRT", "BST", "BTT", "CAT", "CCT", "CDT", "CEST", "CET", "CHADT", "CHAST", "CHOST", "CHOT", "CHST", - "CHUT", "CIST", "CKT", "CLST", "CLT", "COST", "COT", "CST", "CT", "CVT", "CWST", "CXT", "DAVT", "DDUT", - "DFT", "EASST", "EAST", "EAT", "ECT", "EDT", "EEST", "EET", "EGST", "EGT", "EST", "ET", "FET", "FJT", - "FKST", "FKT", "FNT", "GALT", "GAMT", "GET", "GFT", "GILT", "GIT", "GMT", "GST", "GYT", "HAEC", "HDT", - "HKT", "HMT", "HOVST", "HOVT", "HST", "ICT", "IDLW", "IDT", "IOT", "IRDT", "IRKT", "IRST", "IST", "JST", - "KALT", "KGT", "KOST", "KRAT", "KST", "LHST", "LINT", "MAGT", "MART", "MAWT", "MDT", "MEST", "MET", - "MHT", "MIST", "MIT", "MMT", "MSK", "MST", "MUT", "MVT", "MYT", "NCT", "NDT", "NFT", "NOVT", "NPT", - "NST", "NT", "NUT", "NZDT", "NZST", 
"OMST", "ORAT", "PDT", "PETT", "PET", "PGT", "PHOT", "PHST", "PHT", - "PKT", "PMDT", "PMST", "PONT", "PST", "PWT", "PYST", "PYT", "RET", "ROTT", "SAKT", "SAMT", "SAST", "SBT", - "SCT", "SDT", "SGT", "SLST", "SRET", "SRT", "SST", "SYOT", "TAHT", "TFT", "THA", "TJT", "TKT", "TLT", - "TMT", "TOT", "TRT", "TVT", "ULAST", "ULAT", "UT", "UTC", "UYST", "UYT", "UZT", "VET", "VLAT", "VOLT", "VOST", - "VUT", "WAKT", "WAST", "WAT", "WEST", "WET", "WGST", "WGT", "WIB", "WITA", "WIT", "WST", "YAKT", "YEKT", - "ZULU", "Z", -]; - -/// lowercase version of [`TZZ_LIST_UPPER`] -#[cfg(any(debug_assertions, test))] -pub(crate) const TZZ_LIST_LOWER: &[&str] = &[ - "acdt", "acst", "act", "acwst", "adt", "aedt", "aest", "aet", "aft", "akdt", "akst", "almt", "amst", - "amt", "anat", "aqtt", "art", "ast", "awst", "azost", "azot", "azt", "biot", "bit", "bnt", "bot", "brst", - "brt", "bst", "btt", "cat", "cct", "cdt", "cest", "cet", "chadt", "chast", "chost", "chot", "chst", - "chut", "cist", "ckt", "clst", "clt", "cost", "cot", "cst", "ct", "cvt", "cwst", "cxt", "davt", "ddut", - "dft", "easst", "east", "eat", "ect", "edt", "eest", "eet", "egst", "egt", "est", "et", "fet", "fjt", - "fkst", "fkt", "fnt", "galt", "gamt", "get", "gft", "gilt", "git", "gmt", "gst", "gyt", "haec", "hdt", - "hkt", "hmt", "hovst", "hovt", "hst", "ict", "idlw", "idt", "iot", "irdt", "irkt", "irst", "ist", "jst", - "kalt", "kgt", "kost", "krat", "kst", "lhst", "lint", "magt", "mart", "mawt", "mdt", "mest", "met", - "mht", "mist", "mit", "mmt", "msk", "mst", "mut", "mvt", "myt", "nct", "ndt", "nft", "novt", "npt", - "nst", "nt", "nut", "nzdt", "nzst", "omst", "orat", "pdt", "pett", "pet", "pgt", "phot", "phst", "pht", - "pkt", "pmdt", "pmst", "pont", "pst", "pwt", "pyst", "pyt", "ret", "rott", "sakt", "samt", "sast", "sbt", - "sct", "sdt", "sgt", "slst", "sret", "srt", "sst", "syot", "taht", "tft", "tha", "tjt", "tkt", "tlt", - "tmt", "tot", "trt", "tvt", "ulast", "ulat", "ut", "utc", "uyst", "uyt", "uzt", "vet", 
"vlat", "volt", "vost", - "vut", "wakt", "wast", "wat", "west", "wet", "wgst", "wgt", "wib", "wita", "wit", "wst", "yakt", "yekt", - "zulu", "z", -]; - -#[cfg(any(debug_assertions, test))] -lazy_static! { - /// map lowercase `%Z` timezones (e.g. "pst") to uppercase (e.g. "PST") - // TODO: [2023/04/29] replace with compile-time `phf::phf_map` - // see https://github.com/rust-phf/rust-phf - pub(crate) static ref TZZ_LOWER_TO_UPPER: HashMap<&'static str, &'static str> = { - assert_eq!(TZZ_LIST_UPPER.len(), TZZ_LIST_LOWER.len()); - let mut map = HashMap::<&'static str, &'static str>::new(); - map.reserve(TZZ_LIST_LOWER.len()); - for (index, tz_lower) in TZZ_LIST_LOWER.iter().enumerate() { - let tz_upper = TZZ_LIST_UPPER[index]; - map.insert(tz_lower, tz_upper); - } - - map - }; -} - /// for testing #[doc(hidden)] #[cfg(test)] @@ -1780,8 +1724,9 @@ pub const RP_DIGITS: &RegexPattern = "[[:digit:]]+"; /// one to three digits pub const RP_DIGITS3: &RegexPattern = r"[[:digit:]]{1,3}"; -/// field name header for date in RFC 2822 line-oriented message -pub const RP_RFC2822_DATE: &RegexPattern = "[Dd][Aa][Tt][Ee]:"; +/// field name header for date in RFC 2822 line-oriented message, +/// not matching wacky-case variants like `dAte:` +pub const RP_RFC2822_DATE: &RegexPattern = "(date|Date|DATE):"; /// All named timezone abbreviations, maps all chrono strftime `%Z` values /// (e.g. `"EDT"`) to equivalent `%:z` value (e.g. `"-04:00"`). @@ -1799,6 +1744,9 @@ pub const RP_RFC2822_DATE: &RegexPattern = "[Dd][Aa][Tt][Ee]:"; /// default timezone offset value, e.g. the value passed to `--tz-offset`. /// See the opening paragraph in [_List of time zone abbreviations_]. /// +/// In this structure, ambiguous timezone names have their values set to empty +/// string, e.g. `"SST"` maps to `""`. See [Issue #59]. 
+/// /// The listing of timezone abbreviations and values can be scraped from /// Wikipedia with this code snippet: /// @@ -1824,465 +1772,459 @@ pub const RP_RFC2822_DATE: &RegexPattern = "[Dd][Aa][Tt][Ee]:"; /// - Applicable tz offsets /// - Applicable tz abbreviations /// +/// [Issue #59]: https://github.com/jtmoon79/super-speedy-syslog-searcher/issues/59 /// [_List of time zone abbreviations_]: https://en.wikipedia.org/w/index.php?title=List_of_time_zone_abbreviations&oldid=1106679802 /// [`DateTime::parse_from_str`]: https://docs.rs/chrono/0.4.22/chrono/format/strftime/#fn7 -pub const TZZ_ALL: [(&str, &str); 422] = [ - // uppercase - ("ACDT", "+10:30"), - ("ACST", "+09:30"), - ("ACT", "-05:00"), - ("ACT", "+08:00"), - ("ACWST", "+08:45"), - ("ADT", "-03:00"), - ("AEDT", "+11:00"), - ("AEST", "+10:00"), - ("AET", "+11:00"), - ("AFT", "+04:30"), - ("AKDT", "-08:00"), - ("AKST", "-09:00"), - ("ALMT", "+06:00"), - ("AMST", "-03:00"), - ("AMT", "-04:00"), - ("AMT", "+04:00"), - ("ANAT", "+12:00"), - ("AQTT", "+05:00"), - ("ART", "-03:00"), - ("AST", "+03:00"), - ("AST", "-04:00"), - ("AWST", "+08:00"), - ("AZOST", "+00:00"), - ("AZOT", "-01:00"), - ("AZT", "+04:00"), - ("BNT", "+08:00"), - ("BIOT", "+06:00"), - ("BIT", "-12:00"), - ("BOT", "-04:00"), - ("BRST", "-02:00"), - ("BRT", "-03:00"), - ("BST", "+06:00"), - ("BST", "+11:00"), - ("BST", "+01:00"), - ("BTT", "+06:00"), - ("CAT", "+02:00"), - ("CCT", "+06:30"), - ("CDT", "-05:00"), - ("CDT", "-04:00"), - ("CEST", "+02:00"), - ("CET", "+01:00"), - ("CHADT", "+13:45"), - ("CHAST", "+12:45"), - ("CHOT", "+08:00"), - ("CHOST", "+09:00"), - ("CHST", "+10:00"), - ("CHUT", "+10:00"), - ("CIST", "-08:00"), - ("CKT", "-10:00"), - ("CLST", "-03:00"), - ("CLT", "-04:00"), - ("COST", "-04:00"), - ("COT", "-05:00"), - ("CST", "-06:00"), - ("CST", "+08:00"), - ("CST", "-05:00"), - ("CT", "-05:00"), - ("CVT", "-01:00"), - ("CWST", "+08:45"), - ("CXT", "+07:00"), - ("DAVT", "+07:00"), - ("DDUT", "+10:00"), - ("DFT", 
"+01:00"), - ("EASST", "-05:00"), - ("EAST", "-06:00"), - ("EAT", "+03:00"), - ("ECT", "-04:00"), - ("ECT", "-05:00"), - ("EDT", "-04:00"), - ("EEST", "+03:00"), - ("EET", "+02:00"), - ("EGST", "-00:00"), - ("EGT", "-01:00"), - ("EST", "-05:00"), - ("ET", "-04:00"), - ("FET", "+03:00"), - ("FJT", "+12:00"), - ("FKST", "-03:00"), - ("FKT", "-04:00"), - ("FNT", "-02:00"), - ("GALT", "-06:00"), - ("GAMT", "-09:00"), - ("GET", "+04:00"), - ("GFT", "-03:00"), - ("GILT", "+12:00"), - ("GIT", "-09:00"), - ("GMT", "-00:00"), - ("GST", "-02:00"), - ("GST", "+04:00"), - ("GYT", "-04:00"), - ("HDT", "-09:00"), - ("HAEC", "+02:00"), - ("HST", "-10:00"), - ("HKT", "+08:00"), - ("HMT", "+05:00"), - ("HOVST", "+08:00"), - ("HOVT", "+07:00"), - ("ICT", "+07:00"), - ("IDLW", "-12:00"), - ("IDT", "+03:00"), - ("IOT", "+03:00"), - ("IRDT", "+04:30"), - ("IRKT", "+08:00"), - ("IRST", "+03:30"), - ("IST", "+05:30"), - ("IST", "+01:00"), - ("IST", "+02:00"), - ("JST", "+09:00"), - ("KALT", "+02:00"), - ("KGT", "+06:00"), - ("KOST", "+11:00"), - ("KRAT", "+07:00"), - ("KST", "+09:00"), - ("LHST", "+10:30"), - ("LHST", "+11:00"), - ("LINT", "+14:00"), - ("MAGT", "+12:00"), - ("MART", "-09:30"), - ("MAWT", "+05:00"), - ("MDT", "-06:00"), - ("MET", "+01:00"), - ("MEST", "+02:00"), - ("MHT", "+12:00"), - ("MIST", "+11:00"), - ("MIT", "-09:30"), - ("MMT", "+06:30"), - ("MSK", "+03:00"), - ("MST", "+08:00"), - ("MST", "-07:00"), - ("MUT", "+04:00"), - ("MVT", "+05:00"), - ("MYT", "+08:00"), - ("NCT", "+11:00"), - ("NDT", "-02:30"), - ("NFT", "+11:00"), - ("NOVT", "+07:00"), - ("NPT", "+05:45"), - ("NST", "-03:30"), - ("NT", "-03:30"), - ("NUT", "-11:00"), - ("NZDT", "+13:00"), - ("NZST", "+12:00"), - ("OMST", "+06:00"), - ("ORAT", "+05:00"), - ("PDT", "-07:00"), - ("PET", "-05:00"), - ("PETT", "+12:00"), - ("PGT", "+10:00"), - ("PHOT", "+13:00"), - ("PHT", "+08:00"), - ("PHST", "+08:00"), - ("PKT", "+05:00"), - ("PMDT", "-02:00"), - ("PMST", "-03:00"), - ("PONT", "+11:00"), - ("PST", 
"-08:00"), - ("PWT", "+09:00"), - ("PYST", "-03:00"), - ("PYT", "-04:00"), - ("RET", "+04:00"), - ("ROTT", "-03:00"), - ("SAKT", "+11:00"), - ("SAMT", "+04:00"), - ("SAST", "+02:00"), - ("SBT", "+11:00"), - ("SCT", "+04:00"), - ("SDT", "-10:00"), - ("SGT", "+08:00"), - ("SLST", "+05:30"), - ("SRET", "+11:00"), - ("SRT", "-03:00"), - ("SST", "-11:00"), - ("SST", "+08:00"), - ("SYOT", "+03:00"), - ("TAHT", "-10:00"), - ("THA", "+07:00"), - ("TFT", "+05:00"), - ("TJT", "+05:00"), - ("TKT", "+13:00"), - ("TLT", "+09:00"), - ("TMT", "+05:00"), - ("TRT", "+03:00"), - ("TOT", "+13:00"), - ("TVT", "+12:00"), - ("ULAST", "+09:00"), - ("ULAT", "+08:00"), - ("UT", "-00:00"), - ("UTC", "-00:00"), - ("UYST", "-02:00"), - ("UYT", "-03:00"), - ("UZT", "+05:00"), - ("VET", "-04:00"), - ("VLAT", "+10:00"), - ("VOLT", "+03:00"), - ("VOST", "+06:00"), - ("VUT", "+11:00"), - ("WAKT", "+12:00"), - ("WAST", "+02:00"), - ("WAT", "+01:00"), - ("WEST", "+01:00"), - ("WET", "-00:00"), - ("WIB", "+07:00"), - ("WIT", "+09:00"), - ("WITA", "+08:00"), - ("WGST", "-02:00"), - ("WGT", "-03:00"), - ("WST", "+08:00"), - ("YAKT", "+09:00"), - ("YEKT", "+05:00"), - ("ZULU", "+00:00"), - ("Z", "+00:00"), - // lowercase - ("acdt", "+10:30"), - ("acst", "+09:30"), - ("act", "-05:00"), - ("act", "+08:00"), - ("acwst", "+08:45"), - ("adt", "-03:00"), - ("aedt", "+11:00"), - ("aest", "+10:00"), - ("aet", "+11:00"), - ("aft", "+04:30"), - ("akdt", "-08:00"), - ("akst", "-09:00"), - ("almt", "+06:00"), - ("amst", "-03:00"), - ("amt", "-04:00"), - ("amt", "+04:00"), - ("anat", "+12:00"), - ("aqtt", "+05:00"), - ("art", "-03:00"), - ("ast", "+03:00"), - ("ast", "-04:00"), - ("awst", "+08:00"), - ("azost", "-00:00"), - ("azot", "-01:00"), - ("azt", "+04:00"), - ("bnt", "+08:00"), - ("biot", "+06:00"), - ("bit", "-12:00"), - ("bot", "-04:00"), - ("brst", "-02:00"), - ("brt", "-03:00"), - ("bst", "+06:00"), - ("bst", "+11:00"), - ("bst", "+01:00"), - ("btt", "+06:00"), - ("cat", "+02:00"), - ("cct", "+06:30"), - 
("cdt", "-05:00"), - ("cdt", "-04:00"), - ("cest", "+02:00"), - ("cet", "+01:00"), - ("chadt", "+13:45"), - ("chast", "+12:45"), - ("chot", "+08:00"), - ("chost", "+09:00"), - ("chst", "+10:00"), - ("chut", "+10:00"), - ("cist", "-08:00"), - ("ckt", "-10:00"), - ("clst", "-03:00"), - ("clt", "-04:00"), - ("cost", "-04:00"), - ("cot", "-05:00"), - ("cst", "-06:00"), - ("cst", "+08:00"), - ("cst", "-05:00"), - ("ct", "-05:00"), - ("cvt", "-01:00"), - ("cwst", "+08:45"), - ("cxt", "+07:00"), - ("davt", "+07:00"), - ("ddut", "+10:00"), - ("dft", "+01:00"), - ("easst", "-05:00"), - ("east", "-06:00"), - ("eat", "+03:00"), - ("ect", "-04:00"), - ("ect", "-05:00"), - ("edt", "-04:00"), - ("eest", "+03:00"), - ("eet", "+02:00"), - ("egst", "-00:00"), - ("egt", "-01:00"), - ("est", "-05:00"), - ("et", "-04:00"), - ("fet", "+03:00"), - ("fjt", "+12:00"), - ("fkst", "-03:00"), - ("fkt", "-04:00"), - ("fnt", "-02:00"), - ("galt", "-06:00"), - ("gamt", "-09:00"), - ("get", "+04:00"), - ("gft", "-03:00"), - ("gilt", "+12:00"), - ("git", "-09:00"), - ("gmt", "-00:00"), - ("gst", "-02:00"), - ("gst", "+04:00"), - ("gyt", "-04:00"), - ("hdt", "-09:00"), - ("haec", "+02:00"), - ("hst", "-10:00"), - ("hkt", "+08:00"), - ("hmt", "+05:00"), - ("hovst", "+08:00"), - ("hovt", "+07:00"), - ("ict", "+07:00"), - ("idlw", "-12:00"), - ("idt", "+03:00"), - ("iot", "+03:00"), - ("irdt", "+04:30"), - ("irkt", "+08:00"), - ("irst", "+03:30"), - ("ist", "+05:30"), - ("ist", "+01:00"), - ("ist", "+02:00"), - ("jst", "+09:00"), - ("kalt", "+02:00"), - ("kgt", "+06:00"), - ("kost", "+11:00"), - ("krat", "+07:00"), - ("kst", "+09:00"), - ("lhst", "+10:30"), - ("lhst", "+11:00"), - ("lint", "+14:00"), - ("magt", "+12:00"), - ("mart", "-09:30"), - ("mawt", "+05:00"), - ("mdt", "-06:00"), - ("met", "+01:00"), - ("mest", "+02:00"), - ("mht", "+12:00"), - ("mist", "+11:00"), - ("mit", "-09:30"), - ("mmt", "+06:30"), - ("msk", "+03:00"), - ("mst", "+08:00"), - ("mst", "-07:00"), - ("mut", "+04:00"), - 
("mvt", "+05:00"), - ("myt", "+08:00"), - ("nct", "+11:00"), - ("ndt", "-02:30"), - ("nft", "+11:00"), - ("novt", "+07:00"), - ("npt", "+05:45"), - ("nst", "-03:30"), - ("nt", "-03:30"), - ("nut", "-11:00"), - ("nzdt", "+13:00"), - ("nzst", "+12:00"), - ("omst", "+06:00"), - ("orat", "+05:00"), - ("pdt", "-07:00"), - ("pet", "-05:00"), - ("pett", "+12:00"), - ("pgt", "+10:00"), - ("phot", "+13:00"), - ("pht", "+08:00"), - ("phst", "+08:00"), - ("pkt", "+05:00"), - ("pmdt", "-02:00"), - ("pmst", "-03:00"), - ("pont", "+11:00"), - ("pst", "-08:00"), - ("pwt", "+09:00"), - ("pyst", "-03:00"), - ("pyt", "-04:00"), - ("ret", "+04:00"), - ("rott", "-03:00"), - ("sakt", "+11:00"), - ("samt", "+04:00"), - ("sast", "+02:00"), - ("sbt", "+11:00"), - ("sct", "+04:00"), - ("sdt", "-10:00"), - ("sgt", "+08:00"), - ("slst", "+05:30"), - ("sret", "+11:00"), - ("srt", "-03:00"), - ("sst", "-11:00"), - ("sst", "+08:00"), - ("syot", "+03:00"), - ("taht", "-10:00"), - ("tha", "+07:00"), - ("tft", "+05:00"), - ("tjt", "+05:00"), - ("tkt", "+13:00"), - ("tlt", "+09:00"), - ("tmt", "+05:00"), - ("trt", "+03:00"), - ("tot", "+13:00"), - ("tvt", "+12:00"), - ("ulast", "+09:00"), - ("ulat", "+08:00"), - ("ut", "-00:00"), - ("utc", "-00:00"), - ("uyst", "-02:00"), - ("uyt", "-03:00"), - ("uzt", "+05:00"), - ("vet", "-04:00"), - ("vlat", "+10:00"), - ("volt", "+03:00"), - ("vost", "+06:00"), - ("vut", "+11:00"), - ("wakt", "+12:00"), - ("wast", "+02:00"), - ("wat", "+01:00"), - ("west", "+01:00"), - ("wet", "-00:00"), - ("wib", "+07:00"), - ("wit", "+09:00"), - ("wita", "+08:00"), - ("wgst", "-02:00"), - ("wgt", "-03:00"), - ("wst", "+08:00"), - ("yakt", "+09:00"), - ("yekt", "+05:00"), - ("zulu", "+00:00"), - ("z", "+00:00"), -]; - -type Map_TZZ_to_TZz<'a> = BTreeMap<&'a str, &'a str>; - -lazy_static! { - /// Map of all `%Z` values, e.g. `"PST"` or `"pst"`, - /// to the `%:z` value, e.g. `"-07:00"`. - /// A key with an `is_empty()` value signifies an ambiguous timezone. 
- /// Ambiguous timezones will use the provided fallback timezone offset - /// (i.e. CLI option `--tz-offset`). - /// - /// Ambiguous timezone names have their values set to empty string, - /// e.g. `"SST"` maps to `""`. See [Issue #59]. - /// - /// [Issue #59]: https://github.com/jtmoon79/super-speedy-syslog-searcher/issues/59 - // XXX: must be `pub` to allow access from `src/bin/bin.rs` - // TODO: [2023/04/29] replace with compile-time `phf::phf_map` - // see https://github.com/rust-phf/rust-phf - pub static ref MAP_TZZ_TO_TZz: Map_TZZ_to_TZz<'static> = { - let mut map = Map_TZZ_to_TZz::new(); - #[allow(non_snake_case)] - for tzZ_zc in TZZ_ALL.iter() { - if map.insert(tzZ_zc.0, tzZ_zc.1).is_some() { - // duplicate key entries are set to empty string - // (this signifies an ambiguous named timezone) - map.insert(tzZ_zc.0, ""); - } - } - - map - }; -} +pub static MAP_TZZ_TO_TZz: PhfMap<&'static str, &'static str> = phf_map! { + // uppercase + "ACDT" => "+10:30", + "ACST" => "+09:30", + "ACT" => "", + //"ACT" => "-05:00", + //"ACT" => "+08:00", + "ACWST" => "+08:45", + "ADT" => "-03:00", + "AEDT" => "+11:00", + "AEST" => "+10:00", + "AET" => "+11:00", + "AFT" => "+04:30", + "AKDT" => "-08:00", + "AKST" => "-09:00", + "ALMT" => "+06:00", + "AMST" => "-03:00", + "AMT" => "", + //"AMT" => "-04:00", + //"AMT" => "+04:00", + "ANAT" => "+12:00", + "AQTT" => "+05:00", + "ART" => "-03:00", + "AST" => "", + //"AST" => "+03:00", + //"AST" => "-04:00", + "AWST" => "+08:00", + "AZOST" => "+00:00", + "AZOT" => "-01:00", + "AZT" => "+04:00", + "BNT" => "+08:00", + "BIOT" => "+06:00", + "BIT" => "-12:00", + "BOT" => "-04:00", + "BRST" => "-02:00", + "BRT" => "-03:00", + "BST" => "", + //"BST" => "+06:00", + //"BST" => "+11:00", + //"BST" => "+01:00", + "BTT" => "+06:00", + "CAT" => "+02:00", + "CCT" => "+06:30", + "CDT" => "", + //"CDT" => "-05:00", + //"CDT" => "-04:00", + "CEST" => "+02:00", + "CET" => "+01:00", + "CHADT" => "+13:45", + "CHAST" => "+12:45", + "CHOT" => "+08:00", 
+ "CHOST" => "+09:00", + "CHST" => "+10:00", + "CHUT" => "+10:00", + "CIST" => "-08:00", + "CKT" => "-10:00", + "CLST" => "-03:00", + "CLT" => "-04:00", + "COST" => "-04:00", + "COT" => "-05:00", + "CST" => "", + //"CST" => "-06:00", + //"CST" => "+08:00", + //"CST" => "-05:00", + "CT" => "-05:00", + "CVT" => "-01:00", + "CWST" => "+08:45", + "CXT" => "+07:00", + "DAVT" => "+07:00", + "DDUT" => "+10:00", + "DFT" => "+01:00", + "EASST" => "-05:00", + "EAST" => "-06:00", + "EAT" => "+03:00", + "ECT" => "", + //"ECT" => "-04:00", + //"ECT" => "-05:00", + "EDT" => "-04:00", + "EEST" => "+03:00", + "EET" => "+02:00", + "EGST" => "-00:00", + "EGT" => "-01:00", + "EST" => "-05:00", + "ET" => "-04:00", + "FET" => "+03:00", + "FJT" => "+12:00", + "FKST" => "-03:00", + "FKT" => "-04:00", + "FNT" => "-02:00", + "GALT" => "-06:00", + "GAMT" => "-09:00", + "GET" => "+04:00", + "GFT" => "-03:00", + "GILT" => "+12:00", + "GIT" => "-09:00", + "GMT" => "-00:00", + "GST" => "", + //"GST" => "-02:00", + //"GST" => "+04:00", + "GYT" => "-04:00", + "HDT" => "-09:00", + "HAEC" => "+02:00", + "HST" => "-10:00", + "HKT" => "+08:00", + "HMT" => "+05:00", + "HOVST" => "+08:00", + "HOVT" => "+07:00", + "ICT" => "+07:00", + "IDLW" => "-12:00", + "IDT" => "+03:00", + "IOT" => "+03:00", + "IRDT" => "+04:30", + "IRKT" => "+08:00", + "IRST" => "+03:30", + "IST" => "", + //"IST" => "+05:30", + //"IST" => "+01:00", + //"IST" => "+02:00", + "JST" => "+09:00", + "KALT" => "+02:00", + "KGT" => "+06:00", + "KOST" => "+11:00", + "KRAT" => "+07:00", + "KST" => "+09:00", + "LHST" => "", + //"LHST" => "+10:30", + //"LHST" => "+11:00", + "LINT" => "+14:00", + "MAGT" => "+12:00", + "MART" => "-09:30", + "MAWT" => "+05:00", + "MDT" => "-06:00", + "MET" => "+01:00", + "MEST" => "+02:00", + "MHT" => "+12:00", + "MIST" => "+11:00", + "MIT" => "-09:30", + "MMT" => "+06:30", + "MSK" => "+03:00", + "MST" => "", + //"MST" => "+08:00", + //"MST" => "-07:00", + "MUT" => "+04:00", + "MVT" => "+05:00", + "MYT" => 
"+08:00", + "NCT" => "+11:00", + "NDT" => "-02:30", + "NFT" => "+11:00", + "NOVT" => "+07:00", + "NPT" => "+05:45", + "NST" => "-03:30", + "NT" => "-03:30", + "NUT" => "-11:00", + "NZDT" => "+13:00", + "NZST" => "+12:00", + "OMST" => "+06:00", + "ORAT" => "+05:00", + "PDT" => "-07:00", + "PET" => "-05:00", + "PETT" => "+12:00", + "PGT" => "+10:00", + "PHOT" => "+13:00", + "PHT" => "+08:00", + "PHST" => "+08:00", + "PKT" => "+05:00", + "PMDT" => "-02:00", + "PMST" => "-03:00", + "PONT" => "+11:00", + "PST" => "-08:00", + "PWT" => "+09:00", + "PYST" => "-03:00", + "PYT" => "-04:00", + "RET" => "+04:00", + "ROTT" => "-03:00", + "SAKT" => "+11:00", + "SAMT" => "+04:00", + "SAST" => "+02:00", + "SBT" => "+11:00", + "SCT" => "+04:00", + "SDT" => "-10:00", + "SGT" => "+08:00", + "SLST" => "+05:30", + "SRET" => "+11:00", + "SRT" => "-03:00", + "SST" => "", + //"SST" => "-11:00", + //"SST" => "+08:00", + "SYOT" => "+03:00", + "TAHT" => "-10:00", + "THA" => "+07:00", + "TFT" => "+05:00", + "TJT" => "+05:00", + "TKT" => "+13:00", + "TLT" => "+09:00", + "TMT" => "+05:00", + "TRT" => "+03:00", + "TOT" => "+13:00", + "TVT" => "+12:00", + "ULAST" => "+09:00", + "ULAT" => "+08:00", + "UT" => "-00:00", + "UTC" => "-00:00", + "UYST" => "-02:00", + "UYT" => "-03:00", + "UZT" => "+05:00", + "VET" => "-04:00", + "VLAT" => "+10:00", + "VOLT" => "+03:00", + "VOST" => "+06:00", + "VUT" => "+11:00", + "WAKT" => "+12:00", + "WAST" => "+02:00", + "WAT" => "+01:00", + "WEST" => "+01:00", + "WET" => "-00:00", + "WIB" => "+07:00", + "WIT" => "+09:00", + "WITA" => "+08:00", + "WGST" => "-02:00", + "WGT" => "-03:00", + "WST" => "+08:00", + "YAKT" => "+09:00", + "YEKT" => "+05:00", + "ZULU" => "+00:00", + "Z" => "+00:00", + // lowercase + "acdt" => "+10:30", + "acst" => "+09:30", + "act" => "", + //"act" => "-05:00", + //"act" => "+08:00", + "acwst" => "+08:45", + "adt" => "-03:00", + "aedt" => "+11:00", + "aest" => "+10:00", + "aet" => "+11:00", + "aft" => "+04:30", + "akdt" => "-08:00", + "akst" 
=> "-09:00", + "almt" => "+06:00", + "amst" => "-03:00", + "amt" => "", + //"amt" => "-04:00", + //"amt" => "+04:00", + "anat" => "+12:00", + "aqtt" => "+05:00", + "art" => "-03:00", + "ast" => "", + //"ast" => "+03:00", + //"ast" => "-04:00", + "awst" => "+08:00", + "azost" => "-00:00", + "azot" => "-01:00", + "azt" => "+04:00", + "bnt" => "+08:00", + "biot" => "+06:00", + "bit" => "-12:00", + "bot" => "-04:00", + "brst" => "-02:00", + "brt" => "-03:00", + "bst" => "", + //"bst" => "+06:00", + //"bst" => "+11:00", + //"bst" => "+01:00", + "btt" => "+06:00", + "cat" => "+02:00", + "cct" => "+06:30", + "cdt" => "", + //"cdt" => "-05:00", + //"cdt" => "-04:00", + "cest" => "+02:00", + "cet" => "+01:00", + "chadt" => "+13:45", + "chast" => "+12:45", + "chot" => "+08:00", + "chost" => "+09:00", + "chst" => "+10:00", + "chut" => "+10:00", + "cist" => "-08:00", + "ckt" => "-10:00", + "clst" => "-03:00", + "clt" => "-04:00", + "cost" => "-04:00", + "cot" => "-05:00", + "cst" => "", + //"cst" => "-06:00", + //"cst" => "+08:00", + //"cst" => "-05:00", + "ct" => "-05:00", + "cvt" => "-01:00", + "cwst" => "+08:45", + "cxt" => "+07:00", + "davt" => "+07:00", + "ddut" => "+10:00", + "dft" => "+01:00", + "easst" => "-05:00", + "east" => "-06:00", + "eat" => "+03:00", + "ect" => "", + //"ect" => "-04:00", + //"ect" => "-05:00", + "edt" => "-04:00", + "eest" => "+03:00", + "eet" => "+02:00", + "egst" => "-00:00", + "egt" => "-01:00", + "est" => "-05:00", + "et" => "-04:00", + "fet" => "+03:00", + "fjt" => "+12:00", + "fkst" => "-03:00", + "fkt" => "-04:00", + "fnt" => "-02:00", + "galt" => "-06:00", + "gamt" => "-09:00", + "get" => "+04:00", + "gft" => "-03:00", + "gilt" => "+12:00", + "git" => "-09:00", + "gmt" => "-00:00", + "gst" => "", + //"gst" => "-02:00", + //"gst" => "+04:00", + "gyt" => "-04:00", + "hdt" => "-09:00", + "haec" => "+02:00", + "hst" => "-10:00", + "hkt" => "+08:00", + "hmt" => "+05:00", + "hovst" => "+08:00", + "hovt" => "+07:00", + "ict" => "+07:00", + 
"idlw" => "-12:00", + "idt" => "+03:00", + "iot" => "+03:00", + "irdt" => "+04:30", + "irkt" => "+08:00", + "irst" => "+03:30", + "ist" => "", + //"ist" => "+05:30", + //"ist" => "+01:00", + //"ist" => "+02:00", + "jst" => "+09:00", + "kalt" => "+02:00", + "kgt" => "+06:00", + "kost" => "+11:00", + "krat" => "+07:00", + "kst" => "+09:00", + "lhst" => "", + //"lhst" => "+10:30", + //"lhst" => "+11:00", + "lint" => "+14:00", + "magt" => "+12:00", + "mart" => "-09:30", + "mawt" => "+05:00", + "mdt" => "-06:00", + "met" => "+01:00", + "mest" => "+02:00", + "mht" => "+12:00", + "mist" => "+11:00", + "mit" => "-09:30", + "mmt" => "+06:30", + "msk" => "+03:00", + "mst" => "", + //"mst" => "+08:00", + //"mst" => "-07:00", + "mut" => "+04:00", + "mvt" => "+05:00", + "myt" => "+08:00", + "nct" => "+11:00", + "ndt" => "-02:30", + "nft" => "+11:00", + "novt" => "+07:00", + "npt" => "+05:45", + "nst" => "-03:30", + "nt" => "-03:30", + "nut" => "-11:00", + "nzdt" => "+13:00", + "nzst" => "+12:00", + "omst" => "+06:00", + "orat" => "+05:00", + "pdt" => "-07:00", + "pet" => "-05:00", + "pett" => "+12:00", + "pgt" => "+10:00", + "phot" => "+13:00", + "pht" => "+08:00", + "phst" => "+08:00", + "pkt" => "+05:00", + "pmdt" => "-02:00", + "pmst" => "-03:00", + "pont" => "+11:00", + "pst" => "-08:00", + "pwt" => "+09:00", + "pyst" => "-03:00", + "pyt" => "-04:00", + "ret" => "+04:00", + "rott" => "-03:00", + "sakt" => "+11:00", + "samt" => "+04:00", + "sast" => "+02:00", + "sbt" => "+11:00", + "sct" => "+04:00", + "sdt" => "-10:00", + "sgt" => "+08:00", + "slst" => "+05:30", + "sret" => "+11:00", + "srt" => "-03:00", + "sst" => "", + //"sst" => "-11:00", + //"sst" => "+08:00", + "syot" => "+03:00", + "taht" => "-10:00", + "tha" => "+07:00", + "tft" => "+05:00", + "tjt" => "+05:00", + "tkt" => "+13:00", + "tlt" => "+09:00", + "tmt" => "+05:00", + "trt" => "+03:00", + "tot" => "+13:00", + "tvt" => "+12:00", + "ulast" => "+09:00", + "ulat" => "+08:00", + "ut" => "-00:00", + "utc" => 
"-00:00", + "uyst" => "-02:00", + "uyt" => "-03:00", + "uzt" => "+05:00", + "vet" => "-04:00", + "vlat" => "+10:00", + "volt" => "+03:00", + "vost" => "+06:00", + "vut" => "+11:00", + "wakt" => "+12:00", + "wast" => "+02:00", + "wat" => "+01:00", + "west" => "+01:00", + "wet" => "-00:00", + "wib" => "+07:00", + "wit" => "+09:00", + "wita" => "+08:00", + "wgst" => "-02:00", + "wgt" => "-03:00", + "wst" => "+08:00", + "yakt" => "+09:00", + "yekt" => "+05:00", + "zulu" => "+00:00", + "z" => "+00:00", +}; /// [`RegexPattern`] divider _date?_ `2020/01/01` or `2020-01-01` or /// `2020 01 01` or `20200101` @@ -5228,7 +5170,7 @@ pub(crate) fn captures_to_buffer_bytes( .as_bytes(), ) .unwrap(); - match MAP_TZZ_TO_TZz.get_key_value(tzZ) { + match MAP_TZZ_TO_TZz.get_entry(tzZ) { Some((_tz_abbr, tz_offset_val)) => { match tz_offset_val.is_empty() { true => { @@ -5274,9 +5216,11 @@ pub fn bytes_to_regex_to_datetime( let regex_: &Regex = match DATETIME_PARSE_DATAS_REGEX_VEC.get(*index) { Some(val) => val, - None => { - panic!("requested DATETIME_PARSE_DATAS_REGEX_VEC.get({}), returned None. DATETIME_PARSE_DATAS_REGEX_VEC.len() {}", index, DATETIME_PARSE_DATAS_REGEX_VEC.len()); - } + None => + panic!( + "requested DATETIME_PARSE_DATAS_REGEX_VEC.get({}), returned None. DATETIME_PARSE_DATAS_REGEX_VEC.len() {}", + index, DATETIME_PARSE_DATAS_REGEX_VEC.len() + ), }; // The regular expression matching call. 
According to `tools/flamegraph.sh` diff --git a/src/tests/datetime_tests.rs b/src/tests/datetime_tests.rs index 252b3dd2..449d4414 100644 --- a/src/tests/datetime_tests.rs +++ b/src/tests/datetime_tests.rs @@ -7,8 +7,15 @@ #![allow(non_camel_case_types)] use crate::tests::common::{ - FO_0, FO_P1, FO_M7, FO_M8, - FO_E10, FO_L, FO_W8, FO_Z, + FO_0, + FO_P1, + FO_M7, + FO_M8, + FO_E10, + FO_L, + FO_L_STR, + FO_W8, + FO_Z, }; use crate::data::datetime::{ LineIndex, @@ -49,9 +56,6 @@ use crate::data::datetime::{ CGP_YEARy, CGP_TZZ, CGP_TZ_ALL, - TZZ_LIST_LOWER, - TZZ_LIST_UPPER, - TZZ_LOWER_TO_UPPER, MAP_TZZ_TO_TZz, RP_LB, RP_RB, @@ -766,47 +770,48 @@ fn test_DATETIME_PARSE_DATAS_test_cases(index: usize) { /// check of structures containing timezone names and timezone values fn test_Map_TZ_names() { let regex = regex::Regex::new(CGP_TZZ).unwrap(); - assert_eq!(TZZ_LIST_UPPER.len(), TZZ_LIST_LOWER.len(), "TZZ_LIST_UPPER len {} != {} TZZ_LIST_LOWER len", TZZ_LIST_UPPER.len(), TZZ_LIST_LOWER.len()); - for up in TZZ_LIST_UPPER { - assert!(MAP_TZZ_TO_TZz.contains_key(up), "Named timezone {:?} not found in MAP_TZZ_TO_TZz", up); - } - for lo in TZZ_LIST_LOWER { - let up = lo.to_ascii_uppercase(); - assert!(MAP_TZZ_TO_TZz.contains_key(up.as_str()), "Named timezone {:?} (lower {:?}) not found in MAP_TZZ_TO_TZz", up, lo); - } - // tz_name example "PST" + // tz_name example "PST" or "pst" // tz_val example "-07:00" - for (tz_name, tz_val) in MAP_TZZ_TO_TZz.iter() { + for (tz_name, tz_val) in MAP_TZZ_TO_TZz.entries() { + let tz_name_u = tz_name.to_ascii_uppercase(); + let tz_name_l = tz_name.to_ascii_lowercase(); + assert!(tz_name == &tz_name_u.as_str() || tz_name == &tz_name_l.as_str(), + "Bad timezone name {:?} not all uppercase or all lowercase", tz_name + ); + assert!(MAP_TZZ_TO_TZz.contains_key(&tz_name_u), + "Key {:?} as uppercase {:?} not found in MAP_TZZ_TO_TZz", tz_name, tz_name_u + ); + assert!(MAP_TZZ_TO_TZz.contains_key(&tz_name_l), + "Key {:?} as lowercase {:?} not 
found in MAP_TZZ_TO_TZz", tz_name, tz_name_l + ); + assert!(regex.is_match(tz_name), "Key {:?} from MAP_TZZ_TO_TZz not matched by CGP_TZZ Regex", tz_name); + let captures = regex.captures(tz_name).unwrap(); + assert_eq!(captures.len(), 2, "CGP_TZZ Regex captured {:?} != 2 expected", captures.len()); + let tz_name_captured = captures.get(1).unwrap().as_str(); + assert_eq!(&tz_name_captured, tz_name, "CGP_TZZ Regex captured {:?} != {:?} expected", tz_name_captured, tz_name); + assert!(CGP_TZZ.contains(tz_name), "CGP_TZZ does not contain name {:?} from MAP_TZZ_TO_TZz", tz_name); if ! tz_val.is_empty() { - assert!(tz_val.starts_with('+') || tz_val.starts_with('-'), "Bad timezone value starts_with {:?} for entry {:?}", tz_val, tz_name); + assert_eq!(tz_val.len(), 6, "Bad timezone value {:?} length {:?} for entry {:?}", tz_val, tz_val.len(), tz_name); + assert!("+-".contains(tz_val.chars().nth(0).unwrap()), "Bad timezone value starts_with {:?} for entry {:?}", tz_val, tz_name); + assert!("01".contains(tz_val.chars().nth(1).unwrap()), "Bad timezone value {:?} for entry {:?}", tz_val, tz_name); + assert!("0123456789".contains(tz_val.chars().nth(2).unwrap()), "Bad timezone value {:?} for entry {:?}", tz_val, tz_name); + assert!(":".contains(tz_val.chars().nth(3).unwrap()), "Bad timezone value {:?} for entry {:?}", tz_val, tz_name); assert!(tz_val.ends_with(":00") || tz_val.ends_with(":30") || tz_val.ends_with(":45"), "Bad timezone value ends_with {:?} for entry {:?}", tz_val, tz_name); assert!(tz_val.contains(':'), "Bad timezone value {:?} not contains ':' for entry {:?}", tz_val, tz_name); - assert_eq!(tz_val.len(), 6, "Bad timezone value {:?} length {:?} for entry {:?}", tz_val, tz_val.len(), tz_name); - } // empty value means the name is ambiguous - assert!(TZZ_LIST_UPPER.contains(tz_name) || TZZ_LIST_LOWER.contains(tz_name), "Named timezone {:?} not in TZZ_LIST_UPPER or TZZ_LIST_LOWER", tz_name); - } - for (index, tz_upper) in TZZ_LIST_UPPER.iter().enumerate() { - let 
tz_lower = TZZ_LIST_LOWER[index]; - let tz_lower_to_upper = tz_lower.to_ascii_uppercase(); - assert_eq!( - tz_upper, &tz_lower_to_upper.as_str(), - "TZZ_LIST_UPPER[{}]={:?} != TZZ_LIST_LOWER[{}]={:?} ({:?})", - index, tz_upper, index, tz_lower, tz_lower_to_upper, - ); - } - for (lo, up) in TZZ_LOWER_TO_UPPER.iter() { - assert!(regex.is_match(lo), "Key {:?} from TZZ_LOWER_TO_UPPER not matched by regex CGP_TZZ", lo); - assert!(regex.is_match(up), "Value {:?} from TZZ_LOWER_TO_UPPER not matched by regex CGP_TZZ", up); + } else { + // empty value means the name is ambiguous + let tz_val_u = MAP_TZZ_TO_TZz.get(&tz_name_u.as_str()).unwrap(); + let tz_val_l = MAP_TZZ_TO_TZz.get(&tz_name_l.as_str()).unwrap(); + assert!(tz_val_u.is_empty(), "Ambiguous timezone name {:?} has uppercase version {:?} that is not empty {:?}", tz_name, tz_name_u, tz_val_u); + assert!(tz_val_l.is_empty(), "Ambiguous timezone name {:?} has lowercase version {:?} that is not empty {:?}", tz_name, tz_name_l, tz_val_l); + } } - for (tz_name, _tz_val) in MAP_TZZ_TO_TZz.iter() { - assert!(regex.is_match(tz_name), "Key {:?} from MAP_TZZ_TO_TZz not matched by regex CGP_TZZ", tz_name); - assert!( - TZZ_LIST_UPPER.contains(tz_name) != TZZ_LIST_LOWER.contains(tz_name), - "Key {:?} from MAP_TZZ_TO_TZz {} in TZZ_LIST_UPPER, {} in TZZ_LIST_LOWER", - tz_name, - if TZZ_LIST_UPPER.contains(tz_name) { "is" } else { "not" }, - if TZZ_LIST_LOWER.contains(tz_name) { "is" } else { "not" }, - ); + let start = CGP_TZZ.find('>'); + assert!(start.is_some(), "CGP_TZZ does not contain start '>'"); + let end = CGP_TZZ.find(')'); + assert!(end.is_some(), "CGP_TZZ does not contain end ')'"); + for val in CGP_TZZ[start.unwrap() + 1..end.unwrap()].split('|') { + assert!(MAP_TZZ_TO_TZz.contains_key(val), "Substring {:?} from regex CGP_TZZ not found in MAP_TZZ_TO_TZz", val); } }