Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize size of TzOffset #165

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions chrono-tz-build/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ extern crate parse_zoneinfo;
#[cfg(feature = "filter-by-regex")]
extern crate regex;

use std::collections::BTreeSet;
use std::collections::{BTreeSet, HashSet};
use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader, Write};
Expand Down Expand Up @@ -30,17 +30,17 @@ fn strip_comments(mut line: String) -> String {

// Generate a list of the time zone periods beyond the first that apply
// to this zone, as a string representation of a static slice.
fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
fn format_rest(rest: Vec<(i64, FixedTimespan)>, abbreviations: &str) -> String {
let mut ret = "&[\n".to_string();
for (start, FixedTimespan { utc_offset, dst_offset, name }) in rest {
ret.push_str(&format!(
" ({start}, FixedTimespan {{ \
utc_offset: {utc}, dst_offset: {dst}, name: \"{name}\" \
utc_offset: {utc}, dst_offset: {dst}, abbreviation: {index_len} \
}}),\n",
start = start,
utc = utc_offset,
dst = dst_offset,
name = name,
index_len = (abbreviations.find(&name).unwrap() << 3) | name.len(),
));
}
ret.push_str(" ]");
Expand Down Expand Up @@ -68,12 +68,29 @@ fn convert_bad_chars(name: &str) -> String {
// TimeZone for any contained struct that implements `Timespans`.
fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> {
let zones = table.zonesets.keys().chain(table.links.keys()).collect::<BTreeSet<_>>();

// Collect all unique abbreviations into a HashSet, sort, and concatenate into a string.
let mut abbreviations = HashSet::new();
for zone in &zones {
let timespans = table.timespans(zone).unwrap();
for (_, timespan) in timespans.rest.into_iter().chain(Some((0, timespans.first))) {
abbreviations.insert(timespan.name.clone());
}
}
let mut abbreviations: Vec<_> = abbreviations.iter().collect();
abbreviations.sort();
let mut abbreviations_str = String::new();
for abbr in abbreviations.drain(..) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is abbreviations.join("")?

abbreviations_str.push_str(abbr)
}

writeln!(timezone_file, "use core::fmt::{{self, Debug, Display, Formatter}};",)?;
writeln!(timezone_file, "use core::str::FromStr;\n",)?;
writeln!(
timezone_file,
"use crate::timezone_impl::{{TimeSpans, FixedTimespanSet, FixedTimespan}};\n",
)?;
writeln!(timezone_file, "pub(crate) const ABBREVIATIONS: &str = \"{}\";\n", abbreviations_str)?;
writeln!(
timezone_file,
"/// TimeZones built at compile time from the tz database
Expand Down Expand Up @@ -208,16 +225,17 @@ impl FromStr for Tz {{
first: FixedTimespan {{
utc_offset: {utc},
dst_offset: {dst},
name: \"{name}\",
abbreviation: {index_len},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's suffix the field name with _idx?

}},
rest: REST
}}
}},\n",
zone = zone_name,
rest = format_rest(timespans.rest),
rest = format_rest(timespans.rest, &abbreviations_str),
utc = timespans.first.utc_offset,
dst = timespans.first.dst_offset,
name = timespans.first.name,
index_len = (abbreviations_str.find(&timespans.first.name).unwrap() << 3)
| timespans.first.name.len(),
)?;
}
write!(
Expand Down
4 changes: 2 additions & 2 deletions chrono-tz/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ mod tests {

#[test]
fn test_type_size() {
assert_eq!(core::mem::size_of::<TzOffset>(), 32);
assert_eq!(core::mem::size_of::<DateTime<Tz>>(), 48);
assert_eq!(core::mem::size_of::<TzOffset>(), 16);
assert_eq!(core::mem::size_of::<DateTime<Tz>>(), 28);
}
}
21 changes: 15 additions & 6 deletions chrono-tz/src/timezone_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use chrono::{
};

use crate::binary_search::binary_search;
use crate::timezones::Tz;
use crate::timezones::{Tz, ABBREVIATIONS};

/// Returns [`Tz::UTC`].
impl Default for Tz {
Expand All @@ -25,8 +25,17 @@ pub struct FixedTimespan {
pub utc_offset: i32,
/// The additional offset from UTC for this timespan; typically for daylight saving time
pub dst_offset: i32,
/// The name of this timezone, for example the difference between `EDT`/`EST`
pub name: &'static str,
/// The abbreviation of this offset, which distinguishes, for example, `EDT` from `EST`.
/// Stored as a slice of the `ABBREVIATIONS` constant, packed as `(index << 3) | len`.
pub(crate) abbreviation: i16,
}

impl FixedTimespan {
    /// Decodes the packed `abbreviation` field into the text it refers to.
    ///
    /// The low three bits of the field hold the length in bytes; the remaining
    /// high bits hold the starting byte offset into `ABBREVIATIONS`.
    ///
    /// # Panics
    ///
    /// Panics if the decoded range does not lie within `ABBREVIATIONS` or does
    /// not fall on character boundaries.
    fn abbreviation(&self) -> &'static str {
        let packed = self.abbreviation;
        // High bits: where the abbreviation starts inside the shared string.
        let start = (packed >> 3) as usize;
        // Low three bits: how many bytes the abbreviation spans.
        let end = start + (packed & 0b111) as usize;
        &ABBREVIATIONS[start..end]
    }
}

impl Offset for FixedTimespan {
Expand All @@ -37,13 +46,13 @@ impl Offset for FixedTimespan {

impl Display for FixedTimespan {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
write!(f, "{}", self.name)
write!(f, "{}", self.abbreviation())
}
}

impl Debug for FixedTimespan {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
write!(f, "{}", self.name)
write!(f, "{}", self.abbreviation())
}
}

Expand Down Expand Up @@ -156,7 +165,7 @@ impl OffsetName for TzOffset {
}

fn abbreviation(&self) -> &str {
self.offset.name
self.offset.abbreviation()
}
}

Expand Down