From a937d6d93e741d233404815eb6272082394f74aa Mon Sep 17 00:00:00 2001 From: Don Isaac Date: Sat, 18 May 2024 13:10:03 -0400 Subject: [PATCH] perf: change ports type to Option --- url/benches/parse_url.rs | 9 ++++++++- url/src/lib.rs | 26 +++++++++++++++++--------- url/src/origin.rs | 6 +++--- url/src/parser.rs | 25 ++++++++++++++++--------- url/src/slicing.rs | 2 +- 5 files changed, 45 insertions(+), 23 deletions(-) diff --git a/url/benches/parse_url.rs b/url/benches/parse_url.rs index cc87dda5c..78388957c 100644 --- a/url/benches/parse_url.rs +++ b/url/benches/parse_url.rs @@ -19,5 +19,12 @@ fn long(bench: &mut Bencher) { bench.iter(|| black_box(url).parse::().unwrap()); } -benchmark_group!(benches, short, long); +fn very_long(bench: &mut Bencher) { + let url = "https://www.typescriptlang.org/play/?&q=42#code/PTAEBUE8AcFMGUDGAnAltALqVBnUBDUeDZAV0Q1OXwBsIZYjIcNYBbAOlAEFQWyKVWqAwMAUCD7NWbUG1j4AdngwALfFgDuq2ItCIA9m2j40igOYiGOADT04SNJlAHFNSBLAZ8Aa1h5URQwDAkRDUiCRHTl2ACNYZDxXKMZROA4xTwhVXGwAvUMg6hYREMUjQOE0xhxpdltQbQTGSANSLPCaABN9ZAVWEU0Q6rxY0ixOnvKsfBwcVHM9NXZS0AVEVSyDZeQueFhGWAAPfGMaWAAucrZKmgBaNMDzTMkAMQNkNZOz2DtlnFSQ2wQQSADN8Ih-AQ+vojNBzqx3FkSEocKCEtRYudgQQ+CRyJRqHRqlIWOwLplAqxkODIaAAEK0OgAbzEoFAXVQp1g1IuoEUpDY8WQAG4xABfMRUsEQxjwaA6GGs9mc7m8-mC4ViyViBGgWJMvmMmh0AC8oGZHK58nVAEYAAygcVivU4BXNPnyxWMc2W1U2hJ8gBMjudmTd3tA5oNJrFMbNfHdfTFWQAkqDGox8F0eoFcSTtKgNniBISmZBYUF8IE8EyXKDkdF5EKEkkM8a6Eoel7mn8dHpUDMaDgQoglPrYFkAVhghPcfGXJ8I80MlKQTTZRBSPELWyrWrAxqW6K9+cLGo+QLj9rMnrKPE+eBtz6LfuA8g+bag3Yz+YL6AAGYnRTe8X3jOM63NUCU0kelYDHUgAXnOsugMKFplAdQADcs1AX81D7XRsAmJQMPiLJZnmRZYB6WdlhEZ9QCw0wuSxWAuAACQMTRYBw5A7DrAwM3o5thRwLZ2zrUxGBrVAulSZ8BMUHoR2I-Rxx3SiFkUGjV0kKAHBQdAsFyQxjBYiw1ghVQYmPHF6JJfBzGrZQMCydYbO2RVVj41BQQrZZUE+WAAEdSFoQdID0sBeFBCIKFQZJcnHAxYgAK3gky9AAKXwZjHGMghlLU0y4WkrosjzQh5huGhTFAcEcByVwuAAdUHTydNARDYDiuhjhREQ0EQHxoTaZTG0YExqDYHAKV1Hlen6WAOyjUAAAp-R5Q8r2FABKKMAD4Nr9a1ts+cU9pdRaUGWnsYXNTazvVXaEjsHrU0UDZ-D5WIDAMc4lAO01juVUA+kJPRToPD9uoBT7vrwAB+N9ztAAAqUB7Q4ACAE5QD5LbqWAiUU1ujQECTF9ydYDsxRplbIKWin7tgGCwAMhAjOcTRUBNAgTW4jbXuQA7Z1C8K6HWkW7D+gGFEUPbKqWaJpphhoxiwDDpc1N79X+wHFdAO5jp148layTmCp5vm6E5HAx2QWjojlw37OiUEgpKLTFnkIIsniBCkMHAByPA+IrMy2GSUEPlAXL8u5iYDHk1YcB8dBQBMOYniyNXZsafsUkgEOYQwnSaN0l4wHeYLvnhVJolMagS7wWPkAAUWssP1JNA0Rsw2Yoj6KbTFOWwsmYmhSF+YF5KOIrncYPrO2QFvjewDMraTq0ukUEO3MkHBSGgaAPiwe3Hc5Sz8-Ohplj0VpSEaNpukHnDSmVxBp9T3iEgrAwmBEpLBCGwXwjdl7xQwMAvAYCMDFhPvNAA2gzDs61bR7TsKgpk60gx7QALocHbl3DY611rxjsAAfUCPPKh8YcDA1BnuQoI5zgcBoAYcw5CmRXQlLwrIAA5EIrhGAVy6CodQGBoqgAAEo8ioEsaw0JUh9Apj0GgqA-AuHSplBoXYioVk5KCdEfQvr+AojCMy00aKNA6ikPg3JtEZQoGsMKEVRDg1IOcWsf0cKrj1AzORXRVqPSJjtXWosjonVRtSLBAMPh8gAER9C6Ikp0-DsH83NIEmidMxA5OCUzTJNB2agFapGeintEgzBzr7XQWgPg+DwOtZYFY-pqHfowMJosA7jHsQCQoPRUL+H3lgFpLsUJoRwKMzpuJDAcNFhkIAA"; + + bench.bytes = url.len() as u64; + bench.iter(|| black_box(url).parse::().unwrap()); +} + +benchmark_group!(benches, short, long, very_long); benchmark_main!(benches); diff --git a/url/src/lib.rs b/url/src/lib.rs index 1959a1213..ac85d17b2 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -160,6 +160,7 @@ use std::mem; use std::net::IpAddr; #[cfg(any(unix, windows, target_os = "redox", target_os = "wasi"))] use std::net::{SocketAddr, ToSocketAddrs}; +use std::num::NonZeroU16; use std::ops::{Range, RangeFrom, RangeTo}; use std::path::{Path, PathBuf}; use std::str; @@ -202,7 +203,7 @@ pub struct Url { host_start: u32, host_end: u32, host: HostInternal, - port: Option, + port: Option, path_start: u32, // Before initial '/', if any query_start: Option, // Before '?', unlike Position::QueryStart fragment_start: Option, // Before '#', unlike Position::FragmentStart @@ -708,7 +709,7 @@ impl Url { let port_str = self.slice(self.host_end + 1..self.path_start); assert_eq!( self.port, - Some(port_str.parse::().expect("Couldn't parse port?")) + Some(port_str.parse::().expect("Couldn't parse port?")) ); } assert!( @@ -1214,7 +1215,7 @@ impl Url { /// ``` #[inline] pub fn port(&self) -> Option { - self.port + self.port.map(Into::into) } /// Return the port number for this URL, or the default port number if it is known. @@ -1246,7 +1247,13 @@ impl Url { /// ``` #[inline] pub fn port_or_known_default(&self) -> Option { - self.port.or_else(|| parser::default_port(self.scheme())) + self.port_or_known_default_internal().map(Into::into) + } + + #[inline] + pub(crate) fn port_or_known_default_internal(&self) -> Option { + self.port + .or_else(|| parser::default_port(self.scheme())) } /// Resolve a URL’s host and port number to `SocketAddr`. @@ -1757,8 +1764,8 @@ impl Url { /// /// Note that default port numbers are not reflected in the serialization. /// - /// If this URL is cannot-be-a-base, does not have a host, or has the `file` scheme; - /// do nothing and return `Err`. + /// If this URL is cannot-be-a-base, does not have a host, or has the `file` + /// scheme; or if the supplied port is `0`; do nothing and return [`Err`]. /// /// # Examples /// @@ -1814,11 +1821,12 @@ impl Url { /// # run().unwrap(); /// ``` #[allow(clippy::result_unit_err)] - pub fn set_port(&mut self, mut port: Option) -> Result<(), ()> { + pub fn set_port(&mut self, port: Option) -> Result<(), ()> { // has_host implies !cannot_be_a_base if !self.has_host() || self.host() == Some(Host::Domain("")) || self.scheme() == "file" { return Err(()); } + let mut port = port.map(|port| NonZeroU16::new(port).ok_or(())).transpose()?; if port.is_some() && port == parser::default_port(self.scheme()) { port = None } @@ -1826,7 +1834,7 @@ impl Url { Ok(()) } - fn set_port_internal(&mut self, port: Option) { + fn set_port_internal(&mut self, port: Option) { match (self.port, port) { (None, None) => {} (Some(_), None) => { @@ -2012,7 +2020,7 @@ impl Url { } /// opt_new_port: None means leave unchanged, Some(None) means remove any port number. - fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { + fn set_host_internal(&mut self, host: Host, opt_new_port: Option>) { let old_suffix_pos = if opt_new_port.is_some() { self.path_start } else { diff --git a/url/src/origin.rs b/url/src/origin.rs index 81193f510..659b96bc2 100644 --- a/url/src/origin.rs +++ b/url/src/origin.rs @@ -9,7 +9,7 @@ use crate::host::Host; use crate::parser::default_port; use crate::Url; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::{num::NonZeroU16, sync::atomic::{AtomicUsize, Ordering}}; pub fn url_origin(url: &Url) -> Origin { let scheme = url.scheme(); @@ -24,7 +24,7 @@ pub fn url_origin(url: &Url) -> Origin { "ftp" | "http" | "https" | "ws" | "wss" => Origin::Tuple( scheme.to_owned(), url.host().unwrap().to_owned(), - url.port_or_known_default().unwrap(), + url.port_or_known_default_internal().unwrap(), ), // TODO: Figure out what to do if the scheme is a file "file" => Origin::new_opaque(), @@ -55,7 +55,7 @@ pub enum Origin { Opaque(OpaqueOrigin), /// Consists of the URL's scheme, host and port - Tuple(String, Host, u16), + Tuple(String, Host, NonZeroU16), } impl Origin { diff --git a/url/src/parser.rs b/url/src/parser.rs index 2e3a4f644..648c1b533 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -8,6 +8,7 @@ use std::error::Error; use std::fmt::{self, Formatter, Write}; +use std::num::NonZeroU16; use std::str; use crate::host::{Host, HostInternal}; @@ -172,11 +173,12 @@ impl> From for SchemeType { } } -pub fn default_port(scheme: &str) -> Option { +pub fn default_port(scheme: &str) -> Option { match scheme { - "http" | "ws" => Some(80), - "https" | "wss" => Some(443), - "ftp" => Some(21), + // SAFETY: We know these values are not zero because they are hardcoded. + "http" | "ws" => Some(unsafe { NonZeroU16::new_unchecked(80) }), + "https" | "wss" => Some(unsafe { NonZeroU16::new_unchecked(443) }), + "ftp" => Some(unsafe { NonZeroU16::new_unchecked(21) }), _ => None, } } @@ -943,7 +945,7 @@ impl<'a> Parser<'a> { input: Input<'i>, scheme_end: u32, scheme_type: SchemeType, - ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { + ) -> ParseResult<(u32, HostInternal, Option, Input<'i>)> { let (host, remaining) = Parser::parse_host(input, scheme_type)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; @@ -1100,9 +1102,9 @@ impl<'a> Parser<'a> { mut input: Input<'_>, default_port: P, context: Context, - ) -> ParseResult<(Option, Input<'_>)> + ) -> ParseResult<(Option, Input<'_>)> where - P: Fn() -> Option, + P: Fn() -> Option, { let mut port: u32 = 0; let mut has_any_digit = false; @@ -1120,7 +1122,12 @@ impl<'a> Parser<'a> { } input = remaining; } - let mut opt_port = Some(port as u16); + // NOTE: compiler should optimize away NonZeroU16::new's non-zero check + // during release builds + if port == 0 { + return Err(ParseError::InvalidPort); + } + let mut opt_port = NonZeroU16::new(port as u16); if !has_any_digit || opt_port == default_port() { opt_port = None; } @@ -1368,7 +1375,7 @@ impl<'a> Parser<'a> { host_start: u32, host_end: u32, host: HostInternal, - port: Option, + port: Option, mut path_start: u32, remaining: Input<'_>, ) -> ParseResult { diff --git a/url/src/slicing.rs b/url/src/slicing.rs index 13829575d..72637e3fe 100644 --- a/url/src/slicing.rs +++ b/url/src/slicing.rs @@ -175,7 +175,7 @@ impl Url { Position::AfterPort => { if let Some(port) = self.port { debug_assert!(self.byte_at(self.host_end) == b':'); - self.host_end as usize + ":".len() + count_digits(port) + self.host_end as usize + ":".len() + count_digits(port.into()) } else { self.host_end as usize }