use crate::{ bstr::Bstr, config::{DecoderConfig, HtpUnwanted}, log::Logger, parsers::{credentials, fragment, hostname, parse_hostport, path, port, query, scheme}, urlencoded::{decode_uri_inplace, decode_uri_with_flags, path_decode_uri_inplace}, utf8_decoder::decode_and_validate_inplace, util::{convert_port, FlagOperations, HtpFlags}, }; use nom::{combinator::opt, sequence::tuple}; /// URI structure. Each of the fields provides access to a single /// URI element. Where an element is not present in a URI, the /// corresponding field will be set to NULL or -1, depending on the /// field type. #[derive(Clone)] pub struct Uri { /// Decoder configuration pub(crate) cfg: DecoderConfig, /// Scheme, e.g., "http". pub(crate) scheme: Option, /// Username. pub(crate) username: Option, /// Password. pub(crate) password: Option, /// Hostname. pub(crate) hostname: Option, /// Port, as string. pub(crate) port: Option, /// Port, as number. This field will be None if there was /// no port information in the URI or the port information /// was invalid (e.g., it's not a number or it falls out of range. pub(crate) port_number: Option, /// The path part of this URI. pub(crate) path: Option, /// Query string. pub(crate) query: Option, /// Fragment identifier. This field will rarely be available in a server-side /// setting, but it's not impossible to see it. pub(crate) fragment: Option, } impl std::fmt::Debug for Uri { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("Uri") .field("scheme", &self.scheme) .field("username", &self.username) .field("password", &self.password) .field("hostname", &self.hostname) .field("port", &self.port) .field("port_number", &self.port_number) .field("path", &self.path) .field("query", &self.query) .field("fragment", &self.fragment) .finish() } } impl Default for Uri { /// Create an empty Uri struct. 
fn default() -> Self { Self { cfg: DecoderConfig::default(), scheme: None, username: None, password: None, hostname: None, port: None, port_number: None, path: None, query: None, fragment: None, } } } impl Uri { /// Create an empty Uri struct but with the given DecoderCfg pub(crate) fn with_config(cfg: DecoderConfig) -> Self { Self { cfg, scheme: None, username: None, password: None, hostname: None, port: None, port_number: None, path: None, query: None, fragment: None, } } /// Normalize uri scheme. pub(crate) fn normalized_scheme(&self) -> Option { if let Some(mut scheme) = self.scheme.clone() { scheme.make_ascii_lowercase(); Some(scheme) } else { None } } /// Normalize uri username. pub(crate) fn normalized_username(&self, flags: &mut u64) -> Option { if let Some(username) = self.username.as_ref() { decode_uri_with_flags(&self.cfg, flags, username.as_slice()).ok() } else { None } } /// Normalize uri password. pub(crate) fn normalized_password(&self, flags: &mut u64) -> Option { if let Some(password) = self.password.as_ref() { decode_uri_with_flags(&self.cfg, flags, password.as_slice()).ok() } else { None } } /// Normalize uri hostname. pub(crate) fn normalized_hostname(&self, flags: &mut u64) -> Option { if let Some(hostname) = self.hostname.as_ref() { let mut normalized_hostname = decode_uri_with_flags(&self.cfg, flags, hostname.as_slice()).ok()?; normalized_hostname.make_ascii_lowercase(); // Remove dots from the end of the string. while normalized_hostname.last() == Some(&(b'.')) { normalized_hostname.pop(); } Some(normalized_hostname) } else { None } } /// Normalize uri port. pub(crate) fn normalized_port(&self, flags: &mut u64) -> Option { if let Some(port) = self.port.as_ref() { let normalized_port = convert_port(port.as_slice()); if normalized_port.is_none() { // Failed to parse the port number. flags.set(HtpFlags::HOSTU_INVALID); } normalized_port } else { None } } /// Normalize uri fragment. 
pub(crate) fn normalized_fragment(&self, flags: &mut u64) -> Option<Bstr> {
        // Returns `None` if no fragment is set or decoding fails.
        let fragment = self.fragment.as_ref()?;
        // Pass an explicit byte slice, exactly like the other `normalized_*` helpers.
        decode_uri_with_flags(&self.cfg, flags, fragment.as_slice()).ok()
    }

    /// Normalize uri path.
    ///
    /// Works on a copy of the stored path and runs three in-place steps over it:
    /// URL decoding, UTF-8 handling, and RFC 3986 dot-segment removal.
    /// `flags` and `status` are handed to the decoding steps, which may update them.
    /// Returns `None` if no path is set.
    pub(crate) fn normalized_path(
        &self,
        flags: &mut u64,
        status: &mut HtpUnwanted,
    ) -> Option<Bstr> {
        let mut path = self.path.clone()?;
        // Decode URL-encoded (and %u-encoded) characters, as well as lowercase,
        // compress separators and convert backslashes.
        // Ignore result.
        path_decode_uri_inplace(&self.cfg, flags, status, &mut path);
        // Handle UTF-8 in the path. Validate it first, and only save it if cfg specifies it
        decode_and_validate_inplace(&self.cfg, flags, status, &mut path);
        // RFC normalization.
        normalize_uri_path_inplace(&mut path);
        Some(path)
    }

    /// Parses request URI, making no attempt to validate the contents.
    ///
    /// It attempts, but is not guaranteed to successfully parse out a scheme, username, password, hostname, port, query, and fragment.
    /// Note: only attempts to extract a username, password, and hostname and subsequently port if it successfully parsed a scheme.
pub(crate) fn parse_uri(&mut self, input: &[u8]) {
        // Grammar: [scheme [credentials] [hostname [port]]] [path] [query] [fragment],
        // every piece optional.
        let parsed = tuple((
            opt(tuple((
                scheme(),
                opt(credentials()),
                opt(tuple((hostname(), opt(port())))),
            ))),
            opt(path()),
            opt(query()),
            opt(fragment()),
        ))(input);

        // On a parse error nothing is stored and the struct is left untouched.
        let (scheme_authority, raw_path, raw_query, raw_fragment) = match parsed {
            Ok((_, components)) => components,
            Err(_) => return,
        };

        // Credentials and host/port are only available when a scheme was parsed.
        if let Some((raw_scheme, userinfo, host_and_port)) = scheme_authority {
            self.scheme = Some(Bstr::from(raw_scheme));
            if let Some((user, pass)) = userinfo {
                self.username = Some(Bstr::from(user));
                if let Some(pass) = pass {
                    self.password = Some(Bstr::from(pass));
                }
            }
            if let Some((host, raw_port)) = host_and_port {
                self.hostname = Some(Bstr::from(host));
                if let Some(raw_port) = raw_port {
                    self.port = Some(Bstr::from(raw_port));
                }
            }
        }

        // Components that were not found keep whatever value the field already had.
        if let Some(raw_path) = raw_path {
            self.path = Some(Bstr::from(raw_path));
        }
        if let Some(raw_query) = raw_query {
            self.query = Some(Bstr::from(raw_query));
        }
        if let Some(raw_fragment) = raw_fragment {
            self.fragment = Some(Bstr::from(raw_fragment));
        }
    }

    /// Parses hostport provided in the URI.
    ///
    /// Stores the ASCII-lowercased hostname and, when present, the port string and
    /// its numeric value. `HtpFlags::HOSTU_INVALID` is set in `flags` when the parser
    /// reports the input as invalid or a port is present without a usable number.
    /// Does nothing if `parse_hostport` fails.
    pub(crate) fn parse_uri_hostport(&mut self, hostport: &Bstr, flags: &mut u64) {
        let (host, port_info, mut valid) = match parse_hostport(hostport) {
            Ok((_, parsed)) => parsed,
            Err(_) => return,
        };

        let lowered = host.to_ascii_lowercase();
        self.hostname = Some(Bstr::from(lowered.as_slice()));

        if let Some((raw_port, port_value)) = port_info {
            self.port = Some(Bstr::from(raw_port));
            match port_value {
                Some(number) => self.port_number = Some(number),
                // A port string without a numeric value makes the whole hostport invalid.
                None => valid = false,
            }
        }

        if !valid {
            flags.set(HtpFlags::HOSTU_INVALID);
        }
    }

    /// Generate a normalized uri string.
pub(crate) fn generate_normalized_uri(
        &self,
        mut logger: Option<Logger>,
    ) -> (Option<Bstr>, Option<Bstr>) {
        // Returns `(partial, complete)`:
        //   * `partial`  is `path` + `?query` + `#fragment`
        //   * `complete` is `scheme://` + `user:pass@` + `host` + `:port` + `partial`
        // A side is `None` when the corresponding string would be empty.
        // When `logger` is supplied, a warning is emitted if a second decoding pass
        // shortens the path or query (i.e. the component was double encoded).

        // Length of an optional component plus `extra` separator bytes; 0 when absent.
        let len_of = |field: &Option<Bstr>, extra: usize| {
            field.as_ref().map_or(0, |value| value.len() + extra)
        };

        // On the first pass determine the length of the final bstrs
        let partial_len = len_of(&self.path, 0)
            + len_of(&self.query, 1) // '?'
            + len_of(&self.fragment, 1); // '#'
        let userinfo_len = if self.username.is_some() || self.password.is_some() {
            len_of(&self.username, 0) + len_of(&self.password, 0) + 2 // ':' and '@'
        } else {
            0
        };
        let complete_len = len_of(&self.scheme, 3) // '://'
            + userinfo_len
            + len_of(&self.hostname, 0)
            + len_of(&self.port, 1) // ':'
            + partial_len;

        // On the second pass construct the string
        let mut normalized_uri = Bstr::with_capacity(complete_len);
        let mut partial_normalized_uri = Bstr::with_capacity(partial_len);

        if let Some(scheme) = self.scheme.as_ref() {
            normalized_uri.add(scheme.as_slice());
            normalized_uri.add("://");
        }
        if self.username.is_some() || self.password.is_some() {
            // The ':' separator is always emitted, even when one side is missing.
            if let Some(username) = self.username.as_ref() {
                normalized_uri.add(username.as_slice());
            }
            normalized_uri.add(":");
            if let Some(password) = self.password.as_ref() {
                normalized_uri.add(password.as_slice());
            }
            normalized_uri.add("@");
        }
        if let Some(hostname) = self.hostname.as_ref() {
            normalized_uri.add(hostname.as_slice());
        }
        if let Some(port) = self.port.as_ref() {
            normalized_uri.add(":");
            normalized_uri.add(port.as_slice());
        }

        if let Some(mut path) = self.path.clone() {
            // Path is already decoded when we parsed the uri in transaction, only decode once more
            if self.cfg.double_decode_normalized_path {
                let path_len = path.len();
                let _ = decode_uri_inplace(&self.cfg, &mut path);
                if path_len > path.len() {
                    if let Some(logger) = logger.as_mut() {
                        htp_warn!(
                            logger,
                            HtpLogCode::DOUBLE_ENCODED_URI,
                            "URI path is double encoded"
                        );
                    }
                }
            }
            partial_normalized_uri.add(path.as_slice());
        }

        if let Some(mut query) = self.query.clone() {
            // The query is always decoded once; a second pass is opt-in.
            let _ = decode_uri_inplace(&self.cfg, &mut query);
            if self.cfg.double_decode_normalized_query {
                let query_len = query.len();
                let _ = decode_uri_inplace(&self.cfg, &mut query);
                if query_len > query.len() {
                    if let Some(logger) = logger.as_mut() {
                        htp_warn!(
                            logger,
                            HtpLogCode::DOUBLE_ENCODED_URI,
                            "URI query is double encoded"
                        );
                    }
                }
            }
            partial_normalized_uri.add("?");
            partial_normalized_uri.add(query.as_slice());
        }

        if let Some(fragment) = self.fragment.as_ref() {
            partial_normalized_uri.add("#");
            partial_normalized_uri.add(fragment.as_slice());
        }

        normalized_uri.add(partial_normalized_uri.as_slice());

        if normalized_uri.is_empty() {
            (None, None)
        } else if partial_normalized_uri.is_empty() {
            (None, Some(normalized_uri))
        } else {
            (Some(partial_normalized_uri), Some(normalized_uri))
        }
    }
}

/// States of the dot-segment removal state machine. Each state names the input
/// that has been consumed but not yet written to the output.
enum NormUriState {
    /// At the beginning of the input, or right after a removed leading "./" or "../".
    Start,
    /// A leading "." has been seen.
    DotStart,
    /// A leading ".." has been seen.
    TwoDotStart,
    /// A "/" is pending.
    Slash,
    /// "/." is pending.
    SlashDot,
    /// "/.." is pending.
    SlashDotDot,
    /// A "/./" or "/../" was just collapsed; behaves like a pending "/" while
    /// scanning, but is dropped if the input ends here.
    SlashDotDotSlash,
    /// Inside an ordinary segment; nothing is pending.
    Regular,
}

/// Drops the last segment already written to `s[..w]`, together with the '/'
/// that precedes it, and returns the new write position.
fn drop_last_segment(s: &Bstr, mut w: usize) -> usize {
    while w > 0 && s[w - 1] != b'/' {
        w -= 1;
    }
    w.saturating_sub(1)
}

/// Normalize URI path in place. This function implements the remove dot segments algorithm
/// specified in RFC 3986, section 5.2.4.
///
/// The buffer is scanned once with a read index `i` and a write index `w`; output
/// is written over the already-consumed prefix and the buffer is truncated to `w`
/// at the end.
fn normalize_uri_path_inplace(s: &mut Bstr) {
    let mut state = NormUriState::Start;
    let slen = s.len();
    let mut w = 0;

    for i in 0..slen {
        let c = s[i];
        match state {
            NormUriState::Start => match c {
                b'.' => {
                    state = NormUriState::DotStart;
                }
                b'/' => {
                    state = NormUriState::Slash;
                }
                _ => {
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::DotStart => match c {
                b'.' => {
                    state = NormUriState::TwoDotStart;
                }
                b'/' => {
                    // If the input buffer begins with a prefix of "./", then remove that prefix
                    state = NormUriState::Start;
                }
                _ => {
                    s[w] = b'.';
                    w += 1;
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::TwoDotStart => match c {
                b'/' => {
                    // If the input buffer begins with a prefix of "../", then remove that prefix
                    state = NormUriState::Start;
                }
                _ => {
                    s[w] = b'.';
                    w += 1;
                    s[w] = b'.';
                    w += 1;
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::Slash | NormUriState::SlashDotDotSlash => match c {
                b'.' => {
                    state = NormUriState::SlashDot;
                }
                _ => {
                    s[w] = b'/';
                    w += 1;
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::SlashDot => match c {
                b'/' => {
                    // /./ turns into /
                    state = NormUriState::SlashDotDotSlash;
                }
                b'.' => {
                    state = NormUriState::SlashDotDot;
                }
                _ => {
                    s[w] = b'/';
                    w += 1;
                    s[w] = b'.';
                    w += 1;
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::SlashDotDot => match c {
                b'/' => {
                    // "/../" removes the previously written segment
                    w = drop_last_segment(s, w);
                    state = NormUriState::SlashDotDotSlash;
                }
                _ => {
                    s[w] = b'/';
                    w += 1;
                    s[w] = b'.';
                    w += 1;
                    s[w] = b'.';
                    w += 1;
                    s[w] = c;
                    w += 1;
                    state = NormUriState::Regular;
                }
            },
            NormUriState::Regular => match c {
                b'/' => {
                    state = NormUriState::Slash;
                }
                _ => {
                    s[w] = c;
                    w += 1;
                }
            },
        }
    }

    // Flush (or discard) whatever is still pending at end of input.
    match state {
        NormUriState::Slash => {
            s[w] = b'/';
            w += 1;
        }
        // if the input buffer consists only of "." or "..", then remove that
        NormUriState::DotStart | NormUriState::TwoDotStart => {}
        // if the input buffer ends with "/." or "/..", remove that
        NormUriState::SlashDot => {}
        // we already erased the previous part, and do not add the trailing slash
        NormUriState::SlashDotDotSlash => {}
        NormUriState::SlashDotDot => {
            w = drop_last_segment(s, w);
        }
        // nothing special to do
        NormUriState::Start | NormUriState::Regular => {}
    }
    s.truncate(w);
}

//Tests
#[cfg(test)]
mod test {
    use super::*;
    use rstest::rstest;

    #[rstest]
    #[case::all_components(
        b"http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag",
        Some("http://user:pass@www.example.com:1234/path1/path2?a=b&c=d#frag"),
        Some("/path1/path2?a=b&c=d#frag"),
        Uri {
            scheme: Some(Bstr::from("http")),
            username: Some(Bstr::from("user")),
            password: Some(Bstr::from("pass")),
            hostname: Some(Bstr::from("www.example.com")),
            port: Some(Bstr::from("1234")),
            path: Some(Bstr::from("/path1/path2")),
            query: Some(Bstr::from("a=b&c=d")),
            fragment: Some(Bstr::from("frag")),
            ..Uri::default()
        }
    )]
    #[case::scheme_hostname_path(
        b"http://host.com/path",
        Some("http://host.com/path"),
        Some("/path"),
        Uri {
            scheme: Some(Bstr::from("http")),
            hostname: Some(Bstr::from("host.com")),
            path: Some(Bstr::from("/path")),
            ..Uri::default()
        }
    )]
    #[case::scheme_hostname(
        b"http://host.com",
        Some("http://host.com"),
        None,
        Uri {
            scheme: Some(Bstr::from("http")),
            hostname: Some(Bstr::from("host.com")),
            ..Uri::default()
        }
    )]
    #[case::scheme_path(
        b"http://",
        Some("http:////"),
        Some("//"),
        Uri {
            scheme: Some(Bstr::from("http")),
            path: Some(Bstr::from("//")),
            ..Uri::default()
        }
    )]
    #[case::path(
        b"/path",
        Some("/path"),
        Some("/path"),
        Uri {
            path: Some(Bstr::from("/path")),
            ..Uri::default()
        }
    )]
    #[case::empty_scheme_path(
        b"://",
        Some(":////"),
        Some("//"),
        Uri {
            scheme: Some(Bstr::from("")),
            path: Some(Bstr::from("//")),
            ..Uri::default()
        }
    )]
    #[case::empty(b"", None, None, Uri::default())]
    #[case::scheme_user_host(
        b"http://user@host.com",
        Some("http://user:@host.com"),
        None,
        Uri {
            scheme: Some(Bstr::from("http")),
            username: Some(Bstr::from("user")),
            hostname: Some(Bstr::from("host.com")),
            ..Uri::default()
        }
    )]
    fn test_parse_uri(
        #[case] input: &[u8],
        #[case] expected_normalized: Option<&str>,
        #[case] expected_partial: Option<&str>,
        #[case] expected: Uri,
    ) {
        let mut uri = Uri::default();
        uri.parse_uri(input);
        assert_eq!(uri.scheme, expected.scheme);
        assert_eq!(uri.username, expected.username);
        assert_eq!(uri.password, expected.password);
        assert_eq!(uri.hostname, expected.hostname);
        assert_eq!(uri.port, expected.port);
        assert_eq!(uri.path, expected.path);
        assert_eq!(uri.query, expected.query);
        assert_eq!(uri.fragment, expected.fragment);
        assert_eq!(
            uri.generate_normalized_uri(None),
            (
                expected_partial.map(Bstr::from),
                expected_normalized.map(Bstr::from)
            )
        );
    }

    #[rstest]
    #[case(b"/a/b/c/./../../g", b"/a/g")]
    #[case(b"mid/content=5/../6", b"mid/6")]
    #[case(b"./one", b"one")]
    #[case(b"../one", b"one")]
    #[case(b".", b"")]
    #[case(b"..", b"")]
    #[case(b"one/.", b"one")]
    #[case(b"one/..", b"")]
    #[case(b"/", b"/")]
    #[case(b"one/../", b"")]
    #[case(b"/../../../images.gif", b"/images.gif")]
    #[case(b"", b"")]
    #[case(b"a/./b", b"a/b")]
    fn test_normalize_uri_path(#[case] input: &[u8], #[case] expected: &[u8]) {
        let mut s = Bstr::from(input);
        normalize_uri_path_inplace(&mut s);
        assert!(s.eq_slice(expected))
    }
}