use crate::{cursor::Cursor, fast_skip::fast_skip}; use bstr::ByteSlice; use fxhash::FxHashSet; use tracing::trace; #[derive(Debug, PartialEq, Eq, Clone)] pub enum ParseAction<'a> { Consume, Skip, RestartAt(usize), SingleCandidate(&'a [u8]), MultipleCandidates(Vec<&'a [u8]>), Done, } #[derive(Debug, PartialEq, Eq, Clone)] pub enum Bracketing<'a> { Included(&'a [u8]), Wrapped(&'a [u8]), None, } #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub struct SplitCandidate<'a> { variant: &'a [u8], utility: &'a [u8], } #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum ValidationResult { Invalid, Valid, Restart, } #[derive(Default)] pub struct ExtractorOptions { pub preserve_spaces_in_arbitrary: bool, } #[derive(Debug, PartialEq, Eq, Clone)] enum Arbitrary { /// Not inside any arbitrary value None, /// In arbitrary value mode with square brackets /// /// E.g.: `bg-[…]` /// ^ Brackets { start_idx: usize }, /// In arbitrary value mode with parens /// /// E.g.: `bg-(…)` /// ^ Parens { start_idx: usize }, } pub struct Extractor<'a> { opts: ExtractorOptions, input: &'a [u8], cursor: Cursor<'a>, idx_start: usize, idx_end: usize, idx_last: usize, arbitrary: Arbitrary, in_candidate: bool, in_escape: bool, discard_next: bool, quote_stack: Vec, bracket_stack: Vec, } impl<'a> Extractor<'a> { pub fn all(input: &'a [u8], opts: ExtractorOptions) -> Vec<&'a [u8]> { Self::new(input, opts).flatten().collect() } pub fn unique(input: &'a [u8], opts: ExtractorOptions) -> FxHashSet<&'a [u8]> { let mut candidates: FxHashSet<&[u8]> = Default::default(); candidates.reserve(100); candidates.extend(Self::new(input, opts).flatten()); candidates } pub fn unique_ord(input: &'a [u8], opts: ExtractorOptions) -> Vec<&'a [u8]> { // This is an inefficient way to get an ordered, unique // list as a Vec but it is only meant for testing. 
let mut candidates = Self::all(input, opts); let mut unique_list = FxHashSet::default(); unique_list.reserve(candidates.len()); candidates.retain(|c| unique_list.insert(*c)); candidates } pub fn with_positions(input: &'a [u8], opts: ExtractorOptions) -> Vec<(&'a [u8], usize)> { let mut result = Vec::new(); let extractor = Self::new(input, opts).flatten(); for item in extractor { // Since the items are slices of the input buffer, we can calculate the start index // by doing some pointer arithmetics. let start_index = item.as_ptr() as usize - input.as_ptr() as usize; result.push((item, start_index)); } result } } impl<'a> Extractor<'a> { pub fn new(input: &'a [u8], opts: ExtractorOptions) -> Self { Self { opts, input, cursor: Cursor::new(input), idx_start: 0, idx_end: 0, arbitrary: Arbitrary::None, in_candidate: false, in_escape: false, discard_next: false, idx_last: input.len(), quote_stack: Vec::with_capacity(8), bracket_stack: Vec::with_capacity(8), } } } /// Helpers impl<'a> Extractor<'a> { #[inline(always)] fn in_quotes(&self) -> bool { !self.quote_stack.is_empty() } #[inline(always)] fn get_current_candidate(&mut self) -> ParseAction<'a> { if self.discard_next { return ParseAction::Skip; } let mut candidate = &self.input[self.idx_start..=self.idx_end]; // The bracket stack is not empty, which means that we are dealing with unbalanced // brackets. if !self.bracket_stack.is_empty() { return ParseAction::Skip; } while !candidate.is_empty() { match Extractor::is_valid_candidate_string(candidate) { ValidationResult::Valid => return ParseAction::SingleCandidate(candidate), ValidationResult::Restart => return ParseAction::RestartAt(self.idx_start + 1), _ => {} } match candidate.split_last() { // At this point the candidate is technically invalid, however it can be that it // has a few dangling characters attached to it. 
For example, think about a // JavaScript object: // // ```js // { underline: true } // ``` // // The candidate at this point will be `underline:`, which is invalid. However, we // can assume in this case that the `:` should not be there, and therefore we can // try to slice it off and retry the validation. Some((b':' | b'/' | b'.', head)) => { candidate = head; } // It could also be that we have the candidate is nested inside of bracket or quote // pairs. In this case we want to retrieve the inner part and try to validate that // inner part instead. For example, in a JavaScript array: // // ```js // let myClasses = ["underline"] // ``` // // The `underline` is nested inside of quotes and in square brackets. Let's try to // get the inner part and validate that instead. _ => match Self::slice_surrounding(candidate) { Some(shorter) if shorter != candidate => { candidate = shorter; } _ => break, }, } } ParseAction::Consume } #[inline(always)] fn split_candidate(candidate: &'a [u8]) -> SplitCandidate { let mut brackets = 0; let mut idx_end = 0; for (n, c) in candidate.iter().enumerate() { match c { b'[' => brackets += 1, b']' if brackets > 0 => brackets -= 1, b':' if brackets == 0 => idx_end = n + 1, _ => {} } } SplitCandidate { variant: &candidate[0..idx_end], utility: &candidate[idx_end..], } } #[inline(always)] fn contains_in_constrained(candidate: &'a [u8], bytes: Vec) -> bool { let mut brackets = 0; for c in candidate { match c { b'[' => brackets += 1, b']' if brackets > 0 => brackets -= 1, _ if brackets == 0 && bytes.contains(c) => return true, _ => {} } } false } #[inline(always)] fn is_valid_candidate_string(candidate: &'a [u8]) -> ValidationResult { // Reject candidates that start with a capital letter if candidate[0].is_ascii_uppercase() { return ValidationResult::Invalid; } // Rejects candidates that end with "-" or "_" if candidate.ends_with(b"-") || candidate.ends_with(b"_") { return ValidationResult::Invalid; } // Reject candidates that are single 
camelCase words, e.g.: `useEffect` if candidate.iter().all(|c| c.is_ascii_alphanumeric()) && candidate.iter().any(|c| c.is_ascii_uppercase()) { return ValidationResult::Invalid; } // Reject candidates that look like SVG path data, e.g.: `m32.368 m7.5` if !candidate.contains(&b'-') && !candidate.contains(&b':') && candidate.iter().any(|c| c == &b'.') { return ValidationResult::Invalid; } // Reject candidates that look like version constraints or email addresses, e.g.: `next@latest`, `bob@example.com` if candidate .iter() .all(|c| c.is_ascii_alphanumeric() || c == &b'.' || c == &b'-' || c == &b'@') && candidate[1..].contains(&b'@') { return ValidationResult::Invalid; } // Reject candidates that look like URLs if candidate.starts_with(b"http://") || candidate.starts_with(b"https://") { return ValidationResult::Invalid; } // Reject candidates that look short markdown links, e.g.: `[https://example.com]` if candidate.starts_with(b"[http://") || candidate.starts_with(b"[https://") { return ValidationResult::Invalid; } // Reject candidates that look like imports with path aliases, e.g.: `@/components/button` if candidate.len() > 1 && candidate[1] == b'/' { return ValidationResult::Invalid; } // Reject candidates that look like paths, e.g.: `app/assets/stylesheets` if !candidate.contains(&b':') && !candidate.contains(&b'[') { let mut count = 0; for c in candidate { if c == &b'/' { count += 1; } if count > 1 { return ValidationResult::Invalid; } } } let split_candidate = Extractor::split_candidate(candidate); let mut offset = 0; let mut offset_end = 0; let utility = &split_candidate.utility; let original_utility = &utility; // Some special cases that we can ignore while validating if utility.starts_with(b"!-") { offset += 2; } else if utility.starts_with(b"!") || utility.starts_with(b"-") { offset += 1; } else if utility.ends_with(b"!") { offset_end += 1; } // These are allowed in arbitrary values and in variants but nowhere else if 
Extractor::contains_in_constrained(utility, vec![b'<', b'>']) { return ValidationResult::Restart; } // It's an arbitrary property if utility.starts_with(b"[") && utility.ends_with(b"]") && (utility.starts_with(b"['") || utility.starts_with(b"[\"") || utility.starts_with(b"[`")) { return ValidationResult::Restart; } // Pluck out the part that we are interested in. let utility = &utility[offset..(utility.len() - offset_end)]; // Validations // We should have _something_ if utility.is_empty() { return ValidationResult::Invalid; } // = b'0' && utility[0] <= b'9' && !utility.contains(&b':') { return ValidationResult::Invalid; } // In case of an arbitrary property, we should have at least this structure: [a:b] if utility.starts_with(b"[") && utility.ends_with(b"]") { // [a:b] is at least 5 characters long if utility.len() < 5 { return ValidationResult::Invalid; } // Now that we validated that the candidate is technically fine, let's ensure that it // doesn't start with a `-` because that would make it invalid for arbitrary properties. if original_utility.starts_with(b"-") || original_utility.starts_with(b"!-") { return ValidationResult::Invalid; } // Make sure an arbitrary property/value pair is valid, otherwise // we may generate invalid CSS that will cause tools like PostCSS // to crash when trying to parse the generated CSS. if !Self::validate_arbitrary_property(utility) { return ValidationResult::Invalid; } // The ':` must be preceded by a-Z0-9 because it represents a property name. // SAFETY: the Self::validate_arbitrary_property function from above validates that the // `:` exists. 
let colon = utility.find(":").unwrap(); if !utility .chars() .nth(colon - 1) .map_or_else(|| false, |c| c.is_ascii_alphanumeric()) { return ValidationResult::Invalid; } let property = &utility[1..colon]; // The property must match /^[a-zA-Z-][a-zA-Z0-9-_]+$/ if !property[0].is_ascii_alphabetic() && property[0] != b'-' { return ValidationResult::Invalid; } if !property .iter() .all(|c| c.is_ascii_alphanumeric() || c == &b'-' || c == &b'_') { return ValidationResult::Invalid; } } ValidationResult::Valid } /** * Make sure an arbitrary property/value pair is valid, otherwise * PostCSS may crash when trying to parse the generated CSS. * * `input` - the full candidate string, including the brackets */ fn validate_arbitrary_property(candidate: &[u8]) -> bool { if !candidate.starts_with(b"[") || !candidate.ends_with(b"]") { return false; } let property = &candidate[1..candidate.len() - 1]; let is_custom_property = property.starts_with(b"--"); let Some(colon_pos) = property.find(b":") else { return false; }; if is_custom_property { return true; } let mut stack = vec![]; let mut iter = property[colon_pos + 1..].iter(); while let Some(c) = iter.next() { match c { // The value portion cannot contain unquoted colons. // E.g. `[foo::bar]` leads to "foo::bar; which errors because of the `:`. b':' | b'{' | b'}' if stack.is_empty() => { return false; } b'\'' => { if let Some(b'\'') = stack.last() { _ = stack.pop() } else { stack.push(b'\'') } } b'"' => { if let Some(b'"') = stack.last() { _ = stack.pop() } else { stack.push(b'"') } } // Skip escaped characters. b'\\' => { iter.next(); } _ => {} } } true } #[inline(always)] fn parse_escaped(&mut self) -> ParseAction<'a> { // If this character is escaped, we don't care about it. // It gets consumed. 
trace!("Escape::Consume"); self.in_escape = false; ParseAction::Consume } #[inline(always)] fn parse_arbitrary(&mut self) -> ParseAction<'a> { // In this we could technically use memchr 6 times (then looped) to find the indexes / bounds of arbitrary values if self.in_escape { return self.parse_escaped(); } match self.cursor.curr { b'\\' => { // The `\` character is used to escape characters in arbitrary content _and_ to prevent the starting of arbitrary content trace!("Arbitrary::Escape"); self.in_escape = true; } b'(' => self.bracket_stack.push(self.cursor.curr), b')' => match self.bracket_stack.last() { Some(&b'(') => { self.bracket_stack.pop(); } // This is the last bracket meaning the end of arbitrary content _ if !self.in_quotes() => { if matches!(self.cursor.next, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') { return ParseAction::Consume; } if let Arbitrary::Parens { start_idx } = self.arbitrary { trace!("Arbitrary::End\t"); self.arbitrary = Arbitrary::None; if self.cursor.pos - start_idx == 1 { // We have an empty arbitrary value, which is not allowed return ParseAction::Skip; } // We have a valid arbitrary value return ParseAction::Consume; } // Last parenthesis is different compared to what we expect, therefore we are // not in a valid arbitrary value. 
return ParseAction::Skip; } // We're probably in quotes or nested brackets, so we keep going _ => {} }, // Make sure the brackets are balanced b'[' => self.bracket_stack.push(self.cursor.curr), b']' => match self.bracket_stack.last() { // We've ended a nested bracket Some(&b'[') => { self.bracket_stack.pop(); } // This is the last bracket meaning the end of arbitrary content _ if !self.in_quotes() => { if matches!(self.cursor.next, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') { return ParseAction::Consume; } if let Arbitrary::Brackets { start_idx: _ } = self.arbitrary { trace!("Arbitrary::End\t"); self.arbitrary = Arbitrary::None; // TODO: This is temporarily disabled such that the upgrade tool can work // with legacy arbitrary values. This will be re-enabled in the future (or // with a flag) // if self.cursor.pos - start_idx == 1 { // // We have an empty arbitrary value, which is not allowed // return ParseAction::Skip; // } } } // We're probably in quotes or nested brackets, so we keep going _ => {} }, // Arbitrary values sometimes contain quotes // These can "escape" the arbitrary value mode // switching of `[` and `]` characters b'"' | b'\'' | b'`' => match self.quote_stack.last() { Some(&last_quote) if last_quote == self.cursor.curr => { trace!("Quote::End\t"); self.quote_stack.pop(); } _ => { trace!("Quote::Start\t"); self.quote_stack.push(self.cursor.curr); } }, c if c.is_ascii_whitespace() && !self.opts.preserve_spaces_in_arbitrary => { trace!("Arbitrary::SkipAndEndEarly\t"); if let Arbitrary::Brackets { start_idx } | Arbitrary::Parens { start_idx } = self.arbitrary { // Restart the parser ahead of the arbitrary value It may pick up more // candidates return ParseAction::RestartAt(start_idx + 1); } } // Arbitrary values allow any character inside them // Except spaces unless you are in loose mode _ => { trace!("Arbitrary::Consume\t"); // No need to move the end index because either the arbitrary value will end properly OR we'll hit invalid characters } } 
ParseAction::Consume } #[inline(always)] fn parse_start(&mut self) -> ParseAction<'a> { match self.cursor.curr { // Enter arbitrary property mode b'[' if self.cursor.prev != b'\\' => { trace!("Arbitrary::Start\t"); self.arbitrary = Arbitrary::Brackets { start_idx: self.cursor.pos, }; ParseAction::Consume } // Allowed first characters. b'@' | b'!' | b'-' | b'<' | b'>' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'*' => { // TODO: A bunch of characters that we currently support but maybe we only want it behind // a flag. E.g.: ` ParseAction::Skip, } } #[inline(always)] fn parse_continue(&mut self) -> ParseAction<'a> { match self.cursor.curr { // Enter arbitrary value mode. E.g.: `bg-[rgba(0, 0, 0)]` // ^ b'[' if matches!(self.cursor.prev, b'@' | b'-' | b':' | b'/' | b'!' | b'\0') || self.cursor.prev.is_ascii_whitespace() => { trace!("Arbitrary::Start\t"); self.arbitrary = Arbitrary::Brackets { start_idx: self.cursor.pos, }; } // Enter arbitrary value mode. E.g.: `bg-(--my-color)` // ^ b'(' if matches!(self.cursor.prev, b'-' | b'/') => { trace!("Arbitrary::Start\t"); self.arbitrary = Arbitrary::Parens { start_idx: self.cursor.pos, }; } // Can't enter arbitrary value mode. This can't be a candidate. b'[' | b'(' => { trace!("Arbitrary::Skip_Start\t"); return ParseAction::Skip; } // A % can only appear at the end of the candidate itself. It can also only be after a // digit 0-9. 
This covers the following cases: // - from-15% b'%' if self.cursor.prev.is_ascii_digit() => { return match (self.cursor.at_end, self.cursor.next) { // End of string == end of candidate == okay (true, _) => ParseAction::Consume, // Looks like the end of a candidate == okay (_, b'\'' | b'"' | b'`') => ParseAction::Consume, (_, c) if c.is_ascii_whitespace() => ParseAction::Consume, // Otherwise, not a valid character in a candidate _ => ParseAction::Skip, }; } b'%' => return ParseAction::Skip, // < and > can only be part of a variant and only be the first or last character b'<' | b'>' | b'*' => { // Can only be the first or last character // // E.g.: // // - :underline // ^ if self.cursor.pos == self.idx_start || self.cursor.pos == self.idx_last { trace!("Candidate::Consume\t"); } // If it is in the middle, it can only be part of a stacked variant // - dark::underline // ^ else if self.cursor.prev == b':' || self.cursor.next == b':' { trace!("Candidate::Consume\t"); } else { return ParseAction::Skip; } } // Allowed characters in the candidate itself // None of these can come after a closing bracket `]` b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'@' if self.cursor.prev != b']' => { /* TODO: The `b'@'` is necessary for custom separators like _@, maybe we can handle this in a better way... */ trace!("Candidate::Consume\t"); } // A dot (.) can only appear in the candidate itself (not the arbitrary part), if the previous // and next characters are both digits. This covers the following cases: // - p-1.5 b'.' if self.cursor.prev.is_ascii_digit() => match self.cursor.next { next if next.is_ascii_digit() => { trace!("Candidate::Consume\t"); } _ => return ParseAction::Skip, }, // Allowed characters in the candidate itself // These MUST NOT appear at the end of the candidate b'/' | b':' if !self.cursor.at_end => { trace!("Candidate::Consume\t"); } // The important character `!`, is allowed at the end of the candidate b'!' 
=> { trace!("Candidate::Consume\t"); } _ => return ParseAction::Skip, } ParseAction::Consume } #[inline(always)] fn can_be_candidate(&mut self) -> bool { self.in_candidate && matches!(self.arbitrary, Arbitrary::None) && (0..=127).contains(&self.cursor.curr) && (self.idx_start == 0 || self.input[self.idx_start - 1] <= 127) } #[inline(always)] fn handle_skip(&mut self) { // In all other cases, we skip characters and reset everything so we can make new candidates trace!("Characters::Skip\t"); self.idx_start = self.cursor.pos; self.idx_end = self.cursor.pos; self.in_candidate = false; self.arbitrary = Arbitrary::None; self.in_escape = false; } #[inline(always)] fn parse_char(&mut self) -> ParseAction<'a> { if !matches!(self.arbitrary, Arbitrary::None) { self.parse_arbitrary() } else if self.in_candidate { self.parse_continue() } else if self.parse_start() == ParseAction::Consume { self.in_candidate = true; self.idx_start = self.cursor.pos; self.idx_end = self.cursor.pos; ParseAction::Consume } else { ParseAction::Skip } } #[inline(always)] fn yield_candidate(&mut self) -> ParseAction<'a> { if self.can_be_candidate() { self.get_current_candidate() } else { ParseAction::Consume } } #[inline(always)] fn restart(&mut self, pos: usize) { trace!("Parser::Restart\t{}", pos); self.idx_start = pos; self.idx_end = pos; self.arbitrary = Arbitrary::None; self.in_candidate = false; self.in_escape = false; self.discard_next = false; self.quote_stack.clear(); self.bracket_stack.clear(); self.cursor.move_to(pos); } #[inline(always)] fn without_surrounding(&self) -> Bracketing<'a> { let range = self.idx_start..=self.idx_end; let clipped = &self.input[range]; Self::slice_surrounding(clipped) .map(Bracketing::Included) .or_else(|| { if self.idx_start == 0 || self.idx_end + 1 == self.idx_last { return None; } let range = self.idx_start - 1..=self.idx_end + 1; let clipped = &self.input[range]; Self::slice_surrounding(clipped).map(Bracketing::Wrapped) }) .unwrap_or(Bracketing::None) } 
/// Returns `true` when every `[`/`{`/`(` in `input` is closed by a later `]`/`}`/`)`.
    ///
    /// Only the nesting depth is tracked; the kind of bracket is not compared.
    #[inline(always)]
    fn is_balanced(input: &[u8]) -> bool {
        let mut depth = 0isize;

        for n in input {
            match n {
                b'[' | b'{' | b'(' => depth += 1,
                b']' | b'}' | b')' => depth -= 1,
                _ => continue,
            }

            // A closing bracket showed up before its opening counterpart.
            if depth < 0 {
                return false;
            }
        }

        depth == 0
    }

    /// Peels matching bracket/quote pairs off both ends of `input`.
    ///
    /// Returns `None` when `input` is not wrapped in a pair at all. Otherwise returns the
    /// innermost slice if it is bracket-balanced, or the last still-wrapped slice if it is not.
    #[inline(always)]
    fn slice_surrounding(input: &[u8]) -> Option<&[u8]> {
        let mut prev = None;
        let mut input = input;

        loop {
            let leading = input.first().unwrap_or(&0x00);
            let trailing = input.last().unwrap_or(&0x00);

            // A pair needs two distinct bytes. Without the length check a lone quote byte
            // would match itself as both ends and `&input[1..0]` below would panic.
            let needed = input.len() >= 2
                && matches!(
                    (leading, trailing),
                    (b'(', b')')
                        | (b'{', b'}')
                        | (b'[', b']')
                        | (b'"', b'"')
                        | (b'`', b'`')
                        | (b'\'', b'\'')
                );

            if needed {
                prev = Some(input);
                input = &input[1..input.len() - 1];
                continue;
            }
            if Self::is_balanced(input) && prev.is_some() {
                return Some(input);
            }
            return prev;
        }
    }

    /// Processes the byte under the cursor and, when the candidate ends here, tries to yield it.
    #[inline(always)]
    fn parse_and_yield(&mut self) -> ParseAction<'a> {
        trace!("Cursor {}", self.cursor);

        // Fast skipping of invalid characters
        let can_skip_whitespace = false; // if self.opts.preserve_spaces_in_arbitrary { !self.in_arbitrary } else { true };

        if can_skip_whitespace {
            if let Some(pos) = fast_skip(&self.cursor) {
                trace!("FastSkip::Restart\t{}", pos);
                return ParseAction::RestartAt(pos);
            }
        }

        let action = self.parse_char();

        match action {
            ParseAction::RestartAt(_) => return action,
            ParseAction::Consume => {
                self.idx_end = self.cursor.pos;

                // If we're still consuming characters, we keep going
                // Only exception is if we've hit the end of the input
                if !self.cursor.at_end {
                    return action;
                }
            }
            _ => {}
        }

        let action = self.yield_candidate();

        match (&action, self.cursor.curr) {
            (ParseAction::RestartAt(_), _) => action,
            (_, 0x00) => ParseAction::Done,
            (ParseAction::SingleCandidate(candidate), _) => self.generate_slices(candidate),
            _ => ParseAction::RestartAt(self.cursor.pos + 1),
        }
    }

    /// Peek inside `[]`, `{}`, and `()` pairs
    /// to look for an additional candidate
    #[inline(always)]
    fn generate_slices(&mut self, candidate: &'a [u8]) -> ParseAction<'a> {
        match self.without_surrounding() {
            Bracketing::None => ParseAction::SingleCandidate(candidate),
            Bracketing::Included(sliceable) if sliceable == candidate => {
                ParseAction::SingleCandidate(candidate)
            }
            Bracketing::Included(sliceable) | Bracketing::Wrapped(sliceable) => {
                if candidate == sliceable {
                    ParseAction::SingleCandidate(candidate)
                } else {
                    // Yield both the candidate and the inner slice, dropping empty ones.
                    let parts = vec![candidate, sliceable];
                    let parts = parts
                        .into_iter()
                        .filter(|v| !v.is_empty())
                        .collect::<Vec<_>>();

                    ParseAction::MultipleCandidates(parts)
                }
            }
        }
    }
}

impl<'a> Iterator for Extractor<'a> {
    type Item = Vec<&'a [u8]>;

    /// Advances the parser until it produces one or more candidates, or the input ends.
    fn next(&mut self) -> Option<Self::Item> {
        if self.cursor.at_end {
            return None;
        }

        loop {
            let result = self.parse_and_yield();

            // Cursor control
            match result {
                ParseAction::RestartAt(pos) => self.restart(pos),
                _ => self.cursor.advance_by(1),
            }

            // Candidate state control
            match result {
                ParseAction::SingleCandidate(_) => self.handle_skip(),
                ParseAction::MultipleCandidates(_) => self.handle_skip(),
                _ => {}
            }

            // Iterator results
            return match result {
                ParseAction::SingleCandidate(candidate) => Some(vec![candidate]),
                ParseAction::MultipleCandidates(candidates) => Some(candidates),
                ParseAction::Done => None,
                _ => continue,
            };
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn _please_trace() {
        tracing_subscriber::fmt()
            .with_max_level(tracing::Level::TRACE)
            .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
            .compact()
            .init();
    }

    fn run(input: &str, loose: bool) -> Vec<&str> {
        Extractor::unique_ord(
            input.as_bytes(),
            ExtractorOptions {
                preserve_spaces_in_arbitrary: loose,
            },
        )
        .into_iter()
        .map(|s| unsafe { std::str::from_utf8_unchecked(s) })
        .collect()
    }

    #[test]
    fn it_can_parse_simple_candidates() {
        let candidates = run("underline", false);
        assert_eq!(candidates, vec!["underline"]);
    }

    #[test]
    fn it_can_parse_multiple_simple_utilities() {
        let candidates = run("font-bold underline", false);
        assert_eq!(candidates, vec!["font-bold", "underline"]);
    }

    #[test]
    fn it_can_parse_simple_candidates_with_variants() {
        let candidates = run("hover:underline", false);
assert_eq!(candidates, vec!["hover:underline"]); } #[test] fn it_can_parse_start_variants() { let candidates = run("*:underline", false); assert_eq!(candidates, vec!["*:underline"]); let candidates = run("hover:*:underline", false); assert_eq!(candidates, vec!["hover:*:underline"]); } #[test] fn it_can_parse_simple_candidates_with_stacked_variants() { let candidates = run("focus:hover:underline", false); assert_eq!(candidates, vec!["focus:hover:underline"]); } #[test] fn it_can_parse_utilities_with_arbitrary_values() { let candidates = run("m-[2px]", false); assert_eq!(candidates, vec!["m-[2px]"]); } #[test] fn it_can_parse_utilities_with_arbitrary_var_shorthand() { let candidates = run("m-(--my-var)", false); assert_eq!(candidates, vec!["m-(--my-var)"]); } #[test] fn it_can_parse_utilities_with_arbitrary_var_shorthand_as_modifier() { let candidates = run("bg-(--my-color)/(--my-opacity)", false); assert_eq!(candidates, vec!["bg-(--my-color)/(--my-opacity)"]); } #[test] fn it_throws_away_arbitrary_values_that_are_unbalanced() { let candidates = run("m-[calc(100px*2]", false); assert!(candidates.is_empty()); } #[test] fn it_can_parse_utilities_with_arbitrary_values_and_variants() { let candidates = run("hover:m-[2px]", false); assert_eq!(candidates, vec!["hover:m-[2px]"]); } #[test] fn it_can_parse_arbitrary_variants() { let candidates = run("[@media(min-width:200px)]:underline", false); assert_eq!(candidates, vec!["[@media(min-width:200px)]:underline"]); } #[test] fn it_can_parse_matched_variants() { let candidates = run("group-[&:hover]:underline", false); assert_eq!(candidates, vec!["group-[&:hover]:underline"]); } #[test] fn it_should_not_keep_spaces() { let candidates = run("bg-[rgba(0, 0, 0)]", false); assert_eq!(candidates, vec!["rgba"]); } #[test] fn it_should_keep_spaces_in_loose_mode() { let candidates = run("bg-[rgba(0, 0, 0)]", true); assert_eq!(candidates, vec!["bg-[rgba(0, 0, 0)]"]); } #[test] fn it_should_keep_important_arbitrary_properties_legacy() { 
let candidates = run("![foo:bar]", false); assert_eq!(candidates, vec!["![foo:bar]"]); } #[test] fn it_should_keep_important_arbitrary_properties() { let candidates = run("[foo:bar]!", false); assert_eq!(candidates, vec!["[foo:bar]!"]); } #[test] fn it_should_keep_important_arbitrary_values() { let candidates = run("w-[calc(var(--size)/2)]!", false); assert_eq!(candidates, vec!["w-[calc(var(--size)/2)]!"]); } #[test] fn it_should_keep_important_candidates_legacy() { let candidates = run("!w-4", false); assert_eq!(candidates, vec!["!w-4"]); } #[test] fn it_should_keep_important_candidates() { let candidates = run("w-4!", false); assert_eq!(candidates, vec!["w-4!"]); } #[test] fn it_should_not_allow_for_bogus_candidates() { let candidates = run("[0]", false); assert!(candidates.is_empty()); let candidates = run("[something]", false); assert_eq!(candidates, vec!["something"]); let candidates = run(" [feature(slice_as_chunks)]", false); assert_eq!(candidates, vec!["feature(slice_as_chunks)"]); let candidates = run("![feature(slice_as_chunks)]", false); assert!(candidates.is_empty()); let candidates = run("-[feature(slice_as_chunks)]", false); assert!(candidates.is_empty()); let candidates = run("!-[feature(slice_as_chunks)]", false); assert!(candidates.is_empty()); let candidates = run("-[foo:bar]", false); assert!(candidates.is_empty()); let candidates = run("!-[foo:bar]", false); assert!(candidates.is_empty()); } #[test] fn it_should_keep_candidates_with_brackets_in_arbitrary_values_inside_quotes() { let candidates = run("content-['hello_[_]_world']", false); assert_eq!(candidates, vec!["content-['hello_[_]_world']"]); } #[test] fn it_should_ignore_leading_spaces() { let candidates = run(" backdrop-filter-none", false); assert_eq!(candidates, vec!["backdrop-filter-none"]); } #[test] fn it_should_ignore_trailing_spaces() { let candidates = run("backdrop-filter-none ", false); assert_eq!(candidates, vec!["backdrop-filter-none"]); } #[test] fn 
it_should_keep_classes_before_an_ending_newline() { let candidates = run("backdrop-filter-none\n", false); assert_eq!(candidates, vec!["backdrop-filter-none"]); } #[test] fn it_should_parse_out_the_correct_classes_from_tailwind_tests() { // From: tests/arbitrary-variants.test.js let candidates = run( r#"
"#, false, ); // TODO: it should not include additional (separate) classes: class, hover:, foo: bar, underline // TODO: Double check the expectations based on above information assert_eq!( candidates, vec![ "div", "class", r#"dark:lg:hover:[&>*]:underline"#, r#"[&_.foo\_\_bar]:hover:underline"#, r#"hover:[&_.foo\_\_bar]:underline"# ] ); } #[test] fn potential_candidates_are_skipped_when_hitting_impossible_characters() { let candidates = run("

A new software update is available. See what’s new in version 2.0.4.

", false); assert_eq!( candidates, vec![ "p", "class", "text-sm", "text-blue-700", // "A", // Uppercase first letter is not allowed "new", "software", "update", "is", "available", // "See", // Uppercase first letter is not allowed // "what", // what is dropped because it is followed by the fancy: ’ // "s", // s is dropped because it is preceded by the fancy: ’ // "new", // Already seen "in", "version", ] ); } #[test] fn ignores_arbitrary_property_ish_things() { // FIXME: () are only valid in an arbitrary let candidates = run(" [feature(slice_as_chunks)]", false); assert_eq!(candidates, vec!["feature(slice_as_chunks)",]); } #[test] fn foo_bar() { // w[…] is not a valid pattern for part of candidate // but @[] is (specifically in the context of a variant) let candidates = run("%w[text-[#bada55]]", false); assert_eq!(candidates, vec!["w", "text-[#bada55]"]); } #[test] fn crash_001() { let candidates = run("Aҿɿ[~5", false); assert!(candidates.is_empty()); } #[test] fn crash_002() { let candidates = run("", false); assert!(candidates.is_empty()); } #[test] fn bad_001() { let candidates = run("[杛杛]/", false); assert_eq!(candidates, vec!["杛杛"]); } #[test] fn bad_002() { let candidates = run(r"[\]\\\:[]", false); assert!(candidates.is_empty()); } #[test] fn bad_003() { // TODO: This seems… wrong let candidates = run(r"[𕤵:]", false); assert_eq!(candidates, vec!["𕤵", "𕤵:",]); } #[test] fn classes_in_js_arrays() { let candidates = run( r#"let classes = ['bg-black', 'hover:px-0.5', 'text-[13px]', '[--my-var:1_/_2]', '[.foo_&]:px-[0]', '[.foo_&]:[color:red]']">"#, false, ); assert_eq!( candidates, vec![ "let", "classes", "bg-black", "hover:px-0.5", "text-[13px]", "[--my-var:1_/_2]", "--my-var:1_/_2", "[.foo_&]:px-[0]", "[.foo_&]:[color:red]", ] ); } #[test] fn classes_in_js_arrays_without_spaces() { let candidates = run( r#"let classes = ['bg-black','hover:px-0.5','text-[13px]','[--my-var:1_/_2]','[.foo_&]:px-[0]','[.foo_&]:[color:red]']">"#, false, ); assert_eq!( candidates, 
vec![ "let", "classes", "bg-black", "hover:px-0.5", "text-[13px]", "[--my-var:1_/_2]", "--my-var:1_/_2", "[.foo_&]:px-[0]", "[.foo_&]:[color:red]", ] ); } #[test] fn classes_as_object_keys() { let candidates = run( r#"
"#, false, );
        // Tail of a test whose beginning lies above this section; only its final assertion is
        // visible here.
        assert_eq!(
            candidates,
            vec!["div", "underline", "active", "px-1.5", "online"]
        );
    }

    /// `{color:red}` yields the single candidate `color:red`; the surrounding braces are not
    /// part of the result.
    #[test]
    fn multiple_nested_candidates() {
        let candidates = run(r#"{color:red}"#, false);
        assert_eq!(candidates, vec!["color:red"]);
    }

    /// Expects a candidate that ends in a percent sign (`from-50%`) among the results.
    ///
    /// NOTE(review): the input literal as it appears here is empty, which cannot yield the
    /// asserted candidates — the fixture text looks stripped from this copy; confirm against
    /// upstream.
    #[test]
    fn percent_ended_candidates() {
        let candidates = run(
            r#""#,
            false,
        );
        assert_eq!(
            candidates,
            vec!["should", "work", "underline", "from-50%", "flex",]
        );
    }

    /// Expects exactly `div`, `class`, `foo`, `baz`. Going by the test name, the fixture also
    /// held a word starting with an uppercase letter that must not be reported.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline, which cannot
    /// yield the asserted candidates — the fixture markup looks stripped from this copy;
    /// confirm against upstream.
    #[test]
    fn candidate_cannot_start_with_uppercase_character() {
        let candidates = run(r#"
"#, false);
        assert_eq!(candidates, vec!["div", "class", "foo", "baz"]);
    }

    /// Expects exactly `div`, `class`, `foo`, `baz`. Going by the test name, the fixture also
    /// held a word ending in `-` that must not be reported.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline — the fixture
    /// markup looks stripped from this copy; confirm against upstream.
    #[test]
    fn candidate_cannot_end_with_a_dash() {
        let candidates = run(r#"
"#, false);
        assert_eq!(candidates, vec!["div", "class", "foo", "baz"]);
    }

    /// Expects exactly `div`, `class`, `foo`, `baz`. Going by the test name, the fixture also
    /// held a word ending in `_` that must not be reported.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline — the fixture
    /// markup looks stripped from this copy; confirm against upstream.
    #[test]
    fn candidate_cannot_end_with_an_underscore() {
        let candidates = run(r#"
"#, false);
        assert_eq!(candidates, vec!["div", "class", "foo", "baz"]);
    }

    /// Expects exactly `div`, `class`, `foo`, `baz`. Going by the test name, the fixture also
    /// held a single camelCase word that must not be reported.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline — the fixture
    /// markup looks stripped from this copy; confirm against upstream.
    #[test]
    fn candidate_cannot_be_a_single_camelcase_word() {
        let candidates = run(r#"
"#, false);
        assert_eq!(candidates, vec!["div", "class", "foo", "baz"]);
    }

    /// Expects only `path` and `d`; going by the test name, the path data itself must not be
    /// reported.
    ///
    /// NOTE(review): the input literal as it appears here is empty, which cannot yield the
    /// asserted candidates — the fixture markup looks stripped from this copy; confirm against
    /// upstream.
    #[test]
    fn candidate_cannot_be_svg_path_data() {
        let candidates = run(r#""#, false);
        assert_eq!(candidates, vec!["path", "d"]);
    }

    /// The trailing `next@latest` text contributes nothing to the expected list, while
    /// `@container/dialog` is expected as a candidate.
    ///
    /// NOTE(review): the visible input holds only a newline followed by `next@latest`, so
    /// `div`, `class` and `@container/dialog` have no visible source — the fixture markup
    /// looks stripped from this copy; confirm against upstream.
    #[test]
    fn candidate_cannot_be_email_or_version_constraint() {
        let candidates = run(r#"
next@latest"#, false);
        assert_eq!(candidates, vec!["div", "class", "@container/dialog"]);
    }

    /// Neither `https://example` nor `http://example` shows up in the expected list; the
    /// trailing `com` and the plain lowercase words around the URLs do. `Our` is absent too.
    #[test]
    fn candidate_cannot_be_a_url() {
        let candidates = run(
            r#"Our website is https://example.com or http://example.com if you want a virus"#,
            false,
        );
        assert_eq!(
            candidates,
            vec!["website", "is", "com", "or", "if", "you", "want", "a", "virus"]
        );
    }

    /// The aliased import path `'@/potato'` adds nothing beyond the bare words `import`,
    /// `potato` and `from`.
    #[test]
    fn candidate_cannot_be_a_paths_with_aliases() {
        let candidates = run(r#"import potato from '@/potato';"#, false);
        assert_eq!(candidates, vec!["import", "potato", "from"]);
    }

    /// Neither `'some/path/to/something'` nor `'@/banana'` is reported; only the surrounding
    /// identifiers are expected.
    #[test]
    fn candidate_cannot_be_a_path() {
        let candidates = run(
            r#"import potato from 'some/path/to/something'; import banana from '@/banana';"#,
            false,
        );
        assert_eq!(candidates, vec!["import", "potato", "from", "banana"]);
    }

    /// `%w[hover:flex]` is expected to produce `w` and the bracketed content `hover:flex`.
    #[test]
    fn ruby_percent_formatted_strings() {
        let candidates = run(r#"%w[hover:flex]"#, false);
        assert_eq!(candidates, vec!["w", "hover:flex"]);
    }

    /// Expects a candidate whose arbitrary value is a quoted `url(...)` containing slashes:
    /// `bg-[url('/img/hero-pattern.svg')]`.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline, which cannot
    /// yield the asserted candidates — the fixture markup looks stripped from this copy;
    /// confirm against upstream.
    #[test]
    fn urls_in_arbitrary_values_are_ok() {
        let candidates = run(r#"
"#, false);
        assert_eq!(
            candidates,
            vec!["div", "class", "bg-[url('/img/hero-pattern.svg')]"]
        );
    }

    /// For `[color::]` and `#[test::foo]`, no returned candidate may start with `[`.
    #[test]
    fn colon_in_arbitrary_property_value() {
        let candidates = run("[color::] #[test::foo]", false);
        assert!(candidates
            .iter()
            .all(|candidate| !candidate.starts_with('[')));
    }

    /// For `[color:${foo}]` and `#[test:{foo}]`, no returned candidate may start with `[`.
    #[test]
    fn braces_in_arbitrary_property_value() {
        let candidates = run("[color:${foo}] #[test:{foo}]", false);
        assert!(candidates
            .iter()
            .all(|candidate| !candidate.starts_with('[')));
    }

    /// A colon inside a quoted value is accepted: both the single-quoted and the
    /// double-quoted bracketed forms must be present in the results.
    #[test]
    fn quoted_colon_in_arbitrary_property_value() {
        let candidates = run("[content:'bar:bar'] [content:\"bar:bar\"]", false);
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[content:'bar:bar']"));
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[content:\"bar:bar\"]"));
    }

    /// Braces inside a quoted value are accepted: both the single-quoted and the
    /// double-quoted bracketed forms must be present in the results.
    #[test]
    fn quoted_braces_in_arbitrary_property_value() {
        let candidates = run("[content:'{bar}'] [content:\"{bar}\"]", false);
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[content:'{bar}']"));
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[content:\"{bar}\"]"));
    }

    /// An unquoted colon in the value of a `--`-prefixed property is accepted:
    /// `[--foo:bar:bar]` must be present in the results.
    #[test]
    fn colon_in_custom_property_value() {
        let candidates = run("[--foo:bar:bar]", false);
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[--foo:bar:bar]"));
    }

    /// Unquoted braces in the value of a `--`-prefixed property are accepted:
    /// `[--foo:{bar}]` must be present in the results.
    #[test]
    fn braces_in_custom_property_value() {
        let candidates = run("[--foo:{bar}]", false);
        assert!(candidates
            .iter()
            .any(|candidate| candidate == &"[--foo:{bar}]"));
    }

    /// Pins the results of `Extractor::slice_surrounding` for a handful of byte strings.
    ///
    /// Each result is mapped through `std::str::from_utf8` and `transpose`d so the comparison
    /// can be made against `Option<&str>`; the `unwrap` panics if the returned bytes are not
    /// valid UTF-8.
    #[test]
    fn candidate_slicing() {
        // Unbalanced brackets: nothing is returned.
        let result = Extractor::slice_surrounding(&b".foo_&]:px-[0"[..])
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, None);

        // Already balanced: returned unchanged.
        let result = Extractor::slice_surrounding(&b"[.foo_&]:px-[0]"[..])
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, Some("[.foo_&]:px-[0]"));

        // One surrounding `{ }` pair is removed.
        let result = Extractor::slice_surrounding(&b"{[.foo_&]:px-[0]}"[..])
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, Some("[.foo_&]:px-[0]"));

        // A `!` in front of the bracketed property: nothing is returned.
        let result = Extractor::slice_surrounding(&b"![foo:bar]"[..])
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, None);

        // Surrounding brackets and the quotes inside them are both removed.
        let result = Extractor::slice_surrounding(&b"[\"pt-1.5\"]"[..])
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, Some("pt-1.5"));

        // 1,000 layers of extra brackets on each side still reduce to the inner candidate.
        let count = 1_000;
        let crazy = format!("{}[.foo_&]:px-[0]{}", "[".repeat(count), "]".repeat(count));
        let result = Extractor::slice_surrounding(crazy.as_bytes())
            .map(std::str::from_utf8)
            .transpose()
            .unwrap();
        assert_eq!(result, Some("[.foo_&]:px-[0]"));
    }

    /// Checks `Extractor::with_positions`: every candidate is listed once, paired with its
    /// byte offset into the input.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline, which cannot
    /// yield the asserted `(candidate, offset)` pairs — the fixture markup looks stripped from
    /// this copy; confirm against upstream.
    #[test]
    fn does_not_emit_the_same_slice_multiple_times() {
        let candidates: Vec<_> = Extractor::with_positions("
".as_bytes(), Default::default())
            .into_iter()
            // SAFETY (reviewer note): each `s` is a sub-slice of the `&str` literal above, so
            // it is valid UTF-8 provided the extractor never cuts inside a multi-byte
            // character — holds for an ASCII fixture; TODO confirm for the real input.
            .map(|(s, p)| unsafe { (std::str::from_utf8_unchecked(s), p) })
            .collect();
        assert_eq!(candidates, vec![("div", 1), ("class", 5), ("flex", 12),]);
    }

    /// Candidates containing an empty `[]` (with or without a `/name` part) are expected in
    /// the output.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline, which cannot
    /// yield the asserted candidates — the fixture markup looks stripped from this copy;
    /// confirm against upstream.
    #[test]
    fn empty_arbitrary_values_are_allowed_for_codemods() {
        let candidates = run(
            r#"
"#,
            false,
        );
        assert_eq!(
            candidates,
            vec![
                "div",
                "class",
                "group-[]:flex",
                "group-[]/name:flex",
                "peer-[]:flex",
                "peer-[]/name:flex"
            ]
        );
    }

    /// Short names such as `h2` and `hz` are expected as candidates.
    ///
    /// NOTE(review): the input literal as it appears here is only a newline, which cannot
    /// yield the asserted candidates — the fixture markup looks stripped from this copy;
    /// confirm against upstream.
    #[test]
    fn simple_utility_names_with_numbers_work() {
        let candidates = run(r#"
"#, false);
        assert_eq!(candidates, vec!["div", "class", "h2", "hz",]);
    }

    /// Quoted class names in an array literal, separated by bare commas, are all found.
    /// `[--my-var:1_/_2]` is expected both with and without its brackets.
    #[test]
    fn classes_in_an_array_without_whitespace() {
        let candidates = run(
            "let classes = ['bg-black','hover:px-0.5','text-[13px]','[--my-var:1_/_2]','[.foo_&]:px-[0]','[.foo_&]:[color:red]']",
            false,
        );
        assert_eq!(
            candidates,
            vec![
                "let",
                "classes",
                "bg-black",
                "hover:px-0.5",
                "text-[13px]",
                "[--my-var:1_/_2]",
                "--my-var:1_/_2",
                "[.foo_&]:px-[0]",
                "[.foo_&]:[color:red]",
            ]
        );
    }

    /// Same array as `classes_in_an_array_without_whitespace`, with a space after each comma;
    /// the expected candidates are identical.
    #[test]
    fn classes_in_an_array_with_spaces() {
        let candidates = run(
            "let classes = ['bg-black', 'hover:px-0.5', 'text-[13px]', '[--my-var:1_/_2]', '[.foo_&]:px-[0]', '[.foo_&]:[color:red]']",
            false,
        );
        assert_eq!(
            candidates,
            vec![
                "let",
                "classes",
                "bg-black",
                "hover:px-0.5",
                "text-[13px]",
                "[--my-var:1_/_2]",
                "--my-var:1_/_2",
                "[.foo_&]:px-[0]",
                "[.foo_&]:[color:red]",
            ]
        );
    }

    /// Same array as `classes_in_an_array_without_whitespace`, with a tab after each comma;
    /// the expected candidates are identical.
    #[test]
    fn classes_in_an_array_with_tabs() {
        let candidates = run(
            "let classes = ['bg-black',\t'hover:px-0.5',\t'text-[13px]',\t'[--my-var:1_/_2]',\t'[.foo_&]:px-[0]',\t'[.foo_&]:[color:red]']",
            false,
        );
        assert_eq!(
            candidates,
            vec![
                "let",
                "classes",
                "bg-black",
                "hover:px-0.5",
                "text-[13px]",
                "[--my-var:1_/_2]",
                "--my-var:1_/_2",
                "[.foo_&]:px-[0]",
                "[.foo_&]:[color:red]",
            ]
        );
    }

    /// Same array as `classes_in_an_array_without_whitespace`, with each element on its own
    /// line; the expected candidates are identical.
    #[test]
    fn classes_in_an_array_with_newlines() {
        let candidates = run(
            "let classes = [\n'bg-black',\n'hover:px-0.5',\n'text-[13px]',\n'[--my-var:1_/_2]',\n'[.foo_&]:px-[0]',\n'[.foo_&]:[color:red]'\n]",
            false,
        );
        assert_eq!(
            candidates,
            vec![
                "let",
                "classes",
                "bg-black",
                "hover:px-0.5",
                "text-[13px]",
                "[--my-var:1_/_2]",
                "--my-var:1_/_2",
                "[.foo_&]:px-[0]",
                "[.foo_&]:[color:red]",
            ]
        );
    }

    /// Backslash-escaped brackets and colon must not produce a bracketed arbitrary property:
    /// only `!code` and `a` are expected.
    ///
    /// NOTE(review): `!code` does not occur in the input literal as it appears here — part of
    /// the fixture looks stripped from this copy; confirm against upstream.
    #[test]
    fn arbitrary_properties_are_not_picked_up_after_an_escape() {
        // Helper defined outside this section; presumably switches on trace output for this
        // test — confirm it is meant to stay.
        _please_trace();
        let candidates = run(
            r#" \\[a\\]\\:block] "#,
            false,
        );
        assert_eq!(candidates, vec!["!code", "a"]);
    }
}