diff --git a/crates/oxide/src/extractor/arbitrary_property_machine.rs b/crates/oxide/src/extractor/arbitrary_property_machine.rs index 975c3df98..c1460092f 100644 --- a/crates/oxide/src/extractor/arbitrary_property_machine.rs +++ b/crates/oxide/src/extractor/arbitrary_property_machine.rs @@ -4,6 +4,31 @@ use crate::extractor::machine::{Machine, MachineState}; use crate::extractor::string_machine::StringMachine; use crate::extractor::CssVariableMachine; use classification_macros::ClassifyBytes; +use std::marker::PhantomData; + +#[derive(Debug, Default)] +pub struct IdleState; + +/// Parsing the property, e.g.: +/// +/// ```text +/// [color:red] +/// ^^^^^ +/// +/// [--my-color:red] +/// ^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct ParsingPropertyState; + +/// Parsing the value, e.g.: +/// +/// ```text +/// [color:red] +/// ^^^ +/// ``` +#[derive(Debug, Default)] +pub struct ParsingValueState; /// Extracts arbitrary properties from the input, including the brackets. /// @@ -17,182 +42,98 @@ use classification_macros::ClassifyBytes; /// ^^^^^^^^^^^^^^^^ /// ``` #[derive(Debug, Default)] -pub struct ArbitraryPropertyMachine { +pub struct ArbitraryPropertyMachine { /// Start position of the arbitrary value start_pos: usize, /// Track brackets to ensure they are balanced bracket_stack: BracketStack, - /// Current state of the machine - state: State, - css_variable_machine: CssVariableMachine, string_machine: StringMachine, + + _state: PhantomData, } -#[derive(Debug, Default)] -enum State { - #[default] - Idle, - - /// Parsing the property, e.g.: - /// - /// ```text - /// [color:red] - /// ^^^^^ - /// - /// [--my-color:red] - /// ^^^^^^^^^^ - /// ``` - ParsingProperty, - - /// Parsing the value, e.g.: - /// - /// ```text - /// [color:red] - /// ^^^ - /// ``` - ParsingValue, -} - -impl Machine for ArbitraryPropertyMachine { +impl ArbitraryPropertyMachine { #[inline(always)] - fn reset(&mut self) { - self.start_pos = 0; - self.state = State::Idle; - self.bracket_stack.reset(); + fn transition(&self) -> ArbitraryPropertyMachine { + ArbitraryPropertyMachine { + start_pos: self.start_pos, + bracket_stack: Default::default(), + css_variable_machine: Default::default(), + string_machine: Default::default(), + _state: PhantomData, + } } +} + +impl Machine for ArbitraryPropertyMachine { + #[inline(always)] + fn reset(&mut self) {} + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match cursor.curr.into() { + // Start of an arbitrary property + Class::OpenBracket => { + self.start_pos = cursor.pos; + cursor.advance(); + self.transition::().next(cursor) + } + + // Anything else is not a valid start of an arbitrary value + _ => MachineState::Idle, + } + } +} + +impl Machine for ArbitraryPropertyMachine { + #[inline(always)] + fn reset(&mut self) {} #[inline] fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { let len = cursor.input.len(); - match self.state { - State::Idle => match cursor.curr.into() { - // Start of an arbitrary property - Class::OpenBracket => { - self.start_pos = cursor.pos; - self.state = State::ParsingProperty; + while cursor.pos < len { + match cursor.curr.into() { + Class::Dash => match cursor.next.into() { + // Start of a CSS variable + // + // E.g.: `[--my-color:red]` + // ^^ + Class::Dash => return self.parse_property_variable(cursor), + + // Dashes are allowed in the property name + // + // E.g.: `[background-color:red]` + // ^ + _ => cursor.advance(), + }, + + // Alpha characters are allowed in the property name + // + // E.g.: `[color:red]` + // ^^^^^ + Class::AlphaLower => cursor.advance(), + + // End of the property name, but there must be at least a single character + Class::Colon if cursor.pos > self.start_pos + 1 => { cursor.advance(); - self.next(cursor) + return self.transition::().next(cursor); } - // Anything else is not a valid start of an arbitrary value - _ => MachineState::Idle, - }, - - State::ParsingProperty => { - while cursor.pos < len { - match cursor.curr.into() { - Class::Dash => match cursor.next.into() { - // Start of a CSS variable - // - // E.g.: `[--my-color:red]` - // ^^ - Class::Dash => return self.parse_property_variable(cursor), - - // Dashes are allowed in the property name - // - // E.g.: `[background-color:red]` - // ^ - _ => cursor.advance(), - }, - - // Alpha characters are allowed in the property name - // - // E.g.: `[color:red]` - // ^^^^^ - Class::AlphaLower => cursor.advance(), - - // End of the property name, but there must be at least a single character - Class::Colon if cursor.pos > self.start_pos + 1 => { - self.state = State::ParsingValue; - cursor.advance(); - return self.next(cursor); - } - - // Anything else is not a valid property character - _ => return self.restart(), - } - } - - self.restart() - } - - State::ParsingValue => { - while cursor.pos < len { - match cursor.curr.into() { - Class::Escape => match cursor.next.into() { - // An escaped whitespace character is not allowed - // - // E.g.: `[color:var(--my-\ color)]` - // ^ - Class::Whitespace => return self.restart(), - - // An escaped character, skip the next character, resume after - // - // E.g.: `[color:var(--my-\#color)]` - // ^ - _ => cursor.advance_twice(), - }, - - Class::OpenParen | Class::OpenBracket | Class::OpenCurly => { - if !self.bracket_stack.push(cursor.curr) { - return self.restart(); - } - cursor.advance(); - } - - Class::CloseParen | Class::CloseBracket | Class::CloseCurly - if !self.bracket_stack.is_empty() => - { - if !self.bracket_stack.pop(cursor.curr) { - return self.restart(); - } - cursor.advance(); - } - - // End of an arbitrary value - // - // 1. All brackets must be balanced - // 2. There must be at least a single character inside the brackets - Class::CloseBracket - if self.start_pos + 1 != cursor.pos - && self.bracket_stack.is_empty() => - { - return self.done(self.start_pos, cursor) - } - - // Start of a string - Class::Quote => return self.parse_string(cursor), - - // Another `:` inside of an arbitrary property is only valid inside of a string or - // inside of brackets. Everywhere else, it's invalid. - // - // E.g.: `[color:red:blue]` - // ^ Not valid - // E.g.: `[background:url(https://example.com)]` - // ^ Valid - // E.g.: `[content:'a:b:c:']` - // ^ ^ ^ Valid - Class::Colon if self.bracket_stack.is_empty() => return self.restart(), - - // Any kind of whitespace is not allowed - Class::Whitespace => return self.restart(), - - // Everything else is valid - _ => cursor.advance(), - }; - } - - self.restart() + // Anything else is not a valid property character + _ => return self.restart(), } } + + self.restart() } } -impl ArbitraryPropertyMachine { +impl ArbitraryPropertyMachine { fn parse_property_variable(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { match self.css_variable_machine.next(cursor) { MachineState::Idle => self.restart(), @@ -202,9 +143,8 @@ impl ArbitraryPropertyMachine { // E.g.: `[--my-color:red]` // ^ Class::Colon => { - self.state = State::ParsingValue; cursor.advance_twice(); - self.next(cursor) + self.transition::().next(cursor) } // Invalid arbitrary property @@ -212,7 +152,86 @@ impl ArbitraryPropertyMachine { }, } } +} +impl Machine for ArbitraryPropertyMachine { + #[inline(always)] + fn reset(&mut self) { + self.bracket_stack.reset(); + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + let len = cursor.input.len(); + while cursor.pos < len { + match cursor.curr.into() { + Class::Escape => match cursor.next.into() { + // An escaped whitespace character is not allowed + // + // E.g.: `[color:var(--my-\ color)]` + // ^ + Class::Whitespace => return self.restart(), + + // An escaped character, skip the next character, resume after + // + // E.g.: `[color:var(--my-\#color)]` + // ^ + _ => cursor.advance_twice(), + }, + + Class::OpenParen | Class::OpenBracket | Class::OpenCurly => { + if !self.bracket_stack.push(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + Class::CloseParen | Class::CloseBracket | Class::CloseCurly + if !self.bracket_stack.is_empty() => + { + if !self.bracket_stack.pop(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + // End of an arbitrary value + // + // 1. All brackets must be balanced + // 2. There must be at least a single character inside the brackets + Class::CloseBracket + if self.start_pos + 1 != cursor.pos && self.bracket_stack.is_empty() => + { + return self.done(self.start_pos, cursor) + } + + // Start of a string + Class::Quote => return self.parse_string(cursor), + + // Another `:` inside of an arbitrary property is only valid inside of a string or + // inside of brackets. Everywhere else, it's invalid. + // + // E.g.: `[color:red:blue]` + // ^ Not valid + // E.g.: `[background:url(https://example.com)]` + // ^ Valid + // E.g.: `[content:'a:b:c:']` + // ^ ^ ^ Valid + Class::Colon if self.bracket_stack.is_empty() => return self.restart(), + + // Any kind of whitespace is not allowed + Class::Whitespace => return self.restart(), + + // Everything else is valid + _ => cursor.advance(), + }; + } + + self.restart() + } +} + +impl ArbitraryPropertyMachine { fn parse_string(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { match self.string_machine.next(cursor) { MachineState::Idle => self.restart(), @@ -271,7 +290,7 @@ enum Class { #[cfg(test)] mod tests { - use super::ArbitraryPropertyMachine; + use super::{ArbitraryPropertyMachine, IdleState}; use crate::extractor::machine::Machine; #[test] @@ -279,8 +298,8 @@ mod tests { fn test_arbitrary_property_machine_performance() { let input = r#"