tailwindcss/crates/oxide/src/extractor/named_variant_machine.rs
Robin Malfait de145c5b06
Refactor: use compile time type state pattern (#17083)
This PR implements the state machines using the type state pattern at
compile time (via generic types) instead of a runtime state variable.
There is no runtime check to see what state we are in, instead we
transition to the new state when it's necessary.

This has some nice performance improvements for some of the state
machines, e.g.:

```diff
- ArbitraryVariableMachine: Throughput: 744.92 MB/s
+ ArbitraryVariableMachine: Throughput:   1.21 GB/s
```

We also don't have to store the current state because each machine runs
to completion. It's during execution that we can move to a new state if
necessary.


Unfortunately the diff is a tiny bit annoying to read, but essentially
this is what happened:

### The `enum` is split up into its individual states as structs:
```rs
enum State {
  A,
  B,
  C,
}
```
Becomes:
```rs
struct A;
struct B;
struct C;
```

### Generics

The current machine will receive a generic `State` that we can default
to the `IdleState`. Then we use `PhantomData` to "use" the type because
the generic type is otherwise not used as a concrete value, it's just a
marker.

```rs
struct MyMachine {}
```
Becomes:
```rs
struct MyMachine<State = Idle> {
  _state: std::marker::PhantomData<State>
}
```

### Split 

Next, the `next` function used to match on the current state, but now
each match arm is moved to a dedicated implementation instead:
```rs
impl Machine for MyMachine {
  fn next(&mut self) -> MachineState {
    match self.state {
      State::A => { /* … */ },
      State::B => { /* … */ },
      State::C => { /* … */ },
    }
  }
}
``` 
Becomes:
```rs
impl Machine for MyMachine<A> {
  fn next(&mut self) -> MachineState {
    /* … */
  }
}
impl Machine for MyMachine<B> {
  fn next(&mut self) -> MachineState {
    /* … */
  }
}
impl Machine for MyMachine<C> {
  fn next(&mut self) -> MachineState {
    /* … */
  }
}
```

It's a bit more verbose, but now each state is implemented in its own
block. This also removes 2 levels of nesting which is a nice benefit.
2025-03-10 14:08:39 -04:00

416 lines
12 KiB
Rust

use crate::cursor;
use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine;
use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine;
use crate::extractor::machine::{Machine, MachineState};
use crate::extractor::modifier_machine::ModifierMachine;
use classification_macros::ClassifyBytes;
use std::marker::PhantomData;
/// Waiting for the start of a variant
///
/// This is the default state of [`NamedVariantMachine`]. In this state the machine only looks at
/// the current and next byte to decide whether a variant can start at the cursor position.
#[derive(Debug, Default)]
pub struct IdleState;
/// Parsing a variant
///
/// Entered once a valid start character has been consumed. In this state the machine walks over
/// the remaining characters of the variant until it reaches a `:`, a `/` (start of a modifier),
/// an arbitrary value / variable, or an invalid character.
#[derive(Debug, Default)]
pub struct ParsingState;
/// Parsing a modifier
///
/// E.g.:
///
/// ```text
/// group-hover/name:
///            ^^^^^
/// ```
///
/// The modifier itself is handled by the `ModifierMachine`; afterwards a `:` must follow.
#[derive(Debug, Default)]
pub struct ParsingModifierState;
/// Parsing the end of a variant
///
/// E.g.:
///
/// ```text
/// hover:
///      ^
/// ```
///
/// In this state the current character must be the `:`, anything else is invalid.
#[derive(Debug, Default)]
pub struct ParsingEndState;
/// Extract named variants from an input including the `:`.
///
/// E.g.:
///
/// ```text
/// hover:flex
/// ^^^^^^
///
/// data-[state=pending]:flex
/// ^^^^^^^^^^^^^^^^^^^^^
///
/// supports-(--my-variable):flex
/// ^^^^^^^^^^^^^^^^^^^^^^^^^
/// ```
///
/// The `State` type parameter is a compile-time marker (it is only stored as `PhantomData`). It
/// selects which `Machine` implementation is used: `IdleState`, `ParsingState`,
/// `ParsingModifierState` or `ParsingEndState`.
#[derive(Debug, Default)]
pub struct NamedVariantMachine<State = IdleState> {
/// Start position of the variant
start_pos: usize,
/// Sub-machine used when the variant contains an arbitrary variable, e.g.:
/// `supports-(--my-color):`
arbitrary_variable_machine: ArbitraryVariableMachine,
/// Sub-machine used when the variant contains an arbitrary value, e.g.:
/// `data-[state=pending]:` or `@[36rem]:`
arbitrary_value_machine: ArbitraryValueMachine,
/// Sub-machine used to parse a modifier after a `/`, e.g.: `group-hover/name:`
modifier_machine: ModifierMachine,
/// Zero-sized marker that ties the machine to its current `State` type
_state: PhantomData<State>,
}
impl<State> NamedVariantMachine<State> {
    /// Build the machine for `NextState`.
    ///
    /// Only the recorded start position is carried over to the new machine. The nested
    /// sub-machines are not copied, they are created from their `Default` implementation.
    #[inline(always)]
    fn transition<NextState>(&self) -> NamedVariantMachine<NextState> {
        let start_pos = self.start_pos;
        let _state = PhantomData::<NextState>;

        NamedVariantMachine {
            start_pos,
            arbitrary_variable_machine: Default::default(),
            arbitrary_value_machine: Default::default(),
            modifier_machine: Default::default(),
            _state,
        }
    }
}
impl Machine for NamedVariantMachine<IdleState> {
    /// No-op.
    #[inline(always)]
    fn reset(&mut self) {}

    /// Decide whether a named variant can start at the current cursor position.
    ///
    /// When the current character is a valid start character, the position is recorded in
    /// `start_pos`, the cursor is advanced and parsing continues in the next state. The result of
    /// that next state is returned. For every other character `MachineState::Idle` is returned
    /// and the cursor is left untouched.
    #[inline(always)]
    fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
        match cursor.curr.into() {
            Class::AlphaLower | Class::Star => match cursor.next.into() {
                // Valid single character variant, must be followed by a `:`
                //
                // E.g.: `<div class="x:flex"></div>`
                //                    ^^
                // E.g.: `*:`
                //        ^^
                Class::Colon => {
                    // The start must be recorded on this path as well, otherwise the
                    // `done(self.start_pos, …)` call in the end state uses a stale position (the
                    // default `0`, or the start of a previously parsed variant).
                    self.start_pos = cursor.pos;
                    cursor.advance();
                    self.transition::<ParsingEndState>().next(cursor)
                }

                // Valid start characters
                //
                // E.g.: `hover:`
                //        ^
                // E.g.: `**:`
                //        ^
                _ => {
                    self.start_pos = cursor.pos;
                    cursor.advance();
                    self.transition::<ParsingState>().next(cursor)
                }
            },

            // Valid start characters
            //
            // E.g.: `2xl:`
            //        ^
            // E.g.: `@md:`
            //        ^
            Class::Number | Class::At => {
                self.start_pos = cursor.pos;
                cursor.advance();
                self.transition::<ParsingState>().next(cursor)
            }

            // Everything else, is not a valid start of the variant.
            _ => MachineState::Idle,
        }
    }
}
impl Machine for NamedVariantMachine<ParsingState> {
#[inline(always)]
fn reset(&mut self) {}
#[inline(always)]
fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
let len = cursor.input.len();
while cursor.pos < len {
match cursor.curr.into() {
Class::Dash => match cursor.next.into() {
// Start of an arbitrary value
//
// E.g.: `data-[state=pending]:`.
// ^^
Class::OpenBracket => {
cursor.advance();
return match self.arbitrary_value_machine.next(cursor) {
MachineState::Idle => self.restart(),
MachineState::Done(_) => self.parse_arbitrary_end(cursor),
};
}
// Start of an arbitrary variable
//
// E.g.: `supports-(--my-color):`.
// ^^
Class::OpenParen => {
cursor.advance();
return match self.arbitrary_variable_machine.next(cursor) {
MachineState::Idle => self.restart(),
MachineState::Done(_) => self.parse_arbitrary_end(cursor),
};
}
// Valid characters _if_ followed by another valid character. These characters are
// only valid inside of the variant but not at the end of the variant.
//
// E.g.: `hover-`
// ^ Invalid
// E.g.: `hover-!`
// ^ Invalid
// E.g.: `hover-/`
// ^ Invalid
// E.g.: `flex-1`
// ^ Valid
Class::Dash
| Class::Underscore
| Class::AlphaLower
| Class::AlphaUpper
| Class::Number => cursor.advance(),
// Everything else is invalid
_ => return self.restart(),
},
// Start of an arbitrary value
//
// E.g.: `@[state=pending]:`.
// ^
Class::OpenBracket => {
return match self.arbitrary_value_machine.next(cursor) {
MachineState::Idle => self.restart(),
MachineState::Done(_) => self.parse_arbitrary_end(cursor),
};
}
Class::Underscore => match cursor.next.into() {
// Valid characters _if_ followed by another valid character. These characters are
// only valid inside of the variant but not at the end of the variant.
//
// E.g.: `hover_`
// ^ Invalid
// E.g.: `hover_!`
// ^ Invalid
// E.g.: `hover_/`
// ^ Invalid
// E.g.: `custom_1`
// ^ Valid
Class::Dash
| Class::Underscore
| Class::AlphaLower
| Class::AlphaUpper
| Class::Number => cursor.advance(),
// Everything else is invalid
_ => return self.restart(),
},
// Still valid characters
Class::AlphaLower | Class::AlphaUpper | Class::Number | Class::Star => {
cursor.advance()
}
// A `/` means we are at the end of the variant, but there might be a modifier
//
// E.g.:
//
// ```
// group-hover/name:
// ^
// ```
Class::Slash => return self.transition::<ParsingModifierState>().next(cursor),
// A `:` means we are at the end of the variant
//
// E.g.: `hover:`
// ^
Class::Colon => return self.done(self.start_pos, cursor),
// Everything else is invalid
_ => return self.restart(),
};
}
self.restart()
}
}
impl NamedVariantMachine<ParsingState> {
#[inline(always)]
fn parse_arbitrary_end(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
match cursor.next.into() {
Class::Slash => {
cursor.advance();
self.transition::<ParsingModifierState>().next(cursor)
}
Class::Colon => {
cursor.advance();
self.transition::<ParsingEndState>().next(cursor)
}
_ => self.restart(),
}
}
}
impl Machine for NamedVariantMachine<ParsingModifierState> {
#[inline(always)]
fn reset(&mut self) {}
#[inline(always)]
fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
match self.modifier_machine.next(cursor) {
MachineState::Idle => self.restart(),
MachineState::Done(_) => match cursor.next.into() {
// Modifier must be followed by a `:`
//
// E.g.: `group-hover/name:`
// ^
Class::Colon => {
cursor.advance();
self.transition::<ParsingEndState>().next(cursor)
}
// Everything else is invalid
_ => self.restart(),
},
}
}
}
impl Machine for NamedVariantMachine<ParsingEndState> {
#[inline(always)]
fn reset(&mut self) {}
#[inline(always)]
fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
match cursor.curr.into() {
// The end of a variant must be the `:`
//
// E.g.: `hover:`
// ^
Class::Colon => self.done(self.start_pos, cursor),
// Everything else is invalid
_ => self.restart(),
}
}
}
/// Classification of a single input byte as used by the `NamedVariantMachine`.
///
/// The `bytes`, `bytes_range` and `fallback` attributes describe which byte(s) belong to which
/// class. The state implementations obtain a `Class` via `cursor.curr.into()` and
/// `cursor.next.into()`.
///
/// NOTE(review): the conversion from a byte is presumably generated by the `ClassifyBytes`
/// derive, confirm in `classification_macros`.
///
/// `Dot`, `End`, `CloseBracket`, `Quote`, `Whitespace` and `Other` are never matched by name in
/// the state implementations of this file, they all end up in the "everything else" arms.
#[derive(Clone, Copy, ClassifyBytes)]
enum Class {
// `a` through `z`
#[bytes_range(b'a'..=b'z')]
AlphaLower,
// `A` through `Z`
#[bytes_range(b'A'..=b'Z')]
AlphaUpper,
// `@`, valid start of a variant (e.g.: `@md:`)
#[bytes(b'@')]
At,
// `:`, end of a variant
#[bytes(b':')]
Colon,
// `-`
#[bytes(b'-')]
Dash,
// `.`
#[bytes(b'.')]
Dot,
// The NUL byte
#[bytes(b'\0')]
End,
// `0` through `9`
#[bytes_range(b'0'..=b'9')]
Number,
// `[`, start of an arbitrary value
#[bytes(b'[')]
OpenBracket,
// `]`
#[bytes(b']')]
CloseBracket,
// `(`, start of an arbitrary variable when it follows a `-`
#[bytes(b'(')]
OpenParen,
// Single quote, double quote or backtick
#[bytes(b'\'', b'"', b'`')]
Quote,
// `*`, valid start of a variant (e.g.: `*:` and `**:`)
#[bytes(b'*')]
Star,
// `/`, start of a modifier
#[bytes(b'/')]
Slash,
// `_`
#[bytes(b'_')]
Underscore,
// Space, tab, line feed, carriage return or form feed
#[bytes(b' ', b'\t', b'\n', b'\r', b'\x0C')]
Whitespace,
// Every byte that is not listed above
#[fallback]
Other,
}
#[cfg(test)]
mod tests {
    use super::{IdleState, NamedVariantMachine};
    use crate::extractor::machine::Machine;

    /// Throughput and duration measurement. Ignored by default, so it only runs when asked for
    /// explicitly.
    #[test]
    #[ignore]
    fn test_named_variant_machine_performance() {
        let input = r#"<button class="hover:focus:flex data-[state=pending]:flex supports-(--my-variable):flex group-hover/named:not-has-peer-data-disabled:flex">"#;

        NamedVariantMachine::<IdleState>::test_throughput(1_000_000, input);
        NamedVariantMachine::<IdleState>::test_duration_once(input);

        // NOTE(review): `todo!()` always panics, so this test fails whenever it is executed.
        // Presumably that is on purpose so that the test harness shows the captured output of the
        // measurements above — confirm before removing.
        todo!()
    }

    /// Table driven test: every input is extracted and compared against the expected variants.
    #[test]
    fn test_named_variant_extraction() {
        let cases = [
            // Simple variant
            ("hover:", vec!["hover:"]),
            // Simple single-character variant
            ("a:", vec!["a:"]),
            ("a/foo:", vec!["a/foo:"]),
            // Variants with dashes, modifiers, arbitrary values and arbitrary variables
            ("group-hover:flex", vec!["group-hover:"]),
            ("group-hover/name:flex", vec!["group-hover/name:"]),
            (
                "group-[data-state=pending]/name:flex",
                vec!["group-[data-state=pending]/name:"],
            ),
            ("supports-(--foo)/name:flex", vec!["supports-(--foo)/name:"]),
            // Container queries
            ("@md:flex", vec!["@md:"]),
            ("@max-md:flex", vec!["@max-md:"]),
            ("@-[36rem]:flex", vec!["@-[36rem]:"]),
            ("@[36rem]:flex", vec!["@[36rem]:"]),
            // --------------------------------------------------------
            // Exceptions:
            // Arbitrary variable must be valid
            (r"supports-(--my-color\):", vec![]),
            (r"supports-(--my#color)", vec![]),
            // Single letter variant with uppercase letter is invalid
            ("A:", vec![]),
        ];

        for (input, expected) in cases {
            let actual = NamedVariantMachine::<IdleState>::test_extract_all(input);

            if actual == expected {
                continue;
            }

            // Print the failing case before the assertion below aborts the test
            dbg!(&input, &actual, &expected);
            assert_eq!(actual, expected);
        }
    }
}