Improve Oxide scanner API (#14187)

This PR updates the API for interacting with the Oxide scanner. Until now,
we used the name `scanDir(…)`, which was fine at first, but we do much
more than scan a single directory these days.

We now have features such as:

1. Auto source detection (can be turned off; `@tailwindcss/vite`, for
example, doesn't need it)
2. Scanning based on `@source` directives found in CSS files
3. "Incremental" rebuilds (which meant that the `scanDir(…)` result had
to be stateful)

To model this explicitly, this PR introduces a new `Scanner` class that
accepts `detectSources` and `sources` options. E.g.:

```ts
let scanner = new Scanner({
  // Optional; omitting the `detectSources` field disables automatic source detection
  detectSources: { base: __dirname },

  // List of glob entries to scan. These come from `@source` directives in CSS.
  sources: [
    { base: __dirname, pattern: "src/**/*.css" },
    // …
  ],
});
```
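
When an integration already knows its sources (as `@tailwindcss/vite` does),
`detectSources` can simply be omitted. A minimal sketch, with a made-up base
path and glob:

```ts
// No `detectSources`: nothing is auto-detected; only the explicit globs are scanned.
let scanner = new Scanner({
  sources: [{ base: '/path/to/project', pattern: 'src/**/*.html' }],
})
```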

The scanner object has the following API:

```ts
export interface ChangedContent {
  /** File path to the changed file */
  file?: string
  /** Contents of the changed file */
  content?: string
  /** File extension */
  extension: string
}
export interface DetectSources {
  /** Base path to start scanning from */
  base: string
}
export interface GlobEntry {
  /** Base path of the glob */
  base: string
  /** Glob pattern */
  pattern: string
}
export interface ScannerOptions {
  /** Automatically detect sources in the base path */
  detectSources?: DetectSources
  /** Glob sources */
  sources?: Array<GlobEntry>
}
export declare class Scanner {
  constructor(opts: ScannerOptions)
  scan(): Array<string>
  scanFiles(input: Array<ChangedContent>): Array<string>
  get files(): Array<string>
  get globs(): Array<GlobEntry>
}
```

The `scanFiles(…)` method is used for incremental rebuilds. It takes a
`ChangedContent` array describing all new or changed files, and returns
the candidates that haven't been seen before.

Note that the `Scanner` object is stateful, which means that we no longer
have to track candidates in a `Set` on the JavaScript side. We can just
call `scan()` whenever we need the full list.
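
A minimal sketch of the full flow (the changed file content below is made
up for illustration):

```ts
import { Scanner, type ChangedContent } from '@tailwindcss/oxide'

let scanner = new Scanner({ detectSources: { base: __dirname } })

// Initial scan: resolves all sources and returns every candidate found.
let candidates = scanner.scan()

// On a file change, only candidates we haven't seen before come back.
let changed: ChangedContent[] = [{ content: '<div class="flex underline"></div>', extension: 'html' }]
let newCandidates = scanner.scanFiles(changed)
if (newCandidates.length > 0) {
  // Rebuild the CSS with just the new candidates …
}
```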

This PR also removes some unused code in the `scanDir(…)` function that
allowed for sequential or parallel `IO`, and sequential or parallel
`Parsing`. We only ever used the same `IO` and `Parsing` strategies for
all files, so the extra flexibility is gone.
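
For comparison, a sketch of the old vs. new call sites (adapted from the
tests in this diff):

```ts
import { Scanner } from '@tailwindcss/oxide'

let content = '<div class="flex"></div>'

// Before: a strategy bitmask had to be passed alongside the content.
// let candidates = scanFiles([{ content, extension: 'html' }], IO.Sequential | Parsing.Sequential)

// After: the scanner always uses the same strategy internally.
let scanner = new Scanner({})
let candidates = scanner.scanFiles([{ content, extension: 'html' }])
```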

---------

Co-authored-by: Jordan Pittman <jordan@cryptica.me>
Cargo.lock (generated)

@ -202,83 +202,6 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "futures"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0"
dependencies = [
"futures-channel",
"futures-core",
"futures-executor",
"futures-io",
"futures-sink",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-channel"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
dependencies = [
"futures-core",
"futures-sink",
]
[[package]]
name = "futures-core"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
[[package]]
name = "futures-executor"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]]
name = "futures-io"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
[[package]]
name = "futures-sink"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
[[package]]
name = "futures-task"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
[[package]]
name = "futures-util"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
dependencies = [
"futures-channel",
"futures-core",
"futures-io",
"futures-sink",
"futures-task",
"memchr",
"pin-project-lite",
"pin-utils",
"slab",
]
[[package]]
name = "fxhash"
version = "0.2.1"
@ -398,16 +321,6 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.18"
@ -527,41 +440,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.5.3",
"smallvec",
"windows-targets 0.52.6",
]
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "pin-utils"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "proc-macro2"
version = "1.0.86"
@ -611,15 +495,6 @@ dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
dependencies = [
"bitflags 2.6.0",
]
[[package]]
name = "regex"
version = "1.8.3"
@ -675,27 +550,12 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "scc"
version = "2.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05ccfb12511cdb770157ace92d7dda771e498445b78f9886e8cdbc5140a4eced"
dependencies = [
"sdd",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "sdd"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177258b64c0faaa9ffd3c65cd3262c2bc7e2588dbbd9c1641d0346145c1bbda8"
[[package]]
name = "semver"
version = "1.0.17"
@ -708,31 +568,6 @@ version = "1.0.163"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2"
[[package]]
name = "serial_test"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b4b487fe2acf240a021cf57c6b2b4903b1e78ca0ecd862a71b71d2a51fed77d"
dependencies = [
"futures",
"log",
"once_cell",
"parking_lot",
"scc",
"serial_test_derive",
]
[[package]]
name = "serial_test_derive"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.18",
]
[[package]]
name = "sharded-slab"
version = "0.1.4"
@ -742,15 +577,6 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "slab"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
dependencies = [
"autocfg",
]
[[package]]
name = "smallvec"
version = "1.10.0"
@ -801,10 +627,8 @@ dependencies = [
"glob-match",
"globwalk",
"ignore",
"lazy_static",
"log",
"rayon",
"serial_test",
"tempfile",
"tracing",
"tracing-subscriber",
@ -819,7 +643,7 @@ checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998"
dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.3.5",
"redox_syscall",
"rustix",
"windows-sys 0.45.0",
]
@ -1003,22 +827,6 @@ dependencies = [
"windows_x86_64_msvc 0.48.0",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm 0.52.6",
"windows_aarch64_msvc 0.52.6",
"windows_i686_gnu 0.52.6",
"windows_i686_gnullvm",
"windows_i686_msvc 0.52.6",
"windows_x86_64_gnu 0.52.6",
"windows_x86_64_gnullvm 0.52.6",
"windows_x86_64_msvc 0.52.6",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
@ -1031,12 +839,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
@ -1049,12 +851,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
@ -1067,18 +863,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
@ -1091,12 +875,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
@ -1109,12 +887,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
@ -1127,12 +899,6 @@ version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
@ -1144,9 +910,3 @@ name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"


@ -1,74 +1,48 @@
use napi::bindgen_prelude::{FromNapiValue, ToNapiValue};
use std::{collections::HashSet, path::PathBuf};
#[macro_use]
extern crate napi_derive;
#[derive(Debug, Clone)]
#[napi(object)]
pub struct ChangedContent {
/// File path to the changed file
pub file: Option<String>,
/// Contents of the changed file
pub content: Option<String>,
/// File extension
pub extension: String,
}
impl From<ChangedContent> for tailwindcss_oxide::ChangedContent {
fn from(changed_content: ChangedContent) -> Self {
tailwindcss_oxide::ChangedContent {
file: changed_content.file.map(PathBuf::from),
content: changed_content.content,
}
}
}
#[derive(Debug, Clone)]
#[napi]
pub struct ScanResult {
// Private information necessary for incremental rebuilds. Note: these fields are not exposed
// to JS
base: Option<String>,
sources: Vec<GlobEntry>,
// Public API:
pub globs: Vec<GlobEntry>,
pub files: Vec<String>,
pub candidates: Vec<String>,
}
#[napi]
impl ScanResult {
#[napi]
pub fn scan_files(&self, input: Vec<ChangedContent>) -> Vec<String> {
let result = tailwindcss_oxide::scan_dir(tailwindcss_oxide::ScanOptions {
base: self.base.clone(),
sources: self.sources.clone().into_iter().map(Into::into).collect(),
});
let mut unique_candidates: HashSet<String> = HashSet::from_iter(result.candidates);
let candidates_from_files: HashSet<String> = HashSet::from_iter(tailwindcss_oxide::scan_files(
input.into_iter().map(Into::into).collect(),
IO::Parallel as u8 | Parsing::Parallel as u8,
));
unique_candidates.extend(candidates_from_files);
unique_candidates
.into_iter()
.map(|x| x.to_string())
.collect()
}
#[napi(object)]
pub struct DetectSources {
/// Base path to start scanning from
pub base: String,
}
#[derive(Debug, Clone)]
#[napi(object)]
pub struct GlobEntry {
/// Base path of the glob
pub base: String,
/// Glob pattern
pub pattern: String,
}
impl From<ChangedContent> for tailwindcss_oxide::ChangedContent {
fn from(changed_content: ChangedContent) -> Self {
Self {
file: changed_content.file.map(Into::into),
content: changed_content.content,
}
}
}
impl From<GlobEntry> for tailwindcss_oxide::GlobEntry {
fn from(glob: GlobEntry) -> Self {
tailwindcss_oxide::GlobEntry {
Self {
base: glob.base,
pattern: glob.pattern,
}
@ -77,67 +51,75 @@ impl From<GlobEntry> for tailwindcss_oxide::GlobEntry {
impl From<tailwindcss_oxide::GlobEntry> for GlobEntry {
fn from(glob: tailwindcss_oxide::GlobEntry) -> Self {
GlobEntry {
Self {
base: glob.base,
pattern: glob.pattern,
}
}
}
impl From<DetectSources> for tailwindcss_oxide::scanner::detect_sources::DetectSources {
fn from(detect_sources: DetectSources) -> Self {
Self::new(detect_sources.base.into())
}
}
// ---
#[derive(Debug, Clone)]
#[napi(object)]
pub struct ScanOptions {
/// Base path to start scanning from
pub base: Option<String>,
pub struct ScannerOptions {
/// Automatically detect sources in the base path
pub detect_sources: Option<DetectSources>,
/// Glob sources
pub sources: Option<Vec<GlobEntry>>,
}
#[derive(Debug, Clone)]
#[napi]
pub fn clear_cache() {
tailwindcss_oxide::clear_cache();
pub struct Scanner {
scanner: tailwindcss_oxide::Scanner,
}
#[napi]
pub fn scan_dir(args: ScanOptions) -> ScanResult {
let result = tailwindcss_oxide::scan_dir(tailwindcss_oxide::ScanOptions {
base: args.base.clone(),
sources: args
.sources
.clone()
.unwrap_or_default()
impl Scanner {
#[napi(constructor)]
pub fn new(opts: ScannerOptions) -> Self {
Self {
scanner: tailwindcss_oxide::Scanner::new(
opts.detect_sources.map(Into::into),
opts
.sources
.map(|x| x.into_iter().map(Into::into).collect()),
),
}
}
#[napi]
pub fn scan(&mut self) -> Vec<String> {
self.scanner.scan()
}
#[napi]
pub fn scan_files(&mut self, input: Vec<ChangedContent>) -> Vec<String> {
self
.scanner
.scan_content(input.into_iter().map(Into::into).collect())
}
#[napi(getter)]
pub fn files(&mut self) -> Vec<String> {
self.scanner.get_files()
}
#[napi(getter)]
pub fn globs(&mut self) -> Vec<GlobEntry> {
self
.scanner
.get_globs()
.into_iter()
.map(Into::into)
.collect(),
});
ScanResult {
// Private
base: args.base,
sources: args.sources.unwrap_or_default(),
// Public
files: result.files,
candidates: result.candidates,
globs: result.globs.into_iter().map(Into::into).collect(),
.collect()
}
}
#[derive(Debug)]
#[napi]
pub enum IO {
Sequential = 0b0001,
Parallel = 0b0010,
}
#[derive(Debug)]
#[napi]
pub enum Parsing {
Sequential = 0b0100,
Parallel = 0b1000,
}
#[napi]
pub fn scan_files(input: Vec<ChangedContent>, strategy: u8) -> Vec<String> {
tailwindcss_oxide::scan_files(input.into_iter().map(Into::into).collect(), strategy)
}


@ -14,9 +14,7 @@ tracing = { version = "0.1.37", features = [] }
tracing-subscriber = { version = "0.3.16", features = ["env-filter"] }
walkdir = "2.3.3"
ignore = "0.4.20"
lazy_static = "1.4.0"
glob-match = "0.2.1"
serial_test = "3.1.1"
dunce = "1.0.5"
[dev-dependencies]


@ -1,57 +0,0 @@
use std::{path::PathBuf, time::SystemTime};
use std::fs::{self};
use fxhash::{FxHashMap, FxHashSet};
/// A cache to manage the list of candidates and the last modified time of files
/// in the project. This is used to avoid recompiling files that haven't changed.
#[derive(Default)]
pub struct Cache {
mtimes: FxHashMap<PathBuf, SystemTime>,
candidates: FxHashSet<String>,
}
impl Cache {
pub fn clear(&mut self) {
self.mtimes.clear();
self.candidates.clear();
}
pub fn add_candidates(&mut self, additional_candidates: Vec<String>) {
self.candidates.extend(additional_candidates);
}
pub fn get_candidates(&self) -> Vec<String> {
let mut result = vec![];
result.extend(self.candidates.iter().cloned());
result.sort();
result
}
pub fn find_modified_files<'a>(&mut self, paths: &'a Vec<PathBuf>) -> Vec<&'a PathBuf> {
// Get a list of the files that have been modified since the last time we checked
let mut modified: Vec<&PathBuf> = vec![];
for path in paths {
let curr = fs::metadata(path)
.and_then(|m| m.modified())
.unwrap_or(SystemTime::now());
let prev = self.mtimes.insert(path.clone(), curr);
match prev {
// Only add the file to the modified list if the mod time has changed
Some(prev) if prev != curr => {
modified.push(path);
},
// If the file was already in the cache then we don't need to do anything
Some(_) => (),
// If the file didn't exist before then it's been modified
None => modified.push(path),
}
}
modified
}
}


@ -1,25 +1,25 @@
use crate::parser::Extractor;
use crate::scanner::detect_sources::DetectSources;
use bstr::ByteSlice;
use cache::Cache;
use fxhash::FxHashSet;
use fxhash::{FxHashMap, FxHashSet};
use glob::fast_glob;
use glob::get_fast_patterns;
use ignore::DirEntry;
use ignore::WalkBuilder;
use lazy_static::lazy_static;
use rayon::prelude::*;
use std::cmp::Ordering;
use std::path::Path;
use std::fs;
use std::path::PathBuf;
use std::sync::Mutex;
use std::sync;
use std::time::SystemTime;
use tracing::event;
use walkdir::WalkDir;
pub mod cache;
pub mod cursor;
pub mod fast_skip;
pub mod glob;
pub mod parser;
pub mod scanner;
static SHOULD_TRACE: sync::LazyLock<bool> = sync::LazyLock::new(
|| matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || value.eq("1") || value.eq("true") || value.contains("tailwind")),
);
fn init_tracing() {
if !*SHOULD_TRACE {
@ -60,29 +60,156 @@ pub struct GlobEntry {
pub pattern: String,
}
pub fn clear_cache() {
let mut cache = GLOBAL_CACHE.lock().unwrap();
cache.clear();
#[derive(Debug, Clone, Default)]
pub struct Scanner {
/// Auto content configuration
detect_sources: Option<DetectSources>,
/// Glob sources
sources: Option<Vec<GlobEntry>>,
/// Scanner is ready to scan. We delay the file system traversal for detecting all files until
/// we actually need them.
ready: bool,
/// All files that we have to scan
files: Vec<PathBuf>,
/// All generated globs
globs: Vec<GlobEntry>,
/// Track file modification times
mtimes: FxHashMap<PathBuf, SystemTime>,
/// Track unique set of candidates
candidates: FxHashSet<String>,
}
pub fn scan_dir(opts: ScanOptions) -> ScanResult {
init_tracing();
let (mut files, mut globs) = match opts.base {
Some(base) => {
// Only enable auto content detection when `base` is provided.
let base = Path::new(&base);
let (files, dirs) = resolve_files(base);
let globs = resolve_globs(base, dirs);
(files, globs)
impl Scanner {
pub fn new(detect_sources: Option<DetectSources>, sources: Option<Vec<GlobEntry>>) -> Self {
Self {
detect_sources,
sources,
..Default::default()
}
None => (vec![], vec![]),
};
}
// If we have additional sources, then we have to resolve them as well.
if !opts.sources.is_empty() {
let resolved_files: Vec<_> = match fast_glob(&opts.sources) {
pub fn scan(&mut self) -> Vec<String> {
init_tracing();
self.prepare();
self.compute_candidates();
let mut candidates: Vec<String> = self.candidates.clone().into_iter().collect();
candidates.sort();
candidates
}
#[tracing::instrument(skip_all)]
pub fn scan_content(&mut self, changed_content: Vec<ChangedContent>) -> Vec<String> {
self.prepare();
let candidates = parse_all_blobs(read_all_files(changed_content));
let mut new_candidates = vec![];
for candidate in candidates {
if self.candidates.contains(&candidate) {
continue;
}
self.candidates.insert(candidate.clone());
new_candidates.push(candidate);
}
new_candidates
}
#[tracing::instrument(skip_all)]
pub fn get_files(&mut self) -> Vec<String> {
self.prepare();
self.files
.iter()
.map(|x| x.to_string_lossy().into())
.collect()
}
#[tracing::instrument(skip_all)]
pub fn get_globs(&mut self) -> Vec<GlobEntry> {
self.prepare();
self.globs.clone()
}
#[tracing::instrument(skip_all)]
fn compute_candidates(&mut self) {
let mut changed_content = vec![];
for path in &self.files {
let current_time = fs::metadata(path)
.and_then(|m| m.modified())
.unwrap_or(SystemTime::now());
let previous_time = self.mtimes.insert(path.clone(), current_time);
let should_scan_file = match previous_time {
// Time has changed, so we need to re-scan the file
Some(prev) if prev != current_time => true,
// File was in the cache, no need to re-scan
Some(_) => false,
// File didn't exist before, so we need to scan it
None => true,
};
if should_scan_file {
changed_content.push(ChangedContent {
file: Some(path.clone()),
content: None,
});
}
}
if !changed_content.is_empty() {
let candidates = parse_all_blobs(read_all_files(changed_content));
self.candidates.extend(candidates);
}
}
// Ensures that all files/globs are resolved and the scanner is ready to scan
// content for candidates.
fn prepare(&mut self) {
if self.ready {
return;
}
self.detect_sources();
self.scan_sources();
self.ready = true;
}
#[tracing::instrument(skip_all)]
fn detect_sources(&mut self) {
if let Some(detect_sources) = &self.detect_sources {
let (files, globs) = detect_sources.detect();
self.files.extend(files);
self.globs.extend(globs);
}
}
#[tracing::instrument(skip_all)]
fn scan_sources(&mut self) {
let Some(sources) = &self.sources else {
return;
};
if sources.is_empty() {
return;
}
let resolved_files: Vec<_> = match fast_glob(sources) {
Ok(matches) => matches
.filter_map(|x| dunce::canonicalize(&x).ok())
.collect(),
@ -92,364 +219,36 @@ pub fn scan_dir(opts: ScanOptions) -> ScanResult {
}
};
files.extend(resolved_files);
self.files.extend(resolved_files);
self.globs.extend(sources.clone());
let optimized_incoming_globs = get_fast_patterns(&opts.sources)
.iter()
// Re-optimize the globs to reduce the number of patterns we have to scan.
self.globs = get_fast_patterns(&self.globs)
.into_iter()
.filter_map(|(root, globs)| {
let root = match dunce::canonicalize(root) {
Ok(root) => root,
Err(error) => {
event!(
tracing::Level::ERROR,
"Failed to canonicalize base path {:?}",
error
);
return None;
}
};
Some((root, globs))
})
.flat_map(|(root, globs)| {
globs.iter().filter_map(|glob| {
let root = match dunce::canonicalize(root.clone()) {
Ok(root) => root,
Err(error) => {
event!(
tracing::Level::ERROR,
"Failed to canonicalize base path {:?}",
error
);
return None;
}
};
let base = root.display().to_string();
let base = root.display().to_string();
let glob = glob.to_string();
Some(GlobEntry {
base,
pattern: glob,
})
globs.into_iter().map(move |glob| GlobEntry {
base: base.clone(),
pattern: glob,
})
})
.collect::<Vec<GlobEntry>>();
globs.extend(optimized_incoming_globs);
}
let mut cache = GLOBAL_CACHE.lock().unwrap();
let modified_files = cache.find_modified_files(&files);
let files = files.iter().map(|x| x.display().to_string()).collect();
if !modified_files.is_empty() {
let content: Vec<_> = modified_files
.into_iter()
.map(|file| ChangedContent {
file: Some(file.clone()),
content: None,
})
.collect();
let candidates = scan_files(content, IO::Parallel as u8 | Parsing::Parallel as u8);
cache.add_candidates(candidates);
}
ScanResult {
candidates: cache.get_candidates(),
files,
globs,
}
}
#[tracing::instrument(skip(root))]
fn resolve_globs(root: &Path, dirs: Vec<PathBuf>) -> Vec<GlobEntry> {
let allowed_paths = FxHashSet::from_iter(dirs);
// A list of directory names where we can't use globs, but we should track each file
// individually instead. This is because these directories are often used for both source and
// destination files.
let mut forced_static_directories = vec![root.join("public")];
// A list of known extensions + a list of extensions we found in the project.
let mut found_extensions = FxHashSet::from_iter(
include_str!("fixtures/template-extensions.txt")
.trim()
.lines()
.filter(|x| !x.starts_with('#')) // Drop commented lines
.filter(|x| !x.is_empty()) // Drop empty lines
.map(|x| x.to_string()),
);
// All root directories.
let mut root_directories = FxHashSet::from_iter(vec![root.to_path_buf()]);
// All directories where we can safely use deeply nested globs to watch all files.
// In other comments we refer to these as "deep glob directories" or similar.
//
// E.g.: `./src/**/*.{html,js}`
let mut deep_globable_directories: FxHashSet<PathBuf> = FxHashSet::default();
// All directories where we can only use shallow globs to watch all direct files but not
// folders.
// In other comments we refer to these as "shallow glob directories" or similar.
//
// E.g.: `./src/*/*.{html,js}`
let mut shallow_globable_directories: FxHashSet<PathBuf> = FxHashSet::default();
// Collect all valid paths from the root. This will already filter out ignored files, unknown
// extensions and binary files.
let mut it = WalkDir::new(root)
// Sorting to make sure that we always see the directories before the files. Also sorting
// alphabetically by default.
.sort_by(
|a, z| match (a.file_type().is_dir(), z.file_type().is_dir()) {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => a.file_name().cmp(z.file_name()),
},
)
.into_iter();
loop {
// We are only interested in valid entries
let entry = match it.next() {
Some(Ok(entry)) => entry,
_ => break,
};
// Ignore known directories that we don't want to traverse into.
if entry.file_type().is_dir() && entry.file_name() == ".git" {
it.skip_current_dir();
continue;
}
if entry.file_type().is_dir() {
// If we are in a directory where we know that we can't use any globs, then we have to
// track each file individually.
if forced_static_directories.contains(&entry.path().to_path_buf()) {
forced_static_directories.push(entry.path().to_path_buf());
root_directories.insert(entry.path().to_path_buf());
continue;
}
// If we are in a directory where the parent is a forced static directory, then this
// will become a forced static directory as well.
if forced_static_directories.contains(&entry.path().parent().unwrap().to_path_buf()) {
forced_static_directories.push(entry.path().to_path_buf());
root_directories.insert(entry.path().to_path_buf());
continue;
}
// If we are in a directory, and the directory is git ignored, then we don't have to
// descent into the directory. However, we have to make sure that we mark the _parent_
// directory as a shallow glob directory because using deep globs from any of the
// parent directories will include this ignored directory which should not be the case.
//
// Another important part is that if one of the ignored directories is a deep glob
// directory, then all of its parents (until the root) should be marked as shallow glob
// directories as well.
if !allowed_paths.contains(&entry.path().to_path_buf()) {
let mut parent = entry.path().parent();
while let Some(parent_path) = parent {
// If the parent is already marked as a valid deep glob directory, then we have
// to mark it as a shallow glob directory instead, because we won't be able to
// use deep globs for this directory anymore.
if deep_globable_directories.contains(parent_path) {
deep_globable_directories.remove(parent_path);
shallow_globable_directories.insert(parent_path.to_path_buf());
}
// If we reached the root, then we can stop.
if parent_path == root {
break;
}
// Mark the parent directory as a shallow glob directory and continue with its
// parent.
shallow_globable_directories.insert(parent_path.to_path_buf());
parent = parent_path.parent();
}
it.skip_current_dir();
continue;
}
// If we are in a directory that is not git ignored, then we can mark this directory as
// a valid deep glob directory. This is only necessary if any of its parents aren't
// marked as deep glob directories already.
let mut found_deep_glob_parent = false;
let mut parent = entry.path().parent();
while let Some(parent_path) = parent {
// If we reached the root, then we can stop.
if parent_path == root {
break;
}
// If the parent is already marked as a deep glob directory, then we can stop
// because this glob will match the current directory already.
if deep_globable_directories.contains(parent_path) {
found_deep_glob_parent = true;
break;
}
parent = parent_path.parent();
}
// If we didn't find a deep glob directory parent, then we can mark this directory as a
// deep glob directory (unless it is the root).
if !found_deep_glob_parent && entry.path() != root {
deep_globable_directories.insert(entry.path().to_path_buf());
}
}
// Handle allowed content paths
if is_allowed_content_path(entry.path())
&& allowed_paths.contains(&entry.path().to_path_buf())
{
let path = entry.path();
// Collect the extension for future use when building globs.
if let Some(extension) = path.extension().and_then(|x| x.to_str()) {
found_extensions.insert(extension.to_string());
}
}
}
let mut extension_list = found_extensions.into_iter().collect::<Vec<_>>();
extension_list.sort();
let extension_list = extension_list.join(",");
// Build the globs for all globable directories.
let shallow_globs = shallow_globable_directories.iter().map(|path| GlobEntry {
base: path.display().to_string(),
pattern: format!("*/*.{{{}}}", extension_list),
});
let deep_globs = deep_globable_directories.iter().map(|path| GlobEntry {
base: path.display().to_string(),
pattern: format!("**/*.{{{}}}", extension_list),
});
shallow_globs.chain(deep_globs).collect::<Vec<_>>()
}
#[tracing::instrument(skip(root))]
fn resolve_files(root: &Path) -> (Vec<PathBuf>, Vec<PathBuf>) {
let mut files: Vec<PathBuf> = vec![];
let mut dirs: Vec<PathBuf> = vec![];
for entry in resolve_allowed_paths(root) {
let Some(file_type) = entry.file_type() else {
continue;
};
if file_type.is_file() {
files.push(entry.into_path());
} else if file_type.is_dir() {
dirs.push(entry.into_path());
}
}
(files, dirs)
}
#[tracing::instrument(skip(root))]
pub fn resolve_allowed_paths(root: &Path) -> impl Iterator<Item = DirEntry> {
WalkBuilder::new(root)
.hidden(false)
.require_git(false)
.filter_entry(|entry| match entry.file_type() {
Some(file_type) if file_type.is_dir() => match entry.file_name().to_str() {
Some(dir) => !IGNORED_CONTENT_DIRS.contains(&dir),
None => false,
},
Some(file_type) if file_type.is_file() || file_type.is_symlink() => {
is_allowed_content_path(entry.path())
}
_ => false,
})
.build()
.filter_map(Result::ok)
}
lazy_static! {
static ref BINARY_EXTENSIONS: Vec<&'static str> =
include_str!("fixtures/binary-extensions.txt")
.trim()
.lines()
.collect::<Vec<_>>();
static ref IGNORED_EXTENSIONS: Vec<&'static str> =
include_str!("fixtures/ignored-extensions.txt")
.trim()
.lines()
.collect::<Vec<_>>();
static ref IGNORED_FILES: Vec<&'static str> = include_str!("fixtures/ignored-files.txt")
.trim()
.lines()
.collect::<Vec<_>>();
static ref IGNORED_CONTENT_DIRS: Vec<&'static str> = vec![".git"];
static ref SHOULD_TRACE: bool = {
matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || value.eq("1") || value.eq("true") || value.contains("tailwind"))
};
/// Track file modification times and cache candidates. This cache lives for the lifetime of
/// the process and simply adds candidates when files are modified. Since candidates aren't
/// removed, incremental builds may contain extra candidates.
static ref GLOBAL_CACHE: Mutex<Cache> = {
Mutex::new(Cache::default())
};
}
pub fn is_allowed_content_path(path: &Path) -> bool {
let path = PathBuf::from(path);
// Skip known ignored files
if path
.file_name()
.unwrap()
.to_str()
.map(|s| IGNORED_FILES.contains(&s))
.unwrap_or(false)
{
return false;
}
// Skip known ignored extensions
path.extension()
.map(|s| s.to_str().unwrap_or_default())
.map(|ext| !IGNORED_EXTENSIONS.contains(&ext) && !BINARY_EXTENSIONS.contains(&ext))
.unwrap_or(false)
}
#[derive(Debug)]
pub enum IO {
Sequential = 0b0001,
Parallel = 0b0010,
}
impl From<u8> for IO {
fn from(item: u8) -> Self {
match item & 0b0011 {
0b0001 => IO::Sequential,
0b0010 => IO::Parallel,
_ => unimplemented!("Unknown 'IO' strategy"),
}
}
}
#[derive(Debug)]
pub enum Parsing {
Sequential = 0b0100,
Parallel = 0b1000,
}
impl From<u8> for Parsing {
fn from(item: u8) -> Self {
match item & 0b1100 {
0b0100 => Parsing::Sequential,
0b1000 => Parsing::Parallel,
_ => unimplemented!("Unknown 'Parsing' strategy"),
}
}
}
#[tracing::instrument(skip(input, options))]
pub fn scan_files(input: Vec<ChangedContent>, options: u8) -> Vec<String> {
match (IO::from(options), Parsing::from(options)) {
(IO::Sequential, Parsing::Sequential) => parse_all_blobs_sync(read_all_files_sync(input)),
(IO::Sequential, Parsing::Parallel) => parse_all_blobs(read_all_files_sync(input)),
(IO::Parallel, Parsing::Sequential) => parse_all_blobs_sync(read_all_files(input)),
(IO::Parallel, Parsing::Parallel) => parse_all_blobs(read_all_files(input)),
}
}
@ -479,7 +278,7 @@ fn read_changed_content(c: ChangedContent) -> Option<Vec<u8>> {
}
}
#[tracing::instrument(skip(changed_content))]
#[tracing::instrument(skip_all)]
fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
event!(
tracing::Level::INFO,
@ -493,21 +292,7 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
.collect()
}
#[tracing::instrument(skip(changed_content))]
fn read_all_files_sync(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
event!(
tracing::Level::INFO,
"Reading {:?} file(s)",
changed_content.len()
);
changed_content
.into_iter()
.filter_map(read_changed_content)
.collect()
}
#[tracing::instrument(skip(blobs))]
#[tracing::instrument(skip_all)]
fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
let input = &input[..];
@ -530,27 +315,3 @@ fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
result.sort();
result
}
#[tracing::instrument(skip(blobs))]
fn parse_all_blobs_sync(blobs: Vec<Vec<u8>>) -> Vec<String> {
let input: Vec<_> = blobs.iter().map(|blob| &blob[..]).collect();
let input = &input[..];
let mut result: Vec<String> = input
.iter()
.map(|input| Extractor::unique(input, Default::default()))
.fold(FxHashSet::default(), |mut a, b| {
a.extend(b);
a
})
.into_iter()
.map(|s| {
// SAFETY: When we parsed the candidates, we already guaranteed that the byte slices
// are valid, therefore we don't have to re-check here when we want to convert it back
// to a string.
unsafe { String::from_utf8_unchecked(s.to_vec()) }
})
.collect();
result.sort();
result
}


@ -0,0 +1,64 @@
use ignore::{DirEntry, WalkBuilder};
use std::{path::Path, sync};
static BINARY_EXTENSIONS: sync::LazyLock<Vec<&'static str>> = sync::LazyLock::new(|| {
include_str!("fixtures/binary-extensions.txt")
.trim()
.lines()
.collect()
});
static IGNORED_EXTENSIONS: sync::LazyLock<Vec<&'static str>> = sync::LazyLock::new(|| {
include_str!("fixtures/ignored-extensions.txt")
.trim()
.lines()
.collect()
});
static IGNORED_FILES: sync::LazyLock<Vec<&'static str>> = sync::LazyLock::new(|| {
include_str!("fixtures/ignored-files.txt")
.trim()
.lines()
.collect()
});
static IGNORED_CONTENT_DIRS: sync::LazyLock<Vec<&'static str>> =
sync::LazyLock::new(|| vec![".git"]);
#[tracing::instrument(skip(root))]
pub fn resolve_allowed_paths(root: &Path) -> impl Iterator<Item = DirEntry> {
WalkBuilder::new(root)
.hidden(false)
.require_git(false)
.filter_entry(|entry| match entry.file_type() {
Some(file_type) if file_type.is_dir() => match entry.file_name().to_str() {
Some(dir) => !IGNORED_CONTENT_DIRS.contains(&dir),
None => false,
},
Some(file_type) if file_type.is_file() || file_type.is_symlink() => {
is_allowed_content_path(entry.path())
}
_ => false,
})
.build()
.filter_map(Result::ok)
}
pub fn is_allowed_content_path(path: &Path) -> bool {
// Skip known ignored files
if path
.file_name()
.unwrap()
.to_str()
.map(|s| IGNORED_FILES.contains(&s))
.unwrap_or(false)
{
return false;
}
// Skip known ignored extensions
path.extension()
.map(|s| s.to_str().unwrap_or_default())
.map(|ext| !IGNORED_EXTENSIONS.contains(&ext) && !BINARY_EXTENSIONS.contains(&ext))
.unwrap_or(false)
}


@ -0,0 +1,223 @@
use crate::scanner::allowed_paths::{is_allowed_content_path, resolve_allowed_paths};
use crate::GlobEntry;
use fxhash::FxHashSet;
use std::cmp::Ordering;
use std::path::PathBuf;
use std::sync;
use walkdir::WalkDir;
#[derive(Debug, Clone)]
pub struct DetectSources {
base: PathBuf,
}
static KNOWN_EXTENSIONS: sync::LazyLock<Vec<&'static str>> = sync::LazyLock::new(|| {
include_str!("fixtures/template-extensions.txt")
.trim()
.lines()
// Drop commented lines
.filter(|x| !x.starts_with('#'))
// Drop empty lines
.filter(|x| !x.is_empty())
.collect()
});
impl DetectSources {
pub fn new(base: PathBuf) -> Self {
Self { base }
}
pub fn detect(&self) -> (Vec<PathBuf>, Vec<GlobEntry>) {
let (files, dirs) = self.resolve_files();
let globs = self.resolve_globs(&dirs);
(files, globs)
}
fn resolve_files(&self) -> (Vec<PathBuf>, Vec<PathBuf>) {
let mut files: Vec<PathBuf> = vec![];
let mut dirs: Vec<PathBuf> = vec![];
for entry in resolve_allowed_paths(&self.base) {
let Some(file_type) = entry.file_type() else {
continue;
};
if file_type.is_file() {
files.push(entry.into_path());
} else if file_type.is_dir() {
dirs.push(entry.into_path());
}
}
(files, dirs)
}
fn resolve_globs(&self, dirs: &Vec<PathBuf>) -> Vec<GlobEntry> {
let allowed_paths = FxHashSet::from_iter(dirs);
// A list of directory names where we can't use globs, but we should track each file
// individually instead. This is because these directories are often used for both source and
// destination files.
let mut forced_static_directories = vec![self.base.join("public")];
// A list of known extensions + a list of extensions we found in the project.
let mut found_extensions =
FxHashSet::from_iter(KNOWN_EXTENSIONS.iter().map(|x| x.to_string()));
// All root directories.
let mut root_directories = FxHashSet::from_iter(vec![self.base.clone()]);
// All directories where we can safely use deeply nested globs to watch all files.
// In other comments we refer to these as "deep glob directories" or similar.
//
// E.g.: `./src/**/*.{html,js}`
let mut deep_globable_directories: FxHashSet<PathBuf> = FxHashSet::default();
// All directories where we can only use shallow globs to watch all direct files but not
// folders.
// In other comments we refer to these as "shallow glob directories" or similar.
//
// E.g.: `./src/*/*.{html,js}`
let mut shallow_globable_directories: FxHashSet<PathBuf> = FxHashSet::default();
// Collect all valid paths from the root. This will already filter out ignored files, unknown
// extensions and binary files.
let mut it = WalkDir::new(&self.base)
// Sorting to make sure that we always see the directories before the files. Also sorting
// alphabetically by default.
.sort_by(
|a, z| match (a.file_type().is_dir(), z.file_type().is_dir()) {
(true, false) => Ordering::Less,
(false, true) => Ordering::Greater,
_ => a.file_name().cmp(z.file_name()),
},
)
.into_iter();
loop {
// We are only interested in valid entries
let entry = match it.next() {
Some(Ok(entry)) => entry,
_ => break,
};
// Ignore known directories that we don't want to traverse into.
if entry.file_type().is_dir() && entry.file_name() == ".git" {
it.skip_current_dir();
continue;
}
if entry.file_type().is_dir() {
// If we are in a directory where we know that we can't use any globs, then we have to
// track each file individually.
if forced_static_directories.contains(&entry.path().to_path_buf()) {
forced_static_directories.push(entry.path().to_path_buf());
root_directories.insert(entry.path().to_path_buf());
continue;
}
// If we are in a directory where the parent is a forced static directory, then this
// will become a forced static directory as well.
if forced_static_directories.contains(&entry.path().parent().unwrap().to_path_buf())
{
forced_static_directories.push(entry.path().to_path_buf());
root_directories.insert(entry.path().to_path_buf());
continue;
}
// If we are in a directory, and the directory is git ignored, then we don't have to
// descent into the directory. However, we have to make sure that we mark the _parent_
// directory as a shallow glob directory because using deep globs from any of the
// parent directories will include this ignored directory which should not be the case.
//
// Another important part is that if one of the ignored directories is a deep glob
// directory, then all of its parents (until the root) should be marked as shallow glob
// directories as well.
if !allowed_paths.contains(&entry.path().to_path_buf()) {
let mut parent = entry.path().parent();
while let Some(parent_path) = parent {
// If the parent is already marked as a valid deep glob directory, then we have
// to mark it as a shallow glob directory instead, because we won't be able to
// use deep globs for this directory anymore.
if deep_globable_directories.contains(parent_path) {
deep_globable_directories.remove(parent_path);
shallow_globable_directories.insert(parent_path.to_path_buf());
}
// If we reached the root, then we can stop.
if parent_path == self.base {
break;
}
// Mark the parent directory as a shallow glob directory and continue with its
// parent.
shallow_globable_directories.insert(parent_path.to_path_buf());
parent = parent_path.parent();
}
it.skip_current_dir();
continue;
}
// If we are in a directory that is not git ignored, then we can mark this directory as
// a valid deep glob directory. This is only necessary if any of its parents aren't
// marked as deep glob directories already.
let mut found_deep_glob_parent = false;
let mut parent = entry.path().parent();
while let Some(parent_path) = parent {
// If we reached the root, then we can stop.
if parent_path == self.base {
break;
}
// If the parent is already marked as a deep glob directory, then we can stop
// because this glob will match the current directory already.
if deep_globable_directories.contains(parent_path) {
found_deep_glob_parent = true;
break;
}
parent = parent_path.parent();
}
// If we didn't find a deep glob directory parent, then we can mark this directory as a
// deep glob directory (unless it is the root).
if !found_deep_glob_parent && entry.path() != self.base {
deep_globable_directories.insert(entry.path().to_path_buf());
}
}
// Handle allowed content paths
if is_allowed_content_path(entry.path())
&& allowed_paths.contains(&entry.path().to_path_buf())
{
let path = entry.path();
// Collect the extension for future use when building globs.
if let Some(extension) = path.extension().and_then(|x| x.to_str()) {
found_extensions.insert(extension.to_string());
}
}
}
let mut extension_list = found_extensions.into_iter().collect::<Vec<_>>();
extension_list.sort();
let extension_list = extension_list.join(",");
// Build the globs for all globable directories.
let shallow_globs = shallow_globable_directories.iter().map(|path| GlobEntry {
base: path.display().to_string(),
pattern: format!("*/*.{{{}}}", extension_list),
});
let deep_globs = deep_globable_directories.iter().map(|path| GlobEntry {
base: path.display().to_string(),
pattern: format!("**/*.{{{}}}", extension_list),
});
shallow_globs.chain(deep_globs).collect::<Vec<_>>()
}
}


@ -0,0 +1,2 @@
pub mod allowed_paths;
pub mod detect_sources;


@ -1,6 +1,6 @@
#[cfg(test)]
mod scan_dir {
use serial_test::serial;
mod scanner {
use scanner::detect_sources::DetectSources;
use std::process::Command;
use std::{fs, path};
@ -11,9 +11,6 @@ mod scan_dir {
paths_with_content: &[(&str, Option<&str>)],
globs: Vec<&str>,
) -> (Vec<String>, Vec<String>) {
// Ensure that every test truly runs in isolation without any cache
clear_cache();
// Create a temporary working directory
let dir = tempdir().unwrap().into_path();
@ -38,24 +35,28 @@ mod scan_dir {
let base = format!("{}", dir.display());
// Resolve all content paths for the (temporary) current working directory
let result = scan_dir(ScanOptions {
base: Some(base.clone()),
sources: globs
.iter()
.map(|x| GlobEntry {
base: base.clone(),
pattern: x.to_string(),
})
.collect(),
});
let mut scanner = Scanner::new(
Some(DetectSources::new(base.clone().into())),
Some(
globs
.iter()
.map(|x| GlobEntry {
base: base.clone(),
pattern: x.to_string(),
})
.collect(),
),
);
let mut paths: Vec<_> = result
.files
let candidates = scanner.scan();
let mut paths: Vec<_> = scanner
.get_files()
.into_iter()
.map(|x| x.replace(&format!("{}{}", &base, path::MAIN_SEPARATOR), ""))
.collect();
for glob in result.globs {
for glob in scanner.get_globs() {
paths.push(format!(
"{}{}{}",
glob.base,
@ -78,7 +79,7 @@ mod scan_dir {
// _could_ be random)
paths.sort();
(paths, result.candidates)
(paths, candidates)
}
fn scan(paths_with_content: &[(&str, Option<&str>)]) -> (Vec<String>, Vec<String>) {
@ -90,7 +91,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_work_with_a_set_of_root_files() {
let globs = test(&[
("index.html", None),
@ -102,7 +102,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_work_with_a_set_of_root_files_and_ignore_ignored_files() {
let globs = test(&[
(".gitignore", Some("b.html")),
@ -115,7 +114,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_list_all_files_in_the_public_folder_explicitly() {
let globs = test(&[
("index.html", None),
@ -135,7 +133,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_list_nested_folders_explicitly_in_the_public_folder() {
let globs = test(&[
("index.html", None),
@ -165,7 +162,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_list_all_files_in_the_public_folder_explicitly_except_ignored_files() {
let globs = test(&[
(".gitignore", Some("public/b.html\na.html")),
@ -178,7 +174,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_use_a_glob_for_top_level_folders() {
let globs = test(&[
("index.html", None),
@ -196,7 +191,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_ignore_binary_files() {
let globs = test(&[
("index.html", None),
@ -208,7 +202,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_ignore_known_extensions() {
let globs = test(&[
("index.html", None),
@ -220,7 +213,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_ignore_known_files() {
let globs = test(&[
("index.html", None),
@ -231,7 +223,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_ignore_and_expand_nested_ignored_folders() {
let globs = test(&[
// Explicitly listed root files
@ -318,7 +309,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_scan_for_utilities() {
let mut ignores = String::new();
ignores.push_str("# md:font-bold\n");
@ -345,7 +335,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_scan_content_paths() {
let candidates = scan_with_globs(
&[
@ -361,7 +350,6 @@ mod scan_dir {
}
#[test]
#[serial]
fn it_should_scan_content_paths_even_when_they_are_git_ignored() {
let candidates = scan_with_globs(
&[


@ -1,5 +1,5 @@
import watcher from '@parcel/watcher'
import { clearCache, scanDir, type ChangedContent } from '@tailwindcss/oxide'
import { Scanner, type ChangedContent } from '@tailwindcss/oxide'
import fixRelativePathsPlugin from 'internal-postcss-fix-relative-paths'
import { Features, transform } from 'lightningcss'
import { existsSync } from 'node:fs'
@ -145,8 +145,8 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
// Compile the input
let compiler = await compile(input)
let scanDirResult = scanDir({
base, // Root directory, mainly used for auto content detection
let scanner = new Scanner({
detectSources: { base },
sources: compiler.globs.map((pattern) => ({
base: inputBasePath, // Globs are relative to the input.css file
pattern,
@ -156,7 +156,7 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
// Watch for changes
if (args['--watch']) {
let cleanupWatchers = await createWatchers(
watchDirectories(base, scanDirResult),
watchDirectories(base, scanner),
async function handle(files) {
try {
// If the only change happened to the output file, then we don't want to
@ -195,9 +195,6 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
// Clear all watchers
cleanupWatchers()
// Clear cached candidates
clearCache()
// Collect the new `input` and `cssImportPaths`.
;[input, cssImportPaths] = await handleImports(
args['--input']
@ -212,30 +209,33 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
compiler = await compile(input)
// Re-scan the directory to get the new `candidates`
scanDirResult = scanDir({
base, // Root directory, mainly used for auto content detection
scanner = new Scanner({
detectSources: { base },
sources: compiler.globs.map((pattern) => ({
base: inputBasePath, // Globs are relative to the input.css file
pattern,
})),
})
// Scan the directory for candidates
let candidates = scanner.scan()
// Setup new watchers
cleanupWatchers = await createWatchers(watchDirectories(base, scanDirResult), handle)
cleanupWatchers = await createWatchers(watchDirectories(base, scanner), handle)
// Re-compile the CSS
compiledCss = compiler.build(scanDirResult.candidates)
compiledCss = compiler.build(candidates)
}
// Scan changed files only for incremental rebuilds.
else if (rebuildStrategy === 'incremental') {
let candidates = scanDirResult.scanFiles(changedFiles)
let newCandidates = scanner.scanFiles(changedFiles)
// No candidates found which means we don't need to rebuild. This can
// happen if a file is detected but doesn't match any of the globs.
if (candidates.length === 0) return
if (newCandidates.length <= 0) return
compiledCss = compiler.build(candidates)
compiledCss = compiler.build(newCandidates)
}
await write(compiledCss, args)
@ -265,7 +265,7 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
process.stdin.resume()
}
await write(compiler.build(scanDirResult.candidates), args)
await write(compiler.build(scanner.scan()), args)
let end = process.hrtime.bigint()
eprintln(header())
@ -273,9 +273,9 @@ export async function handle(args: Result<ReturnType<typeof options>>) {
eprintln(`Done in ${formatDuration(end - start)}`)
}
function watchDirectories(base: string, scanDirResult: ReturnType<typeof scanDir>) {
function watchDirectories(base: string, scanner: Scanner) {
return [base].concat(
scanDirResult.globs.flatMap((globEntry) => {
scanner.globs.flatMap((globEntry) => {
// We don't want a watcher for negated globs.
if (globEntry.pattern[0] === '!') return []


@ -13,11 +13,6 @@ const INPUT_CSS_PATH = `${__dirname}/fixtures/example-project/input.css`
const css = String.raw
beforeEach(async () => {
let { clearCache } = await import('@tailwindcss/oxide')
clearCache()
})
test("`@import 'tailwindcss'` is replaced with the generated CSS", async () => {
let processor = postcss([
tailwindcss({ base: `${__dirname}/fixtures/example-project`, optimize: { minify: false } }),


@ -1,4 +1,4 @@
import { scanDir } from '@tailwindcss/oxide'
import { Scanner } from '@tailwindcss/oxide'
import fs from 'fs'
import fixRelativePathsPlugin from 'internal-postcss-fix-relative-paths'
import { Features, transform } from 'lightningcss'
@ -129,16 +129,19 @@ function tailwindcss(opts: PluginOptions = {}): AcceptedPlugin {
let css = ''
// Look for candidates used to generate the CSS
let scanDirResult = scanDir({
base, // Root directory, mainly used for auto content detection
let scanner = new Scanner({
detectSources: { base },
sources: context.compiler.globs.map((pattern) => ({
base: inputBasePath, // Globs are relative to the input.css file
pattern,
})),
})
//
let candidates = scanner.scan()
// Add all found files as direct dependencies
for (let file of scanDirResult.files) {
for (let file of scanner.files) {
result.messages.push({
type: 'dependency',
plugin: '@tailwindcss/postcss',
@ -150,7 +153,7 @@ function tailwindcss(opts: PluginOptions = {}): AcceptedPlugin {
// Register dependencies so changes in `base` cause a rebuild while
// giving tools like Vite or Parcel a glob that can be used to limit
// the files that cause a rebuild to only those that match it.
for (let { base, pattern } of scanDirResult.globs) {
for (let { base, pattern } of scanner.globs) {
result.messages.push({
type: 'dir-dependency',
plugin: '@tailwindcss/postcss',
@ -162,9 +165,9 @@ function tailwindcss(opts: PluginOptions = {}): AcceptedPlugin {
if (rebuildStrategy === 'full') {
context.compiler = await createCompiler()
css = context.compiler.build(hasTailwind ? scanDirResult.candidates : [])
css = context.compiler.build(hasTailwind ? candidates : [])
} else if (rebuildStrategy === 'incremental') {
css = context.compiler.build!(scanDirResult.candidates)
css = context.compiler.build!(candidates)
}
// Replace CSS


@ -1,4 +1,4 @@
import { scanDir } from '@tailwindcss/oxide'
import { Scanner } from '@tailwindcss/oxide'
import fixRelativePathsPlugin, { normalizePath } from 'internal-postcss-fix-relative-paths'
import { Features, transform } from 'lightningcss'
import path from 'path'
@ -9,9 +9,9 @@ import type { Plugin, ResolvedConfig, Rollup, Update, ViteDevServer } from 'vite
export default function tailwindcss(): Plugin[] {
let server: ViteDevServer | null = null
let config: ResolvedConfig | null = null
let candidates = new Set<string>()
let scanDirResult: ReturnType<typeof scanDir> | null = null
let scanner: Scanner | null = null
let changedContent: { content: string; extension: string }[] = []
let candidates: string[] = []
// In serve mode this is treated as a set — the content doesn't matter.
// In build mode, we store file contents to use them in renderChunk.
@ -63,21 +63,18 @@ export default function tailwindcss(): Plugin[] {
function scan(src: string, extension: string) {
let updated = false
if (scanDirResult === null) {
if (scanner === null) {
changedContent.push({ content: src, extension })
return updated
}
// Parse all candidates given the resolved files
for (let candidate of scanDirResult?.scanFiles([{ content: src, extension }]) ?? []) {
// On an initial or full build, updated becomes true immediately so we
// won't be making extra checks.
if (!updated) {
if (candidates.has(candidate)) continue
updated = true
}
candidates.add(candidate)
let newCandidates = scanner.scanFiles([{ content: src, extension }])
for (let candidate of newCandidates) {
updated = true
candidates.push(candidate)
}
return updated
}
@ -93,28 +90,31 @@ export default function tailwindcss(): Plugin[] {
},
})
scanDirResult = scanDir({
scanner = new Scanner({
sources: globs.map((pattern) => ({
base: inputBasePath, // Globs are relative to the input.css file
pattern,
})),
})
if (changedContent.length > 0) {
scanDirResult.candidates = scanDirResult.scanFiles(changedContent.splice(0))
}
// This should not be here, but right now the Vite plugin is setup where we
// setup a new scanner and compiler every time we request the CSS file
// (regardless whether it actually changed or not).
let initialCandidates = scanner.scan()
for (let candidate of scanDirResult.candidates) {
candidates.add(candidate)
if (changedContent.length > 0) {
for (let candidate of scanner.scanFiles(changedContent.splice(0))) {
initialCandidates.push(candidate)
}
}
// Watch individual files
for (let file of scanDirResult.files) {
for (let file of scanner.files) {
addWatchFile(file)
}
// Watch globs
for (let glob of scanDirResult.globs) {
for (let glob of scanner.globs) {
if (glob.pattern[0] === '!') continue
let relative = path.relative(config!.root, glob.base)
@ -128,7 +128,7 @@ export default function tailwindcss(): Plugin[] {
addWatchFile(path.posix.join(relative, glob.pattern))
}
return build(Array.from(candidates))
return build(candidates.splice(0).concat(initialCandidates))
}
async function generateOptimizedCss(


@ -1,4 +1,4 @@
import { scanDir } from '@tailwindcss/oxide'
import { Scanner } from '@tailwindcss/oxide'
import { bench } from 'vitest'
import { parseCandidate } from './candidate'
import { buildDesignSystem } from './design-system'
@ -8,12 +8,13 @@ import { Theme } from './theme'
const root = process.env.FOLDER || process.cwd()
// Auto content detection
const result = scanDir({ base: root })
const scanner = new Scanner({ detectSources: { base: root } })
const candidates = scanner.scan()
const designSystem = buildDesignSystem(new Theme())
bench('parseCandidate', () => {
for (let candidate of result.candidates) {
for (let candidate of candidates) {
parseCandidate(candidate, designSystem)
}
})


@ -1,4 +1,4 @@
import { scanDir } from '@tailwindcss/oxide'
import { Scanner } from '@tailwindcss/oxide'
import { bench } from 'vitest'
import { compile } from '.'
@ -7,10 +7,12 @@ const root = process.env.FOLDER || process.cwd()
const css = String.raw
bench('compile', async () => {
let { candidates } = scanDir({ base: root })
let scanner = new Scanner({ detectSources: { base: root } })
let candidates = scanner.scan()
let { build } = await compile(css`
@tailwind utilities;
`)
build(candidates)
})


@ -1,5 +1,5 @@
import { expect, test, type Page } from '@playwright/test'
import { IO, Parsing, scanFiles } from '@tailwindcss/oxide'
import { Scanner } from '@tailwindcss/oxide'
import fs from 'fs'
import path from 'path'
import { compile } from '../src'
@ -291,6 +291,7 @@ test('content-none persists when conditionally styling a pseudo-element', async
const preflight = fs.readFileSync(path.resolve(__dirname, '..', 'preflight.css'), 'utf-8')
const defaultTheme = fs.readFileSync(path.resolve(__dirname, '..', 'theme.css'), 'utf-8')
async function render(page: Page, content: string) {
let { build } = await compile(css`
@layer theme, base, components, utilities;
@ -314,10 +315,12 @@ async function render(page: Page, content: string) {
content = `<div id="mouse-park" class="size-12"></div>${content}`
await page.setContent(content)
let scanner = new Scanner({})
let candidates = scanner.scanFiles([{ content, extension: 'html' }])
await page.addStyleTag({
content: optimizeCss(
build(scanFiles([{ content, extension: 'html' }], IO.Sequential | Parsing.Sequential)),
),
content: optimizeCss(build(candidates)),
})
await page.locator('#mouse-park').hover()