diff --git a/.github/workflows/tests-linux.yml b/.github/workflows/tests-linux.yml index 06a6ed1db..5df9071f8 100644 --- a/.github/workflows/tests-linux.yml +++ b/.github/workflows/tests-linux.yml @@ -70,12 +70,13 @@ jobs: services: mssql: - image: "mcr.microsoft.com/mssql/server:2022-latest" + image: "mcr.microsoft.com/mssql/server:2025-latest" ports: - "1433:1433" env: SA_PASSWORD: "Admin12345" ACCEPT_EULA: "Y" + MSSQL_PID: "Express" steps: - uses: actions/checkout@v5 diff --git a/docker-compose.yml b/docker-compose.yml index e4c775515..1660d2b3e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -73,13 +73,14 @@ services: # mssql mssql: - image: "mcr.microsoft.com/mssql/server:2022-latest" + image: "mcr.microsoft.com/mssql/server:2025-latest" container_name: "typeorm-mssql" ports: - "1433:1433" environment: SA_PASSWORD: "Admin12345" ACCEPT_EULA: "Y" + MSSQL_PID: "Express" # cockroachdb cockroachdb: diff --git a/docs/docs/drivers/microsoft-sqlserver.md b/docs/docs/drivers/microsoft-sqlserver.md index 9f3962111..c9304944d 100644 --- a/docs/docs/drivers/microsoft-sqlserver.md +++ b/docs/docs/drivers/microsoft-sqlserver.md @@ -165,4 +165,64 @@ Based on [tedious](https://tediousjs.github.io/node-mssql/) MSSQL implementation ## Column Types -`int`, `bigint`, `bit`, `decimal`, `money`, `numeric`, `smallint`, `smallmoney`, `tinyint`, `float`, `real`, `date`, `datetime2`, `datetime`, `datetimeoffset`, `smalldatetime`, `time`, `char`, `varchar`, `text`, `nchar`, `nvarchar`, `ntext`, `binary`, `image`, `varbinary`, `hierarchyid`, `sql_variant`, `timestamp`, `uniqueidentifier`, `xml`, `geometry`, `geography`, `rowversion` +`int`, `bigint`, `bit`, `decimal`, `money`, `numeric`, `smallint`, `smallmoney`, `tinyint`, `float`, `real`, `date`, `datetime2`, `datetime`, `datetimeoffset`, `smalldatetime`, `time`, `char`, `varchar`, `text`, `nchar`, `nvarchar`, `ntext`, `binary`, `image`, `varbinary`, `hierarchyid`, `sql_variant`, `timestamp`, `uniqueidentifier`, `xml`, 
`geometry`, `geography`, `rowversion`, `vector` + +### Vector Type (vector) + +The `vector` data type is available in SQL Server for storing high-dimensional vectors, commonly used for: + +- Semantic search with embeddings +- Recommendation systems +- Similarity matching +- Machine learning applications + +NOTE: general `halfvec` type support is unavailable because this feature is still in preview. See the Microsoft docs: [Vector data type](https://learn.microsoft.com/en-us/sql/t-sql/data-types/vector-data-type). + +#### Usage + +```typescript +@Entity() +export class DocumentChunk { + @PrimaryGeneratedColumn() + id: number + + @Column("varchar") + content: string + + // Vector column with 1998 dimensions + @Column("vector", { length: 1998 }) + embedding: number[] +} +``` + +#### Vector Similarity Search + +SQL Server provides the `VECTOR_DISTANCE` function for calculating distances between vectors: + +```typescript +const queryEmbedding = [ + /* your query vector */ +] + +const results = await dataSource.query( + ` + DECLARE @question AS VECTOR (1998) = @0; + SELECT TOP (10) dc.*, + VECTOR_DISTANCE('cosine', @question, embedding) AS distance + FROM document_chunk dc + ORDER BY VECTOR_DISTANCE('cosine', @question, embedding) +`, + [JSON.stringify(queryEmbedding)], +) +``` + +**Distance Metrics:** + +- `'cosine'` - Cosine distance (most common for semantic search) +- `'euclidean'` - Euclidean (L2) distance +- `'dot'` - Negative dot product + +**Requirements:** + +- SQL Server version with vector support enabled +- Vector dimensions must be specified using the `length` option diff --git a/docs/docs/entity/1-entities.md b/docs/docs/entity/1-entities.md index 2134c7003..290dfef3f 100644 --- a/docs/docs/entity/1-entities.md +++ b/docs/docs/entity/1-entities.md @@ -182,16 +182,17 @@ There are several special column types with additional functionality available: ### Vector columns -Vector columns are supported on both PostgreSQL (via 
[`pgvector`](https://github.com/pgvector/pgvector) extension) and SAP HANA Cloud, enabling storing and querying vector embeddings for similarity search and machine learning applications. +Vector columns are supported on PostgreSQL (via [`pgvector`](https://github.com/pgvector/pgvector) extension), Microsoft SQL Server, and SAP HANA Cloud, enabling storing and querying vector embeddings for similarity search and machine learning applications. TypeORM supports both `vector` and `halfvec` column types across databases: -- `vector` - stores vectors as 4-byte floats (single precision) - - PostgreSQL: native `vector` type via pgvector extension - - SAP HANA: alias for `real_vector` type -- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency - - PostgreSQL: native `halfvec` type via pgvector extension - - SAP HANA: alias for `half_vector` type +- `vector` - stores vectors as 4-byte floats (single precision) +    - PostgreSQL: native `vector` type via pgvector extension +    - SQL Server: native `vector` type +    - SAP HANA: alias for `real_vector` type +- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency +    - PostgreSQL: native `halfvec` type via pgvector extension +    - SAP HANA: alias for `half_vector` type You can specify the vector dimensions using the `length` option: @@ -201,45 +202,65 @@ export class Post { @PrimaryGeneratedColumn() id: number - // Vector without specified dimensions (works on PostgreSQL and SAP HANA) + // Vector without specified dimensions (works on PostgreSQL and SAP HANA; SQL Server requires explicit dimensions) @Column("vector") embedding: number[] | Buffer - // Vector with 3 dimensions: vector(3) (works on PostgreSQL and SAP HANA) + // Vector with 3 dimensions: vector(3) @Column("vector", { length: 3 }) embedding_3d: number[] | Buffer - // Half-precision vector with 4 dimensions: halfvec(4) (works on PostgreSQL and SAP HANA) + // Half-precision vector with 4 dimensions: halfvec(4) (PostgreSQL 
and SAP HANA only) @Column("halfvec", { length: 4 }) halfvec_embedding: number[] | Buffer } ``` -Vector columns can be used for similarity searches using PostgreSQL's vector operators: +**PostgreSQL** - Vector columns can be used for similarity searches using vector operators: ```typescript // L2 distance (Euclidean) - <-> const results = await dataSource.query( `SELECT id, embedding FROM post ORDER BY embedding <-> $1 LIMIT 5`, - ["[1,2,3]"] + ["[1,2,3]"], ) // Cosine distance - <=> const results = await dataSource.query( `SELECT id, embedding FROM post ORDER BY embedding <=> $1 LIMIT 5`, - ["[1,2,3]"] + ["[1,2,3]"], ) // Inner product - <#> const results = await dataSource.query( `SELECT id, embedding FROM post ORDER BY embedding <#> $1 LIMIT 5`, - ["[1,2,3]"] + ["[1,2,3]"], ) ``` -> **Note**: -> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators. -> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`. Use the appropriate [vector similarity functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches. +**SQL Server** - Use the `VECTOR_DISTANCE` function for similarity searches: + +```typescript +const queryEmbedding = [1, 2, 3] + +// Cosine distance +const results = await dataSource.query( + ` + DECLARE @question AS VECTOR(3) = @0; + SELECT TOP (5) id, embedding, + VECTOR_DISTANCE('cosine', @question, embedding) AS distance + FROM post + ORDER BY VECTOR_DISTANCE('cosine', @question, embedding) +`, + [JSON.stringify(queryEmbedding)], +) +``` + +> **Note**: +> +> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators. 
+> - **SQL Server**: Vector columns require a SQL Server release that ships the native `vector` type (SQL Server 2025 or Azure SQL Database). Use the `VECTOR_DISTANCE` function for similarity searches. +> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`. Use the appropriate [vector similarity functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches. ## Column types diff --git a/sample/sample37-vector-sqlserver/README.md b/sample/sample37-vector-sqlserver/README.md new file mode 100644 index 000000000..88b0bd0a6 --- /dev/null +++ b/sample/sample37-vector-sqlserver/README.md @@ -0,0 +1,96 @@ +# Vector Type Support in SQL Server + +This sample demonstrates how to use the `vector` column type in SQL Server with TypeORM for storing and querying vector embeddings. + +## Overview + +SQL Server supports the `vector` data type for storing high-dimensional vectors, which is useful for: + +- Semantic search with embeddings +- Recommendation systems +- Similarity matching +- Machine learning applications + +## Features Demonstrated + +1. **Vector Column Definition**: Define columns with specific vector dimensions +2. **Storing Embeddings**: Save vector data as arrays of numbers +3. 
**Vector Similarity Search**: Use `VECTOR_DISTANCE` function for cosine similarity + +## Entity Definition + +```typescript +@Entity("document_chunks") +export class DocumentChunk { + @PrimaryGeneratedColumn("uuid") + id: string + + @Column("varchar", { length: "MAX" }) + content: string + + // Vector column with 1998 dimensions + @Column("vector", { length: 1998 }) + embedding: number[] + + @Column("uuid") + documentId: string + + @ManyToOne(() => Document, (document) => document.chunks) + @JoinColumn({ name: "documentId" }) + document: Document +} +``` + +## Vector Similarity Search + +SQL Server provides the `VECTOR_DISTANCE` function for calculating distances between vectors: + +```typescript +const queryEmbedding = [ + /* your query vector */ +] +const documentIds = ["doc-id-1", "doc-id-2"] +const docIdParams = documentIds.map((_, i) => `@${i + 1}`).join(", ") +const results = await connection.query( + ` + DECLARE @question AS VECTOR (1998) = @0; + SELECT TOP (10) dc.*, + VECTOR_DISTANCE('cosine', @question, embedding) AS distance, + d.fileName as "documentName" + FROM document_chunks dc + LEFT JOIN documents d ON dc.documentId = d.id + WHERE documentId IN (${docIdParams}) + ORDER BY VECTOR_DISTANCE('cosine', @question, embedding) +`, + [JSON.stringify(queryEmbedding), ...documentIds], +) +``` + +## Distance Metrics + +The `VECTOR_DISTANCE` function supports different distance metrics: + +- `'cosine'` - Cosine distance (most common for semantic search) +- `'euclidean'` - Euclidean (L2) distance +- `'dot'` - Negative dot product + +## Requirements + +- SQL Server with vector support enabled +- TypeORM with SQL Server driver (`mssql` package) + +## Running the Sample + +1. Make sure you have SQL Server running with vector support +2. Update the connection settings in `app.ts` if needed +3. 
Run: + ```bash + npm install + ts-node app.ts + ``` + +## Notes + +- Vector dimensions must be specified using the `length` option +- Embeddings are exchanged with SQL Server as JSON array strings and are converted to/from `number[]` arrays automatically +- The maximum vector dimension depends on your SQL Server version and configuration diff --git a/sample/sample37-vector-sqlserver/app.ts b/sample/sample37-vector-sqlserver/app.ts new file mode 100644 index 000000000..fdadc64c2 --- /dev/null +++ b/sample/sample37-vector-sqlserver/app.ts @@ -0,0 +1,88 @@ +import "reflect-metadata" +import { DataSource } from "../../src/index" +import { DocumentChunk } from "./entity/DocumentChunk" +import { Document } from "./entity/Document" + +const AppDataSource = new DataSource({ + type: "mssql", + host: "localhost", + username: "sa", + password: "Admin12345", + database: "test", + synchronize: true, + dropSchema: true, + logging: false, + entities: [Document, DocumentChunk], + options: { + // Enable trust server certificate for local development + trustServerCertificate: true, + }, +}) + +AppDataSource.initialize() + .then(async (connection) => { + console.log("Inserting documents and chunks with vector embeddings...") + + // Create a document + const document = new Document() + document.fileName = "sample-document.txt" + await connection.manager.save(document) + + // Generate sample embeddings (in a real app, these would come from an ML model) + const generateEmbedding = (dimension: number): number[] => { + return Array.from({ length: dimension }, () => Math.random()) + } + + // Create document chunks with embeddings + const chunk1 = new DocumentChunk() + chunk1.content = + "TypeORM is an ORM that can run in NodeJS and can be used with TypeScript and JavaScript." + chunk1.embedding = generateEmbedding(1998) + chunk1.document = document + + const chunk2 = new DocumentChunk() + chunk2.content = + "It supports both Active Record and Data Mapper patterns." 
+ chunk2.embedding = generateEmbedding(1998) + chunk2.document = document + + const chunk3 = new DocumentChunk() + chunk3.content = + "TypeORM supports MySQL, PostgreSQL, MariaDB, SQLite, MS SQL Server, Oracle, and more." + chunk3.embedding = generateEmbedding(1998) + chunk3.document = document + + await connection.manager.save([chunk1, chunk2, chunk3]) + + console.log("Documents and chunks have been saved!") + + // Perform a vector similarity search + console.log("\nPerforming vector similarity search...") + + // Query embedding (in a real app, this would be generated from user query) + const queryEmbedding = generateEmbedding(1998) + const documentIds = [document.id] + + const docIdParams = documentIds.map((_, i) => `@${i + 1}`).join(", ") + const results = await connection.query( + ` + DECLARE @question AS VECTOR (1998) = @0; + SELECT TOP (3) dc.*, VECTOR_DISTANCE('cosine', @question, embedding) AS distance, d.fileName as "documentName" + FROM document_chunks dc + LEFT JOIN documents d ON dc.documentId = d.id + WHERE documentId IN (${docIdParams}) + ORDER BY VECTOR_DISTANCE('cosine', @question, embedding) + `, + [JSON.stringify(queryEmbedding), ...documentIds], + ) + + console.log("Search results (top 3 most similar chunks):") + results.forEach((result: any, index: number) => { + console.log(`\n${index + 1}. 
Distance: ${result.distance}`) + console.log(` Content: ${result.content.substring(0, 80)}...`) + console.log(` Document: ${result.documentName}`) + }) + + await connection.destroy() + }) + .catch((error) => console.log(error)) diff --git a/sample/sample37-vector-sqlserver/entity/Document.ts b/sample/sample37-vector-sqlserver/entity/Document.ts new file mode 100644 index 000000000..da4c652f6 --- /dev/null +++ b/sample/sample37-vector-sqlserver/entity/Document.ts @@ -0,0 +1,19 @@ +import { + Entity, + PrimaryGeneratedColumn, + Column, + OneToMany, +} from "../../../src/index" +import { DocumentChunk } from "./DocumentChunk" + +@Entity("documents") +export class Document { + @PrimaryGeneratedColumn("uuid") + id: string + + @Column("varchar") + fileName: string + + @OneToMany(() => DocumentChunk, (chunk) => chunk.document) + chunks: DocumentChunk[] +} diff --git a/sample/sample37-vector-sqlserver/entity/DocumentChunk.ts b/sample/sample37-vector-sqlserver/entity/DocumentChunk.ts new file mode 100644 index 000000000..13b8926f5 --- /dev/null +++ b/sample/sample37-vector-sqlserver/entity/DocumentChunk.ts @@ -0,0 +1,27 @@ +import { + Entity, + PrimaryGeneratedColumn, + Column, + ManyToOne, + JoinColumn, +} from "../../../src/index" +import { Document } from "./Document" + +@Entity("document_chunks") +export class DocumentChunk { + @PrimaryGeneratedColumn("uuid") + id: string + + @Column("varchar", { length: "MAX" }) + content: string + + @Column("vector", { length: 1998 }) + embedding: number[] + + @Column("uuid") + documentId: string + + @ManyToOne(() => Document, (document) => document.chunks) + @JoinColumn({ name: "documentId" }) + document: Document +} diff --git a/sample/sample37-vector-sqlserver/package.json b/sample/sample37-vector-sqlserver/package.json new file mode 100644 index 000000000..139a90a1f --- /dev/null +++ b/sample/sample37-vector-sqlserver/package.json @@ -0,0 +1,19 @@ +{ + "name": "sample37-vector-sqlserver", + "version": "0.0.1", + "description": 
"Sample demonstrating vector type support in SQL Server with TypeORM", + "main": "app.ts", + "scripts": { + "start": "ts-node app.ts" + }, + "dependencies": { + "typeorm": "latest", + "mssql": "^11.0.0", + "reflect-metadata": "^0.2.2" + }, + "devDependencies": { + "@types/node": "^22.0.0", + "ts-node": "^10.9.2", + "typescript": "^5.5.4" + } +} diff --git a/src/commands/InitCommand.ts b/src/commands/InitCommand.ts index c0cc4716a..cf4d5e5bb 100644 --- a/src/commands/InitCommand.ts +++ b/src/commands/InitCommand.ts @@ -619,6 +619,7 @@ AppDataSource.initialize().then(async () => { environment: SA_PASSWORD: "Admin12345" ACCEPT_EULA: "Y" + MSSQL_PID: "Express" ` case "mongodb": diff --git a/src/driver/sqlserver/MssqlParameter.ts b/src/driver/sqlserver/MssqlParameter.ts index 1ca57d818..55f56a59c 100644 --- a/src/driver/sqlserver/MssqlParameter.ts +++ b/src/driver/sqlserver/MssqlParameter.ts @@ -49,6 +49,7 @@ export class MssqlParameter { constructor(value: any, type: "geography") constructor(value: any, type: "geometry") constructor(value: any, type: "rowversion") + constructor(value: any, type: "vector", length: number) constructor(public value: any, public type: string, ...params: number[]) { this.params = params || [] } diff --git a/src/driver/sqlserver/SqlServerDriver.ts b/src/driver/sqlserver/SqlServerDriver.ts index 6ddab2539..5e085b737 100644 --- a/src/driver/sqlserver/SqlServerDriver.ts +++ b/src/driver/sqlserver/SqlServerDriver.ts @@ -142,6 +142,7 @@ export class SqlServerDriver implements Driver { "geometry", "geography", "rowversion", + "vector", ] /** @@ -164,6 +165,7 @@ export class SqlServerDriver implements Driver { "nvarchar", "binary", "varbinary", + "vector", ] /** @@ -233,6 +235,7 @@ export class SqlServerDriver implements Driver { time: { precision: 7 }, datetime2: { precision: 7 }, datetimeoffset: { precision: 7 }, + vector: { length: 255 }, // default length if not provided a value } cteCapabilities: CteCapabilities = { @@ -549,6 +552,12 @@ export 
class SqlServerDriver implements Driver { return DateUtils.simpleJsonToString(value) } else if (columnMetadata.type === "simple-enum") { return DateUtils.simpleEnumToString(value) + } else if (columnMetadata.type === "vector") { + if (Array.isArray(value)) { + return JSON.stringify(value) + } else { + return value + } } return value @@ -586,6 +595,14 @@ export class SqlServerDriver implements Driver { value = DateUtils.stringToSimpleJson(value) } else if (columnMetadata.type === "simple-enum") { value = DateUtils.stringToSimpleEnum(value, columnMetadata) + } else if (columnMetadata.type === "vector") { + if (typeof value === "string") { + try { + value = JSON.parse(value) + } catch (e) { + // If parsing fails, return the value as-is + } + } } else if (columnMetadata.type === Number) { // convert to number if number value = !isNaN(+value) ? parseInt(value) : value @@ -707,8 +724,12 @@ export class SqlServerDriver implements Driver { let type = column.type + // Handle vector type with length (dimensions) + if (column.type === "vector") { + type = `vector(${column.length})` + } // used 'getColumnLength()' method, because SqlServer sets `varchar` and `nvarchar` length to 1 by default. - if (this.getColumnLength(column)) { + else if (this.getColumnLength(column)) { type += `(${this.getColumnLength(column)})` } else if ( column.precision !== null && diff --git a/src/driver/sqlserver/SqlServerQueryRunner.ts b/src/driver/sqlserver/SqlServerQueryRunner.ts index 0074d9322..b08acf629 100644 --- a/src/driver/sqlserver/SqlServerQueryRunner.ts +++ b/src/driver/sqlserver/SqlServerQueryRunner.ts @@ -3151,14 +3151,24 @@ export class SqlServerQueryRunner if (length === "-1") { tableColumn.length = "MAX" } else { - tableColumn.length = - !this.isDefaultColumnLength( - table, - tableColumn, - length, - ) - ? 
length - : "" + if (tableColumn.type === "vector") { + const len = +length + // NOTE: real returned length is (N*4 + 8) where N is desired dimensions + if (!Number.isNaN(len)) { + tableColumn.length = String( + (len - 8) / 4, + ) + } + } else { + tableColumn.length = + !this.isDefaultColumnLength( + table, + tableColumn, + length, + ) + ? length + : "" + } } } @@ -4134,6 +4144,8 @@ export class SqlServerQueryRunner return this.driver.mssql.UDT case "rowversion": return this.driver.mssql.RowVersion + case "vector": + return this.driver.mssql.Ntext } } diff --git a/src/driver/types/ColumnTypes.ts b/src/driver/types/ColumnTypes.ts index 589c53c5f..5233e0e7f 100644 --- a/src/driver/types/ColumnTypes.ts +++ b/src/driver/types/ColumnTypes.ts @@ -75,7 +75,7 @@ export type WithLengthColumnType = | "binary" // mssql | "varbinary" // mssql, sap | "string" // cockroachdb, spanner - | "vector" // postgres, sap + | "vector" // postgres, mssql, sap | "halfvec" // postgres, sap | "half_vector" // sap | "real_vector" // sap diff --git a/test/functional/database-schema/vectors/sqlserver/entity/DocumentChunk.ts b/test/functional/database-schema/vectors/sqlserver/entity/DocumentChunk.ts new file mode 100644 index 000000000..ba2a7de01 --- /dev/null +++ b/test/functional/database-schema/vectors/sqlserver/entity/DocumentChunk.ts @@ -0,0 +1,18 @@ +import { Entity } from "../../../../../../src/decorator/entity/Entity" +import { Column } from "../../../../../../src/decorator/columns/Column" +import { PrimaryGeneratedColumn } from "../../../../../../src/decorator/columns/PrimaryGeneratedColumn" + +@Entity() +export class DocumentChunk { + @PrimaryGeneratedColumn() + id: number + + @Column("varchar", { nullable: true }) + content: string + + @Column("vector", { length: 1998, nullable: true }) + embedding: number[] + + @Column("varchar", { nullable: true }) + documentId: string +} diff --git a/test/functional/database-schema/vectors/sqlserver/entity/Point.ts 
b/test/functional/database-schema/vectors/sqlserver/entity/Point.ts new file mode 100644 index 000000000..c41f1eac8 --- /dev/null +++ b/test/functional/database-schema/vectors/sqlserver/entity/Point.ts @@ -0,0 +1,15 @@ +import { Entity } from "../../../../../../src/decorator/entity/Entity" +import { Column } from "../../../../../../src/decorator/columns/Column" +import { PrimaryGeneratedColumn } from "../../../../../../src/decorator/columns/PrimaryGeneratedColumn" + +@Entity() +export class Point { + @PrimaryGeneratedColumn() + id: number + + @Column("varchar", { nullable: true }) + name: string + + @Column("vector", { length: 3, nullable: true }) + coords: number[] +} diff --git a/test/functional/database-schema/vectors/sqlserver/vector.ts b/test/functional/database-schema/vectors/sqlserver/vector.ts new file mode 100644 index 000000000..c88c4d3c3 --- /dev/null +++ b/test/functional/database-schema/vectors/sqlserver/vector.ts @@ -0,0 +1,222 @@ +import "reflect-metadata" +import { expect } from "chai" +import { DataSource } from "../../../../../src/data-source/DataSource" +import { + closeTestingConnections, + createTestingConnections, + reloadTestingDatabases, +} from "../../../../utils/test-utils" +import { DocumentChunk } from "./entity/DocumentChunk" +import { Point } from "./entity/Point" + +describe("columns > vector type > sqlserver", () => { + let connections: DataSource[] + before(async () => { + connections = await createTestingConnections({ + entities: [DocumentChunk, Point], + enabledDrivers: ["mssql"], + schemaCreate: true, + dropSchema: true, + }) + }) + + beforeEach(() => reloadTestingDatabases(connections)) + after(() => closeTestingConnections(connections)) + + it("should create vector column with specified dimensions", () => + Promise.all( + connections.map(async (connection) => { + const queryRunner = connection.createQueryRunner() + const table = await queryRunner.getTable("document_chunk") + await queryRunner.release() + + const embeddingColumn 
= table!.findColumnByName("embedding") + + expect(embeddingColumn).to.exist + expect(embeddingColumn!.type).to.equal("vector") + expect(embeddingColumn!.length).to.equal("1998") + }), + )) + + it("should persist and hydrate vector values", () => + Promise.all( + connections.map(async (connection) => { + const repository = connection.getRepository(DocumentChunk) + + const embedding = Array.from({ length: 1998 }, () => + Math.random(), + ) + + const chunk = new DocumentChunk() + chunk.content = "Test content" + chunk.embedding = embedding + + await repository.save(chunk) + + const loadedChunk = await repository.findOne({ + where: { id: chunk.id }, + }) + + expect(loadedChunk).to.exist + expect(loadedChunk!.embedding).to.be.an("array") + expect(loadedChunk!.embedding).to.have.lengthOf(1998) + + // Check that values are close (floating point comparison) + loadedChunk!.embedding.forEach((val, idx) => { + expect(val).to.be.closeTo(embedding[idx], 0.0001) + }) + }), + )) + + it("should update vector values", () => + Promise.all( + connections.map(async (connection) => { + const repository = connection.getRepository(Point) + + const point = new Point() + point.name = "Test Point" + point.coords = [1.0, 2.0, 3.0] + + await repository.save(point) + + point.coords = [4.0, 5.0, 6.0] + await repository.save(point) + + const loadedPoint = await repository.findOne({ + where: { id: point.id }, + }) + + expect(loadedPoint).to.exist + expect(loadedPoint!.coords).to.deep.equal([4.0, 5.0, 6.0]) + }), + )) + + it("should perform cosine similarity search using VECTOR_DISTANCE", () => + Promise.all( + connections.map(async (connection) => { + const repository = connection.getRepository(DocumentChunk) + const baseEmbedding = Array.from({ length: 1998 }, () => + Math.random(), + ) + + // Create test data with known vectors + const embeddings = [ + { + content: "Similar chunk", + embedding: [1.0, 1.0, 1.0, ...baseEmbedding.slice(3)], + }, + { + content: "Also similar", + embedding: [1.0, 
1.0, 1.1, ...baseEmbedding.slice(3)], + }, + { + content: "Very different", + embedding: [ + 10.0, + 10.0, + 10.0, + ...baseEmbedding.slice(3), + ], + }, + ] + + await repository.save(embeddings) + + const query = [1.0, 1.0, 1.05, ...baseEmbedding.slice(3)] + + const results = await connection.query( + ` + DECLARE @query AS VECTOR (1998) = '${JSON.stringify( + query, + )}'; + SELECT TOP (2) *, VECTOR_DISTANCE('cosine', @query, embedding) AS distance + FROM document_chunk + ORDER BY VECTOR_DISTANCE('cosine', @query, embedding) + `, + ) + + expect(results.length).to.equal(2) + // The first two results should be the similar ones + expect(results[0].content).to.be.oneOf([ + "Similar chunk", + "Also similar", + ]) + expect(results[1].content).to.be.oneOf([ + "Similar chunk", + "Also similar", + ]) + // Distance should be small for similar vectors + expect(results[0].distance).to.be.lessThan(0.1) + }), + )) + + it("should perform euclidean distance search using VECTOR_DISTANCE", () => + Promise.all( + connections.map(async (connection) => { + const repository = connection.getRepository(Point) + + // Create test data with known vectors + const points = [ + { + name: "Nearest point", + coords: [1.0, 1.0, 1.0], + }, + { + name: "Also near", + coords: [1.0, 1.0, 1.1], + }, + { + name: "Very different", + coords: [10.0, 10.0, 10.0], + }, + ] + + await repository.save(points) + + const origin = [1.0, 1.0, 1.05] + + const results = await connection.query( + ` + DECLARE @origin AS VECTOR (3) = '${JSON.stringify(origin)}'; + SELECT TOP (2) *, VECTOR_DISTANCE('euclidean', @origin, coords) AS distance + FROM point + ORDER BY VECTOR_DISTANCE('euclidean', @origin, coords) + `, + ) + + expect(results.length).to.equal(2) + // The first two results should be the similar ones + expect(results[0].name).to.be.oneOf([ + "Nearest point", + "Also near", + ]) + expect(results[1].name).to.be.oneOf([ + "Nearest point", + "Also near", + ]) + // Distance should be small for similar vectors + 
expect(results[0].distance).to.be.lessThan(0.1) + }), + )) + + it("should handle null vector values", () => + Promise.all( + connections.map(async (connection) => { + const repository = connection.getRepository(DocumentChunk) + + const chunk = new DocumentChunk() + chunk.content = "No embedding" + chunk.embedding = null as any + chunk.documentId = "doc-789" + + await repository.save(chunk) + + const loadedChunk = await repository.findOne({ + where: { id: chunk.id }, + }) + + expect(loadedChunk).to.exist + expect(loadedChunk!.embedding).to.be.null + }), + )) +})