feat(mysql): add support for vector columns on MariaDB and MySQL (#11670)

This commit is contained in:
Lucian Mocanu 2025-11-27 15:28:49 +01:00 committed by GitHub
parent dd55218648
commit cfb3d6c015
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 288 additions and 133 deletions

View File

@ -12,7 +12,7 @@ services:
MYSQL_DATABASE: "test"
mysql-9:
image: "mysql:9.4.0"
image: "mysql:9.5.0"
container_name: "typeorm-mysql-9"
ports:
- "3306:3306"
@ -24,7 +24,7 @@ services:
# mariadb
mariadb-10:
image: "mariadb:10.6.22-jammy"
image: "mariadb:10.6.24-jammy"
container_name: "typeorm-mariadb-10"
ports:
- "3307:3306"
@ -35,7 +35,7 @@ services:
MYSQL_DATABASE: "test"
mariadb-12:
image: "mariadb:12.0.1-rc"
image: "mariadb:12.1.2"
container_name: "typeorm-mariadb-12"
ports:
- "3307:3306"

View File

@ -207,7 +207,7 @@ const queryEmbedding = [
const results = await dataSource.query(
`
DECLARE @question AS VECTOR (1998) = @0;
SELECT TOP (10) dc.*,
SELECT TOP (10) dc.*,
VECTOR_DISTANCE('cosine', @question, embedding) AS distance
FROM document_chunk dc
ORDER BY VECTOR_DISTANCE('cosine', @question, embedding)

View File

@ -139,3 +139,7 @@ export class User {
roles: UserRoleType[]
}
```
### Vector Types
MySQL supports the [VECTOR type](https://dev.mysql.com/doc/refman/en/vector.html) since version 9.0, while in MariaDB, [vectors](https://mariadb.com/docs/server/reference/sql-structure/vectors/vector-overview) are available since 11.7.

View File

@ -60,7 +60,7 @@ Additional options can be added to the `extra` object and will be passed directl
### Column types for `postgres`
`int`, `int2`, `int4`, `int8`, `smallint`, `integer`, `bigint`, `decimal`, `numeric`, `real`, `float`, `float4`, `float8`, `double precision`, `money`, `character varying`, `varchar`, `character`, `char`, `text`, `citext`, `hstore`, `bytea`, `bit`, `varbit`, `bit varying`, `timetz`, `timestamptz`, `timestamp`, `timestamp without time zone`, `timestamp with time zone`, `date`, `time`, `time without time zone`, `time with time zone`, `interval`, `bool`, `boolean`, `enum`, `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle`, `cidr`, `inet`, `macaddr`, `macaddr8`, `tsvector`, `tsquery`, `uuid`, `xml`, `json`, `jsonb`, `jsonpath`, `int4range`, `int8range`, `numrange`, `tsrange`, `tstzrange`, `daterange`, `int4multirange`, `int8multirange`, `nummultirange`, `tsmultirange`, `tstzmultirange`, `multidaterange`, `geometry`, `geography`, `cube`, `ltree`
`int`, `int2`, `int4`, `int8`, `smallint`, `integer`, `bigint`, `decimal`, `numeric`, `real`, `float`, `float4`, `float8`, `double precision`, `money`, `character varying`, `varchar`, `character`, `char`, `text`, `citext`, `hstore`, `bytea`, `bit`, `varbit`, `bit varying`, `timetz`, `timestamptz`, `timestamp`, `timestamp without time zone`, `timestamp with time zone`, `date`, `time`, `time without time zone`, `time with time zone`, `interval`, `bool`, `boolean`, `enum`, `point`, `line`, `lseg`, `box`, `path`, `polygon`, `circle`, `cidr`, `inet`, `macaddr`, `macaddr8`, `tsvector`, `tsquery`, `uuid`, `xml`, `json`, `jsonb`, `jsonpath`, `int4range`, `int8range`, `numrange`, `tsrange`, `tstzrange`, `daterange`, `int4multirange`, `int8multirange`, `nummultirange`, `tsmultirange`, `tstzmultirange`, `multidaterange`, `geometry`, `geography`, `cube`, `ltree`, `vector`, `halfvec`.
### Column types for `cockroachdb`
@ -68,6 +68,33 @@ Additional options can be added to the `extra` object and will be passed directl
Note: CockroachDB returns all numeric data types as `string`. However, if you omit the column type and define your property as `number` ORM will `parseInt` string into number.
### Vector columns
Vector columns can be used for similarity searches using PostgreSQL's vector operators:
```typescript
// L2 distance (Euclidean) - <->
const results = await dataSource.sql`
SELECT id, embedding
FROM post
ORDER BY embedding <-> ${"[1,2,3]"}
LIMIT 5`
// Cosine distance - <=>
const results = await dataSource.sql`
SELECT id, embedding
FROM post
ORDER BY embedding <=> ${"[1,2,3]"}
LIMIT 5`
// Inner product - <#>
const results = await dataSource.sql`
SELECT id, embedding
FROM post
ORDER BY embedding <#> ${"[1,2,3]"}
LIMIT 5`
```
### Spatial columns
TypeORM's PostgreSQL and CockroachDB support uses [GeoJSON](http://geojson.org/) as an interchange format, so geometry columns should be tagged either as `object` or `Geometry` (or subclasses, e.g. `Point`) after importing [`geojson` types](https://www.npmjs.com/package/@types/geojson) or using the TypeORM built-in GeoJSON types:

View File

@ -37,15 +37,16 @@ SAP HANA 2.0 and SAP HANA Cloud support slightly different data types. Check the
- [SAP HANA 2.0 Data Types](https://help.sap.com/docs/SAP_HANA_PLATFORM/4fe29514fd584807ac9f2a04f6754767/20a1569875191014b507cf392724b7eb.html?locale=en-US)
- [SAP HANA Cloud Data Types](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/data-types)
TypeORM's `SapDriver` supports `tinyint`, `smallint`, `integer`, `bigint`, `smalldecimal`, `decimal`, `real`, `double`, `date`, `time`, `seconddate`, `timestamp`, `boolean`, `char`, `nchar`, `varchar`, `nvarchar`, `text`, `alphanum`, `shorttext`, `array`, `varbinary`, `blob`, `clob`, `nclob`, `st_geometry`, `st_point`, `real_vector`, `half_vector`, `vector`, and `halfvec`. Some of these data types have been deprecated or removed in SAP HANA Cloud, and will be converted to the closest available alternative when connected to a Cloud database.
TypeORM's `SapDriver` supports `tinyint`, `smallint`, `integer`, `bigint`, `smalldecimal`, `decimal`, `real`, `double`, `date`, `time`, `seconddate`, `timestamp`, `boolean`, `char`, `nchar`, `varchar`, `nvarchar`, `text`, `alphanum`, `shorttext`, `array`, `varbinary`, `blob`, `clob`, `nclob`, `st_geometry`, `st_point`, `real_vector` and `half_vector`. Some of these data types have been deprecated or removed in SAP HANA Cloud, and will be converted to the closest available alternative when connected to a Cloud database.
### Vector Types
The `real_vector` and `half_vector` data types were introduced in SAP HANA Cloud (2024Q1 and 2025Q2 respectively), and require a supported version of `@sap/hana-client` as well.
The `real_vector` and `half_vector` data types were introduced in SAP HANA Cloud (2024Q1 and 2025Q2 respectively), and require a supported version of `@sap/hana-client` as well.
For consistency with PostgreSQL's vector support, TypeORM also provides aliases:
- `vector` (alias for `real_vector`) - stores vectors as 4-byte floats
- `halfvec` (alias for `half_vector`) - stores vectors as 2-byte floats for memory efficiency
- `vector` (alias for `real_vector`) - stores vectors as 4-byte floats
- `halfvec` (alias for `half_vector`) - stores vectors as 2-byte floats for memory efficiency
```typescript
@Entity()
@ -70,3 +71,5 @@ export class Document {
```
By default, the client will return a `Buffer` in the `fvecs`/`hvecs` format, which is more efficient. It is possible to let the driver convert the values to a `number[]` by adding `{ extra: { vectorOutputType: "Array" } }` to the connection options. Check the SAP HANA Client documentation for more information about [REAL_VECTOR](https://help.sap.com/docs/SAP_HANA_CLIENT/f1b440ded6144a54ada97ff95dac7adf/0d197e4389c64e6b9cf90f6f698f62fe.html) or [HALF_VECTOR](https://help.sap.com/docs/SAP_HANA_CLIENT/f1b440ded6144a54ada97ff95dac7adf/8bb854b4ce4a4299bed27c365b717e91.html).
Use the appropriate [vector functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches.

View File

@ -180,88 +180,6 @@ There are several special column types with additional functionality available:
each time you call `save` of entity manager or repository, or during `upsert` operations when an update occurs.
You don't need to set this column - it will be automatically set.
### Vector columns
Vector columns are supported on PostgreSQL (via [`pgvector`](https://github.com/pgvector/pgvector) extension), Microsoft SQL Server, and SAP HANA Cloud, enabling storing and querying vector embeddings for similarity search and machine learning applications.
TypeORM supports both `vector` and `halfvec` column types across databases:
- `vector` - stores vectors as 4-byte floats (single precision)
- PostgreSQL: native `vector` type via pgvector extension
- SQL Server: native `vector` type
- SAP HANA: alias for `real_vector` type
- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency
- PostgreSQL: native `halfvec` type via pgvector extension
- SAP HANA: alias for `half_vector` type
You can specify the vector dimensions using the `length` option:
```typescript
@Entity()
export class Post {
@PrimaryGeneratedColumn()
id: number
// Vector without specified dimensions (works on PostgreSQL and SAP HANA; SQL Server requires explicit dimensions)
@Column("vector")
embedding: number[] | Buffer
// Vector with 3 dimensions: vector(3)
@Column("vector", { length: 3 })
embedding_3d: number[] | Buffer
// Half-precision vector with 4 dimensions: halfvec(4) (PostgreSQL and SAP HANA only)
@Column("halfvec", { length: 4 })
halfvec_embedding: number[] | Buffer
}
```
**PostgreSQL** - Vector columns can be used for similarity searches using vector operators:
```typescript
// L2 distance (Euclidean) - <->
const results = await dataSource.query(
`SELECT id, embedding FROM post ORDER BY embedding <-> $1 LIMIT 5`,
["[1,2,3]"],
)
// Cosine distance - <=>
const results = await dataSource.query(
`SELECT id, embedding FROM post ORDER BY embedding <=> $1 LIMIT 5`,
["[1,2,3]"],
)
// Inner product - <#>
const results = await dataSource.query(
`SELECT id, embedding FROM post ORDER BY embedding <#> $1 LIMIT 5`,
["[1,2,3]"],
)
```
**SQL Server** - Use the `VECTOR_DISTANCE` function for similarity searches:
```typescript
const queryEmbedding = [1, 2, 3]
// Cosine distance
const results = await dataSource.query(
`
DECLARE @question AS VECTOR(3) = @0;
SELECT TOP (5) id, embedding,
VECTOR_DISTANCE('cosine', @question, embedding) AS distance
FROM post
ORDER BY VECTOR_DISTANCE('cosine', @question, embedding)
`,
[JSON.stringify(queryEmbedding)],
)
```
> **Note**:
>
> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators.
> - **SQL Server**: Vector type support requires a compatible SQL Server version with vector functionality enabled.
> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`. Use the appropriate [vector similarity functions](https://help.sap.com/docs/hana-cloud-database/sap-hana-cloud-sap-hana-database-sql-reference-guide/vector-functions) for similarity searches.
## Column types
TypeORM supports all of the most commonly used database-supported column types.
@ -414,6 +332,50 @@ Besides "uuid" there is also "increment", "identity" (Postgres 10+ only) and "ro
on some database platforms with this type of generation (for example some databases can only have one increment column,
or some of them require increment to be a primary key).
### Vector columns
Vector columns are supported on MariaDB/MySQL, Microsoft SQL Server, PostgreSQL (via [`pgvector`](https://github.com/pgvector/pgvector) extension) and SAP HANA Cloud, enabling storing and querying vector embeddings for similarity search and machine learning applications.
TypeORM supports both `vector` and `halfvec` column types across databases:
- `vector` - stores vectors as 4-byte floats (single precision)
- MariaDB/MySQL: native `vector` type
- Microsoft SQL Server: native `vector` type
- PostgreSQL: `vector` type, available via `pgvector` extension
- SAP HANA Cloud: alias for `real_vector` type
- `halfvec` - stores vectors as 2-byte floats (half precision) for memory efficiency
- PostgreSQL: `halfvec` type, available via `pgvector` extension
- SAP HANA Cloud: alias for `half_vector` type
You can specify the number of vector dimensions using the `length` option:
```typescript
@Entity()
export class Post {
@PrimaryGeneratedColumn()
id: number
// Vector without specified dimensions
@Column("vector")
embedding: number[] | Buffer
// Vector with 3 dimensions: vector(3)
@Column("vector", { length: 3 })
embedding_3d: number[] | Buffer
// Half-precision vector with 4 dimensions: halfvec(4) (works on PostgreSQL and SAP HANA only)
@Column("halfvec", { length: 4 })
halfvec_embedding: number[] | Buffer
}
```
> **Note**:
>
> - **MariaDB/MySQL**: Vectors are supported since MariaDB 11.7 and MySQL 9.0.
> - **Microsoft SQL Server**: Vector type support requires SQL Server 2025 (17.x) or newer.
> - **PostgreSQL**: Vector columns require the `pgvector` extension to be installed. The extension provides the vector data types and similarity operators.
> - **SAP HANA**: Vector columns require SAP HANA Cloud (2024Q1+) and a supported version of `@sap/hana-client`.
### Spatial columns
Microsoft SQL Server, MySQL/MariaDB, PostgreSQL/CockroachDB and SAP HANA all support spatial columns. TypeORM's support for each varies slightly between databases, particularly as the column names vary between databases.

View File

@ -157,6 +157,8 @@ export class MysqlDriver implements Driver {
"multilinestring",
"multipolygon",
"geometrycollection",
// vector data types
"vector",
// additional data types for mariadb
"uuid",
"inet4",
@ -191,6 +193,7 @@ export class MysqlDriver implements Driver {
"nvarchar",
"binary",
"varbinary",
"vector",
]
/**
@ -280,6 +283,7 @@ export class MysqlDriver implements Driver {
char: { length: 1 },
binary: { length: 1 },
varbinary: { length: 255 },
vector: { length: 2048 }, // default length MySQL uses if not provided a value
decimal: { precision: 10, scale: 0 },
dec: { precision: 10, scale: 0 },
numeric: { precision: 10, scale: 0 },

View File

@ -2802,17 +2802,19 @@ export class MysqlQueryRunner extends BaseQueryRunner implements QueryRunner {
) !== -1 &&
dbColumn["CHARACTER_MAXIMUM_LENGTH"]
) {
const length =
dbColumn[
"CHARACTER_MAXIMUM_LENGTH"
].toString()
let length: number =
dbColumn["CHARACTER_MAXIMUM_LENGTH"]
if (tableColumn.type === "vector") {
// MySQL and MariaDb store the vector length in bytes, not in number of dimensions.
length = length / 4
}
tableColumn.length =
!this.isDefaultColumnLength(
table,
tableColumn,
length,
length.toString(),
)
? length
? length.toString()
: ""
}

View File

@ -75,7 +75,7 @@ export type WithLengthColumnType =
| "binary" // mssql
| "varbinary" // mssql, sap
| "string" // cockroachdb, spanner
| "vector" // postgres, mssql, sap
| "vector" // mariadb, mysql, mssql, postgres, sap
| "halfvec" // postgres, sap
| "half_vector" // sap
| "real_vector" // sap

View File

@ -0,0 +1,61 @@
import {
Column,
Entity,
PrimaryColumn,
ValueTransformer,
} from "../../../../../../src"
/**
 * Value transformer bridging the `number[]` exposed on the entity and the
 * binary payload exchanged with the mysql2 client.
 *
 * The binary layout is a plain sequence of little-endian 32-bit floats, one
 * per vector component, with no header.
 *
 * mysql2 only partially supports the vector type: parameters currently have
 * to be supplied in binary form (numeric arrays are not accepted), while
 * results arrive either as a `Buffer` or, with newer client versions, already
 * deserialized as `number[]`.
 */
const vectorTransformer: ValueTransformer = {
    /**
     * Packs every component of `value` as a little-endian float32.
     *
     * @param value vector components to persist
     * @returns a Buffer of `value.length * 4` bytes
     */
    to: (value: number[]) => {
        const bytes = new ArrayBuffer(value.length * 4)
        const writer = new DataView(bytes)
        for (const [position, component] of value.entries()) {
            writer.setFloat32(position * 4, component, true)
        }
        return Buffer.from(bytes)
    },

    /**
     * Unpacks a database value into a `number[]`.
     *
     * @param value either the raw binary payload or an already decoded array
     * @returns the vector components (the same array instance when the
     * client already decoded it)
     */
    from: (value: Buffer | number[]) => {
        // newer versions of mysql2 already deserialize vector as number[]
        if (Array.isArray(value)) return value

        // Honour byteOffset/byteLength: the Buffer may be a window into a
        // larger underlying ArrayBuffer.
        const reader = new DataView(
            value.buffer,
            value.byteOffset,
            value.byteLength,
        )
        const dimensions = value.byteLength / 4
        const components = new Array<number>(dimensions)
        for (
            let position = 0, offset = 0;
            position < dimensions;
            position++, offset += 4
        ) {
            components[position] = reader.getFloat32(offset, true)
        }
        return components
    },
}
/**
 * Test entity with a single `vector` column, used by the MariaDB/MySQL
 * vector schema tests.
 */
@Entity()
export class Embedding {
// Primary key; assigned explicitly (not generated).
@PrimaryColumn()
id: number
// Plain column; its type is left to be inferred from the property type.
@Column()
content: string
// Plain column; its type is left to be inferred from the property type.
@Column()
metadata: string
// Vector column declared with `length: 16`, i.e. 16 dimensions.
// `vectorTransformer` converts between the number[] exposed here and the
// binary (little-endian float32) form exchanged with the client.
@Column("vector", {
length: 16,
transformer: vectorTransformer,
})
vector: number[]
}

View File

@ -0,0 +1,85 @@
import { expect } from "chai"
import { DataSource, DeepPartial } from "../../../../../src"
import { DriverUtils } from "../../../../../src/driver/DriverUtils"
import {
closeTestingConnections,
createTestingConnections,
} from "../../../../utils/test-utils"
import { Embedding } from "./entity/Embedding"
/**
 * Functional test for `vector` columns on MariaDB and MySQL: checks that the
 * column is created with the expected type and length, and that a vector
 * survives a save/load round trip through the entity's value transformer.
 */
describe("database-schema > vectors > mysql", () => {
    describe("with vector output type Array", () => {
        let dataSources: DataSource[]

        before(async () => {
            dataSources = await createTestingConnections({
                entities: [Embedding],
                enabledDrivers: ["mariadb", "mysql"],
                driverSpecific: {
                    // Synchronization is triggered manually inside the test,
                    // after the server version check.
                    synchronize: false,
                },
            })
        })
        after(() => closeTestingConnections(dataSources))

        it("should work correctly - create, persist and hydrate", () =>
            Promise.all(
                dataSources.map(async (dataSource) => {
                    // Skip servers older than the minimum versions checked
                    // here (MySQL 9.0, MariaDB 11.7).
                    if (
                        (dataSource.options.type === "mysql" &&
                            !DriverUtils.isReleaseVersionOrGreater(
                                dataSource.driver,
                                "9.0",
                            )) ||
                        (dataSource.options.type === "mariadb" &&
                            !DriverUtils.isReleaseVersionOrGreater(
                                dataSource.driver,
                                "11.7",
                            ))
                    ) {
                        return
                    }

                    await dataSource.synchronize()

                    // Verify column metadata. The query runner is released in
                    // `finally` so that a failing lookup or assertion does not
                    // leave it unreleased.
                    const queryRunner = dataSource.createQueryRunner()
                    try {
                        const table = (await queryRunner.getTable(
                            dataSource.getMetadata(Embedding).tableName,
                        ))!

                        expect(table.findColumnByName("vector")).to.contain({
                            type: "vector",
                            length: "16",
                        })
                    } finally {
                        await queryRunner.release()
                    }

                    const vector = [
                        0.004318627528846264, -0.008295782841742039,
                        0.011462775990366936, -0.03171011060476303,
                        -0.003404685528948903, 0.018827877938747406,
                        0.010692788287997246, 0.014154385775327682,
                        -0.026206370443105698, -0.03977154940366745,
                        -0.008630559779703617, 0.040039367973804474,
                        0.0019048830727115273, 0.01347813569009304,
                        -0.02147931419312954, -0.004211498890072107,
                    ]

                    const plainEmbedding = {
                        id: 1,
                        content: "This is a sample text to be analyzed by AI",
                        metadata: `{"client":"typeorm"}`,
                        vector,
                    } satisfies DeepPartial<Embedding>

                    // Persist the entity, then reload it and compare with the
                    // plain input object.
                    const embeddingRepository =
                        dataSource.getRepository(Embedding)
                    const embedding = embeddingRepository.create(plainEmbedding)
                    await embeddingRepository.save(embedding)

                    const loadedEmbedding = await embeddingRepository.findOneBy(
                        { id: 1 },
                    )
                    expect(loadedEmbedding).to.deep.equal(plainEmbedding)
                }),
            ))
    })
})

View File

@ -1,4 +1,38 @@
import { Column, Entity, PrimaryColumn } from "../../../../../../src"
import {
Column,
Entity,
PrimaryColumn,
ValueTransformer,
} from "../../../../../../src"
/**
 * Value transformer bridging the `number[]` exposed on the entity and a
 * binary payload laid out as:
 *
 * - bytes 0..3: component count as a little-endian uint32
 * - bytes 4..: one little-endian float32 per component
 */
const vectorTransformer: ValueTransformer = {
    /**
     * Serializes `value` into the count-prefixed binary layout.
     *
     * @param value vector components to persist
     * @returns a Buffer of `4 + value.length * 4` bytes
     */
    to: (value: number[]) => {
        const dimensions = value.length
        const bytes = new ArrayBuffer(4 + dimensions * 4)
        const writer = new DataView(bytes)

        // header: number of components
        writer.setUint32(0, dimensions, true)
        for (const [position, component] of value.entries()) {
            writer.setFloat32(4 + position * 4, component, true)
        }
        return Buffer.from(bytes)
    },

    /**
     * Deserializes a count-prefixed binary payload into a `number[]`.
     *
     * @param value raw binary payload
     * @returns as many components as the header declares
     */
    from: (value: Buffer) => {
        // Honour byteOffset/byteLength: the Buffer may be a window into a
        // larger underlying ArrayBuffer.
        const reader = new DataView(
            value.buffer,
            value.byteOffset,
            value.byteLength,
        )
        const dimensions = reader.getUint32(0, true)
        const components = new Array<number>(dimensions)
        let offset = 4
        for (let position = 0; position < dimensions; position++) {
            components[position] = reader.getFloat32(offset, true)
            offset += 4
        }
        return components
    },
}
@Entity()
export class BufferEmbedding {
@ -11,6 +45,8 @@ export class BufferEmbedding {
@Column("nclob")
metadata: string
@Column("real_vector")
realVector: Buffer
@Column("real_vector", {
transformer: vectorTransformer,
})
realVector: number[]
}

View File

@ -119,34 +119,6 @@ describe("database-schema > vectors > sap", () => {
})
after(() => closeTestingConnections(dataSources))
/**
 * Decodes a count-prefixed float vector: a little-endian uint32 component
 * count followed by that many little-endian float32 values.
 *
 * @param buffer raw binary payload
 * @returns the decoded components
 */
function deserializeFvecs(buffer: Buffer) {
    // Honour byteOffset/byteLength: the Buffer may be a window into a larger
    // underlying ArrayBuffer.
    const reader = new DataView(
        buffer.buffer,
        buffer.byteOffset,
        buffer.byteLength,
    )
    const count = reader.getUint32(0, true)
    const values = new Array<number>(count)
    for (let slot = 0, offset = 4; slot < count; slot++, offset += 4) {
        values[slot] = reader.getFloat32(offset, true)
    }
    return values
}
/**
 * Encodes a float vector as a little-endian uint32 component count followed
 * by one little-endian float32 per component.
 *
 * @param array vector components to encode
 * @returns a Buffer of `4 + array.length * 4` bytes
 */
function serializeFvecs(array: number[]) {
    const count = array.length
    const bytes = new ArrayBuffer(4 + count * 4)
    const writer = new DataView(bytes)

    // header: number of components
    writer.setUint32(0, count, true)
    for (const [slot, component] of array.entries()) {
        writer.setFloat32(4 + slot * 4, component, true)
    }
    return Buffer.from(bytes)
}
it("should work correctly - persist and hydrate ", () =>
Promise.all(
dataSources.map(async (dataSource) => {
@ -177,7 +149,7 @@ describe("database-schema > vectors > sap", () => {
content:
"This is a sample text to be analyzed by SAP Joule AI",
metadata: `{"client":"typeorm"}`,
realVector: serializeFvecs(plainVector),
realVector: plainVector,
} satisfies DeepPartial<BufferEmbedding>
const embeddingRepository =
@ -188,10 +160,9 @@ describe("database-schema > vectors > sap", () => {
const loadedEmbedding = await embeddingRepository.findOneBy(
{ id: 1 },
)
const loadedVector = deserializeFvecs(
loadedEmbedding!.realVector,
expect(loadedEmbedding!.realVector).to.deep.equal(
plainVector,
)
expect(loadedVector).to.deep.equal(plainVector)
}),
))
})